"Refactoring is a disciplined technique for restructuring an existing body of code, altering its internal structure without changing its external behavior." -- © Martin Fowler
We're going to refactor a sample console utility that aggregates movie reviews, demonstrating several kinds of refactoring along the way.
We'll start with a working application that takes a movie title as input and aggregates reviews from Twitter and IMDB, then apply refactoring patterns one by one to turn it into more readable and maintainable Python code.
In [73]:
"""Movie Reviews.
Usage: movie_reviews.py <title>
movie_reviews.py (-h | --help)
movie_reviews.py --version
Arguments:
<title> Movie title
Options:
-h --help Show this screen.
--version Show version.
"""
from docopt import docopt
from TwitterSearch import *
from dateutil import parser
from imdbpie import Imdb
import logging
import os
def main(title):
reviews = []
# Search tweets
ts = TwitterSearch(
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY'),
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET'),
access_token = os.environ.get('TWITTER_ACCESS_TOKEN'),
access_token_secret = os.environ.get('TWITTER_TOKEN_SECRET')
)
try:
ts.connect()
tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.setKeywords(['#' + title + 'Movie']) # define the keywords we want to search for
        tso.setLanguage('en') # we want English tweets only
        tso.setIncludeEntities(False) # and don't give us all that entity information
# add tweets to reviews list
results = ts.getSearchResults(tso)
    except TwitterSearchException as e: # log any errors raised by the Twitter client
logging.exception(str(e))
ts.cleanUp()
else:
for offset in range(results.getSize()):
if offset > 9:
break
tweet = results.getTweetByIndex(offset)
reviews.append({
'author': tweet.getUserName(),
'summary': tweet.getText(),
'text': tweet.getText(),
'date': parser.parse(tweet.getCreatedDate(), ignoretz=True),
'source': 'Twitter'
})
finally:
ts.disconnect()
# Search Imdb
imdb = Imdb()
try:
response = imdb.search_for_title(title)[0]
title_id = response['imdb_id']
response = imdb.get_title_reviews(title_id, max_results=10)
except IndexError as e:
logging.exception(str(e))
else:
for review in response:
reviews.append({
'author': review.username,
'summary': review.summary,
'text': review.text,
'date': parser.parse(review.date, ignoretz=True),
'source': 'IMDB'
})
# Sort reviews by date
reviews.sort(cmp=_cmprev)
# Print reviews
for review in reviews:
print('(%s) @%s: %s [Source: %s]' % ( review['date'].strftime('%Y-%m-%d'), review['author'], review['summary'], review['source'] ) )
def _cmprev(r1, r2):
if r1['date']>r2['date']:
return -1
elif r1['date']<r2['date']:
return 1
else:
return 0
In [74]:
from docopt import docopt
from TwitterSearch import *
from dateutil import parser
from imdbpie import Imdb
import logging
import os
In [75]:
import logging
import os
from docopt import docopt
from dateutil import parser
from imdbpie import Imdb
from TwitterSearch import TwitterSearch, TwitterSearchOrder, TwitterSearchException
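Beyond readability, the wildcard import we just removed can silently shadow builtins. A minimal sketch of that failure mode, using only the standard library: from os import * pulls in os.open, which hides the builtin open() and expects an incompatible set of arguments.
from os import *

try:
    open('example.txt')  # this now resolves to os.open, not the builtin open
except TypeError as error:
    print('shadowed builtin: %s' % error)  # os.open requires a flags argument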
In [76]:
def _cmprev(r1, r2):
if r1['date']>r2['date']:
return -1
elif r1['date']<r2['date']:
return 1
else:
return 0
In [77]:
def _sort_by_date_desc(first, second):
first_date = first['date']
second_date = second['date']
if first_date > second_date:
return -1
elif first_date < second_date:
return 1
return 0
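Renaming the comparator is only half the story: the cmp argument used by reviews.sort(cmp=_cmprev) exists on Python 2 only. If this code ever moves to Python 3, the renamed function can be adapted with functools.cmp_to_key; a small sketch with made-up dates:
from datetime import datetime
from functools import cmp_to_key

reviews = [{'date': datetime(2017, 1, 2)}, {'date': datetime(2017, 3, 1)}]
reviews.sort(key=cmp_to_key(_sort_by_date_desc))  # newest review first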
In [78]:
def main(title):
reviews = []
# Search tweets
ts = TwitterSearch(
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY'),
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET'),
access_token = os.environ.get('TWITTER_ACCESS_TOKEN'),
access_token_secret = os.environ.get('TWITTER_TOKEN_SECRET')
)
try:
ts.connect()
tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.setKeywords(['#' + title + 'Movie']) # define the keywords we want to search for
        tso.setLanguage('en') # we want English tweets only
        tso.setIncludeEntities(False) # and don't give us all that entity information
# add tweets to reviews list
results = ts.getSearchResults(tso)
    except TwitterSearchException as e: # log any errors raised by the Twitter client
logging.exception(str(e))
ts.cleanUp()
else:
for offset in range(results.getSize()):
if offset > 9:
break
tweet = results.getTweetByIndex(offset)
reviews.append({
'author': tweet.getUserName(),
'summary': tweet.getText(),
'text': tweet.getText(),
'date': parser.parse(tweet.getCreatedDate(), ignoretz=True),
'source': 'Twitter'
})
finally:
ts.disconnect()
In [79]:
import logging
import os
from dateutil import parser
from TwitterSearch import TwitterSearch, TwitterSearchOrder, TwitterSearchException
class TwitterReviews(object):
def __init__(self, movie, limit=10, language='en'):
self.movie = movie
        self.limit = limit
self.language = language
self.client = TwitterSearch(
consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET')
)
def __enter__(self):
self.client.connect()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type == TwitterSearchException:
logging.exception(str(exc_val))
self.client.cleanUp()
self.client.disconnect()
@property
def reviews(self):
return Reviews(self._get_results(), limit=self.limit)
def _prepare_request(self):
tso = TwitterSearchOrder()
tso.setKeywords(self._get_keywords())
tso.setLanguage(self.language)
tso.setIncludeEntities(False)
return tso
def _get_keywords(self):
return ['#' + self.movie + 'Movie']
def _get_results(self):
request = self._prepare_request()
return self.client.getSearchResults(request)
class Reviews(object):
def __init__(self, tweets, limit=10):
self.limit = limit
self.tweets = tweets
def __len__(self):
size = self.tweets.getSize()
return size if size < self.limit else self.limit
def __getitem__(self, item):
if item >= len(self):
raise IndexError
tweet = self.tweets.getTweetByIndex(item)
return Review(tweet)
class Review(object):
def __init__(self, review):
self.review = review
@property
def author(self):
return self.review.getUserName()
@property
def summary(self):
return self.review.getText()
@property
def text(self):
return self.review.getText()
@property
def date(self):
return parser.parse(self.review.getCreatedDate(), ignoretz=True)
@property
def source(self):
return 'Twitter'
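A quick usage sketch of the new class (it assumes valid Twitter credentials in the environment; 'Dunkirk' is just an example title). The context manager now owns the connect/disconnect bookkeeping, and Reviews is iterable without defining __iter__: because __len__ and __getitem__ implement the sequence protocol, a for loop calls __getitem__ with increasing indexes until IndexError is raised.
with TwitterReviews('Dunkirk') as backend:
    for review in backend.reviews:  # stops at the IndexError raised by __getitem__
        print('%s: %s' % (review.author, review.summary))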
In [80]:
def main(title, year=None):
reviews = []
with TwitterReviews(title) as reviews_backend:
for review in reviews_backend.reviews:
reviews.append(review)
In [81]:
def main(title, year=None):
reviews = []
with TwitterReviews(title) as reviews_backend:
for review in reviews_backend.reviews:
reviews.append(review)
# Search Imdb
imdb = Imdb()
try:
response = imdb.search_for_title(title)[0]
title_id = response['imdb_id']
response = imdb.get_title_reviews(title_id, max_results=10)
except IndexError as e:
logging.exception(str(e))
else:
for review in response:
reviews.append({
'author': review.username,
'summary': review.summary,
'text': review.text,
'date': parser.parse(review.date, ignoretz=True),
'source': 'IMDB'
})
In [82]:
import logging
from dateutil import parser
from imdbpie import Imdb
class IMDBReviews(object):
def __init__(self, movie, limit=10, language='en'):
self.movie = movie
self.limit = limit
self.language = language
self.client = Imdb()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_val:
logging.exception(str(exc_val))
@property
def reviews(self):
return [Review(result) for result in self._get_results()]
def _get_results(self):
try:
response = self.client.search_for_title(self.movie)[0]
title_id = response['imdb_id']
            response = self.client.get_title_reviews(title_id, max_results=self.limit)
return response
except IndexError as e:
logging.exception(str(e))
return []
class Review(object):
def __init__(self, review):
self.review = review
@property
def author(self):
return self.review.username
@property
def summary(self):
return self.review.summary
@property
def text(self):
return self.review.text
@property
def date(self):
return parser.parse(self.review.date, ignoretz=True)
@property
def source(self):
return 'IMDB'
In [83]:
def main(title):
reviews = []
for backend_class in (TwitterReviews, IMDBReviews):
with backend_class(title) as reviews_backend:
for review in reviews_backend.reviews:
reviews.append(review)
In [84]:
class BaseReview(object):
def __init__(self, review):
self.review = review
@property
def author(self):
raise NotImplementedError
@property
def summary(self):
raise NotImplementedError
@property
def text(self):
raise NotImplementedError
@property
def date(self):
raise NotImplementedError
@property
def source(self):
raise NotImplementedError
    def display(self):
        print('(%s) @%s: %s [Source: %s]' % (
            self.date.strftime('%Y-%m-%d'),
            self.author,
            self.summary,
            self.source))
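An alternative worth mentioning: on Python 3 the same contract can be expressed with the abc module, so a subclass that forgets to override a property fails at instantiation time instead of on first attribute access. A sketch under that assumption, abbreviated to a single property (AbstractReview is a hypothetical name, chosen to avoid clashing with BaseReview above):
import abc

class AbstractReview(abc.ABC):
    def __init__(self, review):
        self.review = review

    @property
    @abc.abstractmethod
    def author(self):
        """Subclasses must return the review author's display name."""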
In [85]:
import logging
import os
import requests
from dateutil import parser
BaseReview = object # ignore this line, it's just for Jupyter notebook
class NYTimesReviews(object):
def __init__(self, movie, limit=10, language='en'):
self.movie = movie
        self.limit = limit
self.language = language
self.url = 'https://api.nytimes.com/svc/movies/v2/reviews/search.json'
self.api_key = os.environ.get('NY_TIMES_API_KEY')
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_val:
logging.exception(str(exc_val))
@property
def reviews(self):
return [Review(result) for result in self._get_results()]
def _get_results(self):
response = requests.get(self.url, self._prepare_request_data())
        return response.json()['results'][:self.limit]  # keep the same 10-item cap as the inline version
def _prepare_request_data(self):
data = {
'query': self.movie,
'api-key': self.api_key
}
return data
class Review(BaseReview):
@property
def author(self):
return self.review['byline']
@property
def summary(self):
return self.review['headline']
@property
def text(self):
return self.review['summary_short']
@property
def date(self):
return parser.parse(self.review['date_updated'], ignoretz=True)
@property
def source(self):
return 'NYTimes'
In [86]:
def main(title):
reviews = []
for backend_class in (TwitterReviews, IMDBReviews):
with backend_class(title) as reviews_backend:
for review in reviews_backend.reviews:
reviews.append(review)
# Search NYTimes
url = "https://api.nytimes.com/svc/movies/v2/reviews/search.json"
data = {
'query': title,
'api-key': os.environ.get('NY_TIMES_API_KEY')
}
response = requests.get(url, data)
count = 0
for review in response.json()['results']:
if count > 9:
break
reviews.append({
'author': review['byline'],
'summary': review['headline'],
'text': review['summary_short'],
'date': parser.parse(review['date_updated'], ignoretz=True),
'source': 'NYTimes'
})
count += 1
In [87]:
def main(title):
reviews = []
for backend_class in (TwitterReviews, IMDBReviews, NYTimesReviews):
with backend_class(title) as reviews_backend:
for review in reviews_backend.reviews:
reviews.append(review)
In [88]:
def _cmprev(r1, r2):
if r1['date']>r2['date']:
return -1
elif r1['date']<r2['date']:
return 1
else:
return 0
In [89]:
def get_reviews(title):
reviews = []
for backend_class in (TwitterReviews, IMDBReviews, NYTimesReviews):
with backend_class(title) as reviews_backend:
for review in reviews_backend.reviews:
reviews.append(review)
return reversed(sorted(reviews, key=_get_review_date))
def _get_review_date(review):
return review.date
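For comparison, an equivalent and slightly more compact spelling of the same function: sorted() accepts reverse=True, which avoids wrapping the result in reversed(), and operator.attrgetter can replace the one-line _get_review_date helper.
from operator import attrgetter

def get_reviews(title):
    reviews = []
    for backend_class in (TwitterReviews, IMDBReviews, NYTimesReviews):
        with backend_class(title) as reviews_backend:
            reviews.extend(reviews_backend.reviews)
    return sorted(reviews, key=attrgetter('date'), reverse=True)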
In [90]:
"""Movie Reviews.
Usage: movie_reviews.py <title>
movie_reviews.py (-h | --help)
movie_reviews.py --version
Arguments:
<title> Movie title
Options:
-h --help Show this screen.
--version Show version.
"""
from docopt import docopt
from TwitterSearch import *
from dateutil import parser
from imdbpie import Imdb
import logging
import os
import requests
def main(title):
reviews = []
# Search tweets
ts = TwitterSearch(
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY'),
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET'),
access_token = os.environ.get('TWITTER_ACCESS_TOKEN'),
access_token_secret = os.environ.get('TWITTER_TOKEN_SECRET')
)
try:
ts.connect()
tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.setKeywords(['#' + title + 'Movie']) # define the keywords we want to search for
        tso.setLanguage('en') # we want English tweets only
        tso.setIncludeEntities(False) # and don't give us all that entity information
# add tweets to reviews list
results = ts.getSearchResults(tso)
    except TwitterSearchException as e: # log any errors raised by the Twitter client
logging.exception(str(e))
ts.cleanUp()
else:
for offset in range(results.getSize()):
if offset > 9:
break
tweet = results.getTweetByIndex(offset)
reviews.append({
'author': tweet.getUserName(),
'summary': tweet.getText(),
'text': tweet.getText(),
'date': parser.parse(tweet.getCreatedDate(), ignoretz=True),
'source': 'Twitter'
})
finally:
ts.disconnect()
# Search Imdb
imdb = Imdb()
try:
response = imdb.search_for_title(title)[0]
title_id = response['imdb_id']
response = imdb.get_title_reviews(title_id, max_results=10)
except IndexError as e:
logging.exception(str(e))
else:
for review in response:
reviews.append({
'author': review.username,
'summary': review.summary,
'text': review.text,
'date': parser.parse(review.date, ignoretz=True),
'source': 'IMDB'
})
# Search NYTimes
url = "https://api.nytimes.com/svc/movies/v2/reviews/search.json"
data = {
'query': title,
'api-key': os.environ.get('NY_TIMES_API_KEY')
}
response = requests.get(url, data)
count = 0
for review in response.json()['results']:
if count > 9:
break
reviews.append({
'author': review['byline'],
'summary': review['headline'],
'text': review['summary_short'],
'date': parser.parse(review['date_updated'], ignoretz=True),
'source': 'NYTimes'
})
count += 1
# Sort reviews by date
reviews.sort(cmp=_cmprev)
# Print reviews
for review in reviews:
print('(%s) @%s: %s [Source: %s]' % ( review['date'].strftime('%Y-%m-%d'), review['author'], review['summary'], review['source'] ) )
def _cmprev(r1, r2):
if r1['date']>r2['date']:
return -1
elif r1['date']<r2['date']:
return 1
else:
return 0
In [91]:
def main(title):
for review in get_reviews(title):
review.display()
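Finally, the module docstring at the top promises a docopt command-line interface; here is a minimal sketch of the matching entry point (the version string is an assumption):
if __name__ == '__main__':
    arguments = docopt(__doc__, version='Movie Reviews 1.0')
    main(arguments['<title>'])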