In [1]:
import time
import logging
import os
import json
import requests
import pandas as pd
from s3fs.core import S3FileSystem

In [2]:
# Tell the AWS client stack where the project-local config file lives
# (presumably picked up by s3fs when the filesystem is created -- confirm).
os.environ['AWS_CONFIG_FILE'] = 'aws_config.ini'

s3 = S3FileSystem(anon=False)
key = 'TheNumbers_budgets.csv'
bucket = 'movie-torrents'

# Read the budgets CSV directly from S3. The context manager guarantees the
# remote file handle is closed once pandas has consumed it (the previous
# version left the handle open for the lifetime of the kernel).
with s3.open('{}/{}'.format(bucket, key), mode='rb') as budgets_file:
    df = pd.read_csv(budgets_file, index_col=0)
df.head()


Out[2]:
title release_date production_budget domestic_gross world_gross
1 Avatar 2009-12-18 425000000 760507625 2783918982
2 Star Wars Ep. VII: The Force Awakens 2015-12-18 306000000 936662225 2058662225
3 Pirates of the Caribbean: At Worlds End 2007-05-24 300000000 309420425 963420425
4 Spectre 2015-11-06 300000000 200074175 879620923
5 The Dark Knight Rises 2012-07-20 275000000 448139099 1084439099

In [3]:
# Logger used to record titles that could not be fetched from the OMDb API.
logger = logging.getLogger('OMDB_API')
logger.setLevel(logging.INFO)

# Re-running this cell must not stack up handlers (each would write its own
# copy of every record), so detach and close whatever a previous run attached.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# create a file handler
handler = logging.FileHandler('omdb_api.log')
handler.setLevel(logging.INFO)

# create a logging format
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# Attach the handler to the logger. Without this the handler is never used and
# INFO records are silently dropped instead of being written to omdb_api.log.
logger.addHandler(handler)

In [4]:
# list of (title, release year) tuples, one per row of the budgets frame
title = df['title']
# release_date values are sliced as strings; the first four characters are the year
year = [release_date[:4] for release_date in df['release_date']]
movie_tup = list(zip(title, year))

In [5]:
# Read the OMDb API key from a local file so it is not hardcoded in the notebook.
with open('./scripts/omdb_api.key', 'r') as read_file:
    omdb_api_key = read_file.read().strip()

# Successfully fetched OMDb records, one parsed JSON dict per movie.
lst = []

for title, year in movie_tup:
    # meter number of requests to omdb api
    time.sleep(0.5)

    # omdb api address
    # NOTE(review): the API key travels over plain HTTP here -- consider https.
    payload = {'t': title, 'y': year, 'apikey': omdb_api_key}

    # Check the status code BEFORE parsing: a non-200 response is not
    # guaranteed to carry JSON, and parsing it first used to abort the whole
    # crawl with a decode error. The body is parsed exactly once.
    try:
        # timeout keeps one stalled connection from hanging the loop forever
        html = requests.get('http://www.omdbapi.com', params=payload, timeout=10)
        resp = json.loads(html.text) if html.status_code == 200 else None
    except (requests.RequestException, ValueError):
        # network failure / timeout, or a 200 response whose body is not JSON
        resp = None

    # Log and skip anything that failed or that OMDb flagged with an 'Error' key.
    if resp is None or 'Error' in resp:
        logger.info('Year:{0} - Title:{1}'.format(year, title))
        continue

    lst.append(resp)

    # Sample limit: stop once more than 20 records (i.e. 21) have been collected.
    if len(lst) > 20:
        break

In [6]:
# Columns kept from the OMDb records, in display order.
omdb_columns = ['Actors', 'Awards', 'BoxOffice', 'Country', 'DVD', 'Director', 'Genre',
                'Language', 'Metascore', 'Production', 'Rated', 'Released', 'Runtime', 'Title',
                'Type', 'Writer', 'imdbID', 'imdbRating', 'imdbVotes']

# `lst` is a list of record dicts, which is what the DataFrame constructor is
# designed for; `DataFrame.from_dict` is documented for dict input only.
# NOTE: this rebinds `df`, replacing the budgets frame loaded earlier.
df = pd.DataFrame(lst)[omdb_columns]

In [7]:
# Money / count columns: treat OMDb's 'N/A' placeholder as 0, strip '$' and
# thousands separators, then cast to int. The result is assigned back to the
# column instead of calling `replace(..., inplace=True)` on `df[col]`: that
# chained in-place call mutates a temporary under pandas Copy-on-Write and
# leaves 'N/A' in the frame, which then breaks the int cast.
for col in ['BoxOffice', 'imdbVotes']:
    df[col] = (
        df[col]
        .replace(to_replace='N/A', value='0')
        .replace(r'[\$,]', '', regex=True)
        .astype(int)
    )

# Runtime looks like '162 min'. The character class removes every ' ', 'm',
# 'i' and 'n' character, leaving only the digits. 'N/A' is mapped to 0 first,
# consistent with the other numeric columns, so a missing runtime no longer
# crashes the int cast.
df['Runtime'] = (
    df['Runtime']
    .replace(to_replace='N/A', value='0')
    .replace(r'[ min]', '', regex=True)
    .astype(int)
)

# Dates such as '22 Apr 2010'; anything unparseable (e.g. 'N/A') becomes NaT.
for col in ['DVD', 'Released']:
    df[col] = pd.to_datetime(df[col], errors='coerce', format='%d %b %Y')

# Score columns: 'N/A' -> 0, then cast to float (assigned back, not in place).
for col in ['Metascore', 'imdbRating']:
    df[col] = df[col].replace(to_replace='N/A', value='0').astype(float)

In [8]:
# Preview the first rows of the OMDb frame after the dtype conversions above.
df.head()


Out[8]:
Actors Awards BoxOffice Country DVD Director Genre Language Metascore Production Rated Released Runtime Title Type Writer imdbID imdbRating imdbVotes
0 Sam Worthington, Zoe Saldana, Sigourney Weaver... Won 3 Oscars. Another 85 wins & 128 nominations. 749700000 UK, USA 2010-04-22 James Cameron Action, Adventure, Fantasy English, Spanish 83.0 20th Century Fox PG-13 2009-12-18 162 Avatar movie James Cameron tt0499549 7.8 944150
1 Daniel Craig, Christoph Waltz, Léa Seydoux, Ra... Won 1 Oscar. Another 7 wins & 31 nominations. 208777731 UK, USA 2016-02-09 Sam Mendes Action, Adventure, Thriller English, Spanish, Italian, German, French 60.0 Sony Pictures PG-13 2015-11-06 148 Spectre movie John Logan (screenplay), Neal Purvis (screenpl... tt2379713 6.8 313528
2 Christian Bale, Gary Oldman, Tom Hardy, Joseph... Nominated for 1 BAFTA Film Award. Another 38 w... 448130642 UK, USA 2012-12-03 Christopher Nolan Action, Thriller English, Arabic 78.0 Warner Bros. Pictures PG-13 2012-07-20 164 The Dark Knight Rises movie Jonathan Nolan (screenplay), Christopher Nolan... tt1345836 8.4 1244136
3 Johnny Depp, Armie Hammer, William Fichtner, T... Nominated for 2 Oscars. Another 4 wins & 17 no... 89289910 USA 2013-12-17 Gore Verbinski Action, Adventure, Western English, North American Indian 37.0 Walt Disney Pictures PG-13 2013-07-03 150 The Lone Ranger movie Justin Haythe (screenplay), Ted Elliott (scree... tt1210819 6.5 193685
4 Taylor Kitsch, Lynn Collins, Samantha Morton, ... 2 wins & 8 nominations. 73058679 USA 2012-06-05 Andrew Stanton Action, Adventure, Sci-Fi English 51.0 Walt Disney Pictures PG-13 2012-03-09 132 John Carter movie Andrew Stanton (screenplay), Mark Andrews (scr... tt0401729 6.6 222756