In [19]:
from bs4 import BeautifulSoup
from itertools import islice
import urllib
import csv
import re

# Open csv file and read rows into a dict
movie_list = {}
with open('movie_list.csv', 'rb') as csvfile:
    reader = csv.DictReader(csvfile)
    for rows in reader:
        # Remove punctuation except "'s" e.g. Winter's Bone
        rows['movie_title'] = re.sub(r'[,:;!?/-]', '', rows['movie_title'])
        movie_list[rows['movie_title']] = rows['release_year']
        
print movie_list


{'Michael Clayton': '2007', 'Chocolat': '2000', 'In The Bedroom': '2001', 'Life of Pi': '2012', 'Sideways': '2004', 'American Sniper': '2014', 'Argo': '2012', 'War Horse': '2011', 'Precious': '2009', 'American Hustle': '2013', 'The Blind Side': '2009', 'An Education': '2009', 'The Wolf of Wall Street': '2013', 'Million Dollar Baby': '2004', 'The Hours': '2002', '12 Years a Slave': '2013', "The King's Speech": '2010', 'Gangs of New York': '2002', 'Milk': '2008', 'Ray': '2004', 'Up in the Air': '2009', 'Silver Linings Playbook': '2012', 'A Serious Man': '2009', 'A Beautiful Mind': '2001', 'Slumdog Millionaire': '2008', 'Gravity': '2013', 'Capote': '2005', 'Moneyball': '2011', 'Good Night and Good Luck': '2005', 'Brokeback Mountain': '2005', 'Seabiscuit': '2003', 'The Theory of Everything': '2014', 'Gosford Park': '2001', 'Letters from Iwo Jima': '2006', 'The Hurt Locker': '2009', 'There Will Be Blood': '2007', 'The Social Network': '2010', 'Up': '2009', 'The Artist': '2011', 'Juno': '2007', 'The Fighter': '2010', 'Whiplash': '2014', 'Crouching Tiger': ' Hidden Dragon', 'Gladiator': '2000', 'Midnight in Paris': '2011', 'Mystic River': '2003', 'Moulin Rouge': '2001', 'Finding Neverland': '2004', 'The Reader': '2008', 'Her': '2013', 'Mad Max Fury Road': '2015', 'Hugo': '2011', 'Captain Phillips': '2013', 'Django Unchained': '2012', 'Atonement': '2007', 'FrostNixon': '2008', "Winter's Bone": '2010', 'The Lord of the Rings The Return of the King': '2003', 'Crash': '2005', 'The Martian': '2015', 'Selma': '2014', 'No Country for Old Men': '2007', 'Brooklyn': '2015', 'Babel': '2006', 'Traffic': '2000', 'Les Miserables': '2012', '127 Hours': '2010', 'Lincoln': '2012', 'The Tree of Life': '2011', 'Inception': '2010', 'The Descendants': '2011', 'Spotlight': '2015', 'The Kids Are All Right': '2010', 'Bridge of Spies': '2015', 'Philomena': '2013', 'The Lord of the Rings Fellowship of the Ring': '2001', 'Little Miss Sunshine': '2006', 'Amour': '2012', 'Zero Dark Thirty': '2012', 'Master and Commander': '2003', 'The Big Short': '2015', 'The Imitation Game': '2014', 'Toy Story 3': '2010', 'Beasts of the Southern Wild': '2012', 'Black Swan': '2010', 'The Revenant': '2015', 'The Curious Case of Benjamin Button': '2008', 'True Grit': '2010', 'Inglourious Basterds': '2009', 'District 9': '2009', 'Munich': '2005', 'The Grand Budapest Hotel': '2014', 'The Help': '2011', 'Room': '2015', 'The Lord of the Rings The Two Towers': '2002', 'Erin Brockovich': '2000', 'Birdman': '2014', 'Nebraska': '2013', 'Avatar': '2009', 'Extremely Loud and Incredibly Close': '2011', 'Lost in Translation': '2003', 'The Aviator': '2004', 'Boyhood': '2014', 'The Pianist': '2002', 'The Departed': '2006', 'Chicago': '2002', 'The Queen': '2006', 'Dallas Buyers Club': '2013'}

In [2]:
def get_imdbid(movie_list):
    '''
    Retrieve IMDb IDs from movie list via OMDb API
    '''
    imdbid = {}
    for title, year in movie_list.iteritems():
        # Add + in between spaces in movie title
        title_query = re.sub(r'\s', '+', title)
        # Extract IMDb IDs
        r = urllib.urlopen('http://www.omdbapi.com/?t=' 
                           + title_query + '&y=' + year + '&plot=short&r=xml').read()
        soup = BeautifulSoup(r, 'xml')
        # Handle invalid movie title/year search
        if soup.error:
            print title + " not found"
        else:
            imdbid[title] = soup.movie['imdbID']
    return imdbid

imdbid = get_imdbid(movie_list)
print imdbid


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-2-9df9e826a3c4> in <module>()
     18     return imdbid
     19 
---> 20 imdbid = get_imdbid(movie_list)
     21 print imdbid

<ipython-input-2-9df9e826a3c4> in get_imdbid(movie_list)
     15             print title + " not found"
     16         else:
---> 17             imdbid[title] = soup.movie['imdbID']
     18     return imdbid
     19 

TypeError: 'NoneType' object has no attribute '__getitem__'

In [3]:
# Workaround as OMDB service is down as of 4-Feb-2017
imdbid = {'Spotlight': 'tt1895587','The Big Short': 'tt1596363','Bridge of Spies': 'tt3682448','Brooklyn': 'tt2381111','Mad Max: Fury Road': 'tt1392190',
          'The Martian': 'tt3659388','The Revenant': 'tt1663202','Room': 'tt3170832','Birdman': 'tt2562232','American Sniper': 'tt2179136', 
         'Boyhood': 'tt1065073','The Grand Budapest Hotel': 'tt2278388','The Imitation Game': 'tt2084970','Selma': 'tt1020072','The Theory of Everything': 'tt2980516',
        'Whiplash': 'tt2582802','12 Years a Slave': 'tt2024544','American Hustle': 'tt1800241','Captain Phillips': 'tt1535109','Dallas Buyers Club': 'tt0790636',
          'Gravity': 'tt1454468','Her': 'tt1798709','Nebraska': 'tt1821549','Philomena': 'tt2431286','The Wolf of Wall Street': 'tt0993846',
          'Argo': 'tt1024648','Amour': 'tt1602620','Beasts of the Southern Wild': 'tt2125435','Django Unchained': 'tt1853728','Les Miserables': 'tt1707386',
          'Life of Pi': 'tt0454876','Lincoln': 'tt0443272','Silver Linings Playbook': 'tt1045658','Zero Dark Thirty': 'tt1790885','The Artist': 'tt1655442',
         'The Descendants': 'tt1033575','Extremely Loud and Incredibly Close': 'tt0477302','The Help': 'tt1454029','Hugo': 'tt0970179','Midnight in Paris': 'tt1605783',
         'Moneyball': 'tt1210166','The Tree of Life': 'tt0478304','War Horse': 'tt1568911','The King''s Speech': 'tt1504320','127 Hours': 'tt1542344',
         'Black Swan': 'tt0947798','The Fighter': 'tt0964517','Inception': 'tt1375666','The Kids Are All Right': 'tt0842926',
         'The Social Network': 'tt1285016','Toy Story 3': 'tt0435761','True Grit': 'tt1403865','Winter''s Bone': 'tt1399683',
         'The Hurt Locker': 'tt0887912','Avatar': 'tt0499549','The Blind Side': 'tt0878804','District 9': 'tt1136608','An Education': 'tt1174732',
         'Inglourious Basterds': 'tt0361748','Precious': 'tt0929632','A Serious Man': 'tt1019452','Up': 'tt1049413','Up in the Air': 'tt1193138',
         'Slumdog Millionaire': 'tt1010048','The Curious Case of Benjamin Button': 'tt0421715','Frost Nixon': 'tt0870111','Milk': 'tt1013753',
         'The Reader': 'tt0976051','No Country for Old Men': 'tt0477348','Atonement': 'tt0783233','Juno': 'tt0467406','Michael Clayton': 'tt0465538',
        'There Will Be Blood': 'tt0469494','The Departed': 'tt0407887','Babel': 'tt0449467','Letters from Iwo Jima': 'tt0498380','Little Miss Sunshine': 'tt0449059',
          'The Queen': 'tt0436697','Crash': 'tt0375679','Brokeback Mountain': 'tt0388795','Capote': 'tt0379725',
        'Good Night and Good Luck': 'tt0433383','Munich': 'tt0408306','Million Dollar Baby': 'tt0405159','The Aviator': 'tt0338751',
         'Finding Neverland': 'tt0308644','Ray': 'tt0350258','Sideways': 'tt0375063','The Lord of the Rings: The Return of the King': 'tt0167260',
         'Lost in Translation': 'tt0335266','Master and Commander': 'tt0311113','Mystic River': 'tt0327056','Seabiscuit': 'tt0329575',
         'Chicago': 'tt0299658','Gangs of New York': 'tt0217505','The Hours': 'tt0274558','The Lord of the Rings The Two Towers': 'tt0167261',
         'The Pianist': 'tt0253474','A Beautiful Mind': 'tt0268978','Gosford Park': 'tt0280707','In The Bedroom': 'tt0247425',
         'The Lord of the Rings Fellowship of the Ring': 'tt0120737','Moulin Rouge': 'tt0203009','Gladiator': 'tt0172495','Chocolat': 'tt0241303', 
         'Crouching Tiger Hidden Dragon': 'tt0190332','Erin Brockovich': 'tt0195685','Traffic': 'tt0181865'}

In [7]:
def get_oscar_status(content, title):
    '''
    Retrieves Best Picture status for a movie title
    '''
    result = content.find_next_sibling()
    res = result.find_all(class_="award_description")
    for j in res:
        if unicode(j.next_element).strip() == u"Best Motion Picture of the Year":
            if unicode(j.parent.td.b) == u"<b>Won</b>": # Strange, get_text() doesnt work
                return "Won"
            else:
                return None
    
def get_awards(imdbid, award_list):
    '''
    Scrape list of awards nominated and won for each movie title
    '''
    dataset = {}
    for title, id in imdbid.iteritems():
        # Add movie titles in dataset
        dataset[title] = {}
        r = urllib.urlopen('http://www.imdb.com/title/' + id + '/awards?ref_=tt_awd').read()
        soup = BeautifulSoup(r, 'lxml')
        awards = soup.find("div", class_="article listo")
        for a in awards.contents:
            for award in award_list:
                # Search for h3 tags which contain the award title
                # NavigableString element is converted to unicode string to save memory
                if a.name == "h3" and unicode(a.next_element).strip() == award:
                    # Add oscar for best picture status
                    if unicode(a.next_element).strip() == u"Academy Awards, USA":
                        best_pic = get_oscar_status(a, title)
                        if best_pic == "Won":
                            dataset[title]["Academy Awards, USA"] = 1
                        else:
                            dataset[title]["Academy Awards, USA"] = 0
                    else:
                        # Search award results in its next sibling
                        result = a.find_next_sibling().td.b
                        # If award is won, add award = 1 to dataset
                        if result.get_text() == "Won":
                            dataset[title][award] = 1
                else:
                    # Add award = 0 if not won
                    try:
                        dataset[title][award] != 1
                    except:
                        dataset[title][award] = 0
    return dataset

award_list = ["Screen Actors Guild Awards", "Directors Guild of America, USA", "PGA Awards", "Academy Awards, USA"]       
dataset = get_awards(imdbid, award_list)
print dataset


{'Michael Clayton': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Chocolat': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'In The Bedroom': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Life of Pi': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Sideways': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'American Sniper': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Argo': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'War Horse': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Precious': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'American Hustle': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Blind Side': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'An Education': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Wolf of Wall Street': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Million Dollar Baby': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 0, 'Directors Guild of America, USA': 1}, 'The Hours': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, '12 Years a Slave': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Gangs of New York': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Milk': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Ray': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Up in the Air': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Silver Linings Playbook': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'A Serious Man': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Lord of the Rings: The Return of the King': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'Slumdog Millionaire': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'Gravity': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'Capote': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Moneyball': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Good Night and Good Luck': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Brokeback Mountain': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'Seabiscuit': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Winters Bone': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Theory of Everything': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Gosford Park': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Letters from Iwo Jima': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Hurt Locker': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'There Will Be Blood': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Social Network': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Up': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'The Artist': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'The Martian': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Fighter': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Whiplash': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Gladiator': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Midnight in Paris': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Avatar': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Big Short': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Finding Neverland': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Reader': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Her': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Hugo': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Captain Phillips': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Django Unchained': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Atonement': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Mad Max: Fury Road': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Crash': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Juno': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Selma': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'No Country for Old Men': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'A Beautiful Mind': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 1}, 'Brooklyn': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Babel': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Traffic': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Les Miserables': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, '127 Hours': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Lincoln': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Tree of Life': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Inception': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Descendants': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Spotlight': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Kids Are All Right': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Bridge of Spies': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Philomena': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Lord of the Rings Fellowship of the Ring': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Little Miss Sunshine': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Amour': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Zero Dark Thirty': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Master and Commander': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Moulin Rouge': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'The Imitation Game': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Toy Story 3': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Beasts of the Southern Wild': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Black Swan': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Revenant': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 1}, 'The Curious Case of Benjamin Button': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'True Grit': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Kings Speech': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'Inglourious Basterds': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'District 9': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Munich': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Grand Budapest Hotel': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Help': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Room': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Lord of the Rings The Two Towers': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Erin Brockovich': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Birdman': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 1, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'Nebraska': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Mystic River': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Extremely Loud and Incredibly Close': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Lost in Translation': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Aviator': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 0}, 'Boyhood': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Pianist': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Frost Nixon': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'The Departed': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 1, 'PGA Awards': 0, 'Directors Guild of America, USA': 1}, 'Chicago': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 1, 'Directors Guild of America, USA': 1}, 'The Queen': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Dallas Buyers Club': {'Screen Actors Guild Awards': 1, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 0}, 'Crouching Tiger Hidden Dragon': {'Screen Actors Guild Awards': 0, 'Academy Awards, USA': 0, 'PGA Awards': 0, 'Directors Guild of America, USA': 1}}

In [17]:
# Output scraped data into csv
with open("movies.csv", "wb") as toWrite:
    writer = csv.writer(toWrite, delimiter=",")
    writer.writerow(["Title", "Screen Actors Guild Awards", "PGA Awards", "Directors Guild of America, USA", "Academy Awards, USA"])
    for i in dataset.keys():
        writer.writerow([i.encode("utf-8"), 
                         dataset[i]["Screen Actors Guild Awards"], 
                         dataset[i]["PGA Awards"], 
                         dataset[i]["Directors Guild of America, USA"], 
                         dataset[i]["Academy Awards, USA"]])

In [ ]: