Goal One

text


In [1]:
fileName = 'tmdb_5000_movies.csv'
thisFile = open( fileName, 'r' )

In [2]:
# You can uncomment this line and run it if you like, to verify that the read was successful, but it's really slow.

# for line in thisFile:
#     print( line )

In [3]:
thisFile.close()

list of lines


In [4]:
thisFile = open( fileName, 'r' )
lines = [ line for line in thisFile ]
thisFile.close()

In [5]:
print( lines[ :3 ] )


['budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count\n', '237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""name"": ""Fantasy""}, {""id"": 878, ""name"": ""Science Fiction""}]",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"": 2964, ""name"": ""future""}, {""id"": 3386, ""name"": ""space war""}, {""id"": 3388, ""name"": ""space colony""}, {""id"": 3679, ""name"": ""society""}, {""id"": 3801, ""name"": ""space travel""}, {""id"": 9685, ""name"": ""futuristic""}, {""id"": 9840, ""name"": ""romance""}, {""id"": 9882, ""name"": ""space""}, {""id"": 9951, ""name"": ""alien""}, {""id"": 10148, ""name"": ""tribe""}, {""id"": 10158, ""name"": ""alien planet""}, {""id"": 10987, ""name"": ""cgi""}, {""id"": 11399, ""name"": ""marine""}, {""id"": 13065, ""name"": ""soldier""}, {""id"": 14643, ""name"": ""battle""}, {""id"": 14720, ""name"": ""love affair""}, {""id"": 165431, ""name"": ""anti war""}, {""id"": 193554, ""name"": ""power relations""}, {""id"": 206690, ""name"": ""mind and soul""}, {""id"": 209714, ""name"": ""3d""}]",en,Avatar,"In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289}, {""name"": ""Twentieth Century Fox Film Corporation"", ""id"": 306}, {""name"": ""Dune Entertainment"", ""id"": 444}, {""name"": ""Lightstorm Entertainment"", ""id"": 574}]","[{""iso_3166_1"": ""US"", ""name"": ""United States of America""}, {""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""}]",2009-12-10,2787965087,162,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso_639_1"": ""es"", ""name"": ""Espa\\u00f1ol""}]",Released,Enter the World of Pandora.,Avatar,7.2,11800\n', '300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""name"": ""Fantasy""}, {""id"": 28, ""name"": ""Action""}]",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""name"": ""drug abuse""}, {""id"": 911, ""name"": ""exotic island""}, {""id"": 1319, ""name"": ""east india trading company""}, {""id"": 2038, ""name"": ""love of one\'s life""}, {""id"": 2052, ""name"": ""traitor""}, {""id"": 2580, ""name"": ""shipwreck""}, {""id"": 2660, ""name"": ""strong woman""}, {""id"": 3799, ""name"": ""ship""}, {""id"": 5740, ""name"": ""alliance""}, {""id"": 5941, ""name"": ""calypso""}, {""id"": 6155, ""name"": ""afterlife""}, {""id"": 6211, ""name"": ""fighter""}, {""id"": 12988, ""name"": ""pirate""}, {""id"": 157186, ""name"": ""swashbuckler""}, {""id"": 179430, ""name"": ""aftercreditsstinger""}]",en,Pirates of the Caribbean: At World\'s End,"Captain Barbossa, long believed to be dead, has come back to life and is headed to the edge of the Earth with Will Turner and Elizabeth Swann. But nothing is quite as it seems.",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""name"": ""Jerry Bruckheimer Films"", ""id"": 130}, {""name"": ""Second Mate Productions"", ""id"": 19936}]","[{""iso_3166_1"": ""US"", ""name"": ""United States of America""}]",2007-05-19,961000000,169,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World\'s End,6.9,4500\n']

pandas dataframe


In [6]:
import pandas as pd
df = pd.read_csv( fileName )

In [7]:
df.head( 3 )


Out[7]:
budget genres homepage id keywords original_language original_title overview popularity production_companies production_countries release_date revenue runtime spoken_languages status tagline title vote_average vote_count
0 237000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2787965087 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
1 300000000 [{"id": 12, "name": "Adventure"}, {"id": 14, "... http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... [{"iso_3166_1": "US", "name": "United States o... 2007-05-19 961000000 169.0 [{"iso_639_1": "en", "name": "English"}] Released At the end of the world, the adventure begins. Pirates of the Caribbean: At World's End 6.9 4500
2 245000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... [{"iso_3166_1": "GB", "name": "United Kingdom"... 2015-10-26 880674609 148.0 [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},... Released A Plan No One Escapes Spectre 6.3 4466

Goal Two


In [8]:
print( df[ 'genres' ].head( 3 ) )


0    [{"id": 28, "name": "Action"}, {"id": 12, "nam...
1    [{"id": 12, "name": "Adventure"}, {"id": 14, "...
2    [{"id": 28, "name": "Action"}, {"id": 12, "nam...
Name: genres, dtype: object

In [9]:
def get_unique_genres():
    genre_set = set()
    for index, row in df[ 'genres' ].iteritems():
        df_this_genre = pd.DataFrame( eval( str( df.loc[ index, 'genres' ] ) ) )
        df_length = len( df_this_genre )
        for i in range( 0, df_length ):
            specific_genre = df_this_genre.ix[ i, 'name' ]
            if specific_genre not in genre_set:
                genre_set.add( specific_genre )

    return genre_set

In [10]:
Genres_lst = list( get_unique_genres() )

In [11]:
for element in Genres_lst:
    df[ element ] = False

In [12]:
df.columns


Out[12]:
Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count', 'History', 'Drama', 'Science Fiction', 'Foreign',
       'TV Movie', 'Comedy', 'Action', 'Adventure', 'Family', 'Horror',
       'Documentary', 'Romance', 'Music', 'Western', 'War', 'Animation',
       'Thriller', 'Crime', 'Mystery', 'Fantasy'],
      dtype='object')

In [13]:
for index, row in df[ 'genres' ].iteritems():
    this_movie_genre_list = []
    df_this_genre = pd.DataFrame( eval( str( df.loc[ index, 'genres' ] ) ) )
    df_length = len( df_this_genre )
    for i in range( 0, df_length ):
        specific_genre = df_this_genre.ix[ i, 'name' ]
        this_movie_genre_list.append( specific_genre )
    df.loc[ index, this_movie_genre_list ] = True

In [14]:
df.head( 5 )


Out[14]:
budget genres homepage id keywords original_language original_title overview popularity production_companies ... Documentary Romance Music Western War Animation Thriller Crime Mystery Fantasy
0 237000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... ... False False False False False False False False False True
1 300000000 [{"id": 12, "name": "Adventure"}, {"id": 14, "... http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... ... False False False False False False False False False True
2 245000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... ... False False False False False False False True False False
3 250000000 [{"id": 28, "name": "Action"}, {"id": 80, "nam... http://www.thedarkknightrises.com/ 49026 [{"id": 849, "name": "dc comics"}, {"id": 853,... en The Dark Knight Rises Following the death of District Attorney Harve... 112.312950 [{"name": "Legendary Pictures", "id": 923}, {"... ... False False False False False False True True False False
4 260000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://movies.disney.com/john-carter 49529 [{"id": 818, "name": "based on novel"}, {"id":... en John Carter John Carter is a war-weary, former military ca... 43.926995 [{"name": "Walt Disney Pictures", "id": 2}] ... False False False False False False False False False False

5 rows × 40 columns


In [15]:
df[ Genres_lst ].head( 5 )


Out[15]:
History Drama Science Fiction Foreign TV Movie Comedy Action Adventure Family Horror Documentary Romance Music Western War Animation Thriller Crime Mystery Fantasy
0 False False True False False False True True False False False False False False False False False False False True
1 False False False False False False True True False False False False False False False False False False False True
2 False False False False False False True True False False False False False False False False False True False False
3 False True False False False False True False False False False False False False False False True True False False
4 False False True False False False True True False False False False False False False False False False False False

In [ ]:

Goal Three


In [20]:
import pandas as pd
df3 = pd.read_csv( fileName )

In [21]:
df3.head( 3 )


Out[21]:
budget genres homepage id keywords original_language original_title overview popularity production_companies production_countries release_date revenue runtime spoken_languages status tagline title vote_average vote_count
0 237000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2787965087 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
1 300000000 [{"id": 12, "name": "Adventure"}, {"id": 14, "... http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... [{"iso_3166_1": "US", "name": "United States o... 2007-05-19 961000000 169.0 [{"iso_639_1": "en", "name": "English"}] Released At the end of the world, the adventure begins. Pirates of the Caribbean: At World's End 6.9 4500
2 245000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... [{"iso_3166_1": "GB", "name": "United Kingdom"... 2015-10-26 880674609 148.0 [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},... Released A Plan No One Escapes Spectre 6.3 4466

In [22]:
print( df3.columns )


Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')

In [23]:
data3 = []

In [24]:
for index, row in df3.iterrows():
    for item in eval( df3.ix[ index, 'genres' ] ):
        t_d = dict( item )
        data3.append( { 'budget'               : row[ 'budget'               ],
                        'Specific_genre'       : t_d[ 'name'                 ],
                        'homepage'             : row[ 'homepage'             ],
                        'id'                   : row[ 'id'                   ],
                        'keywords'             : row[ 'keywords'             ],
                        'original_language'    : row[ 'original_language'    ],
                        'original_title'       : row[ 'original_title'       ],
                        'overview'             : row[ 'overview'             ],
                        'popularity'           : row[ 'popularity'           ],
                        'production_companies' : row[ 'production_companies' ],
                        'production_countries' : row[ 'production_countries' ],
                        'release_date'         : row[ 'release_date'         ],
                        'revenue'              : row[ 'release_date'         ],
                        'runtime'              : row[ 'runtime'              ],
                        'spoken_languages'     : row[ 'spoken_languages'     ],
                        'status'               : row[ 'status'               ],
                        'tagline'              : row[ 'tagline'              ],
                        'title'                : row[ 'title'                ],
                        'vote_average'         : row[ 'vote_average'         ],
                        'vote_count'           : row[ 'vote_count'           ] } )

In [25]:
df3 = pd.DataFrame( data3 )

In [26]:
df3.head( 10 )


Out[26]:
Specific_genre budget homepage id keywords original_language original_title overview popularity production_companies production_countries release_date revenue runtime spoken_languages status tagline title vote_average vote_count
0 Action 237000000 http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2009-12-10 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
1 Adventure 237000000 http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2009-12-10 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
2 Fantasy 237000000 http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2009-12-10 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
3 Science Fiction 237000000 http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2009-12-10 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
4 Adventure 300000000 http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... [{"iso_3166_1": "US", "name": "United States o... 2007-05-19 2007-05-19 169.0 [{"iso_639_1": "en", "name": "English"}] Released At the end of the world, the adventure begins. Pirates of the Caribbean: At World's End 6.9 4500
5 Fantasy 300000000 http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... [{"iso_3166_1": "US", "name": "United States o... 2007-05-19 2007-05-19 169.0 [{"iso_639_1": "en", "name": "English"}] Released At the end of the world, the adventure begins. Pirates of the Caribbean: At World's End 6.9 4500
6 Action 300000000 http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... [{"iso_3166_1": "US", "name": "United States o... 2007-05-19 2007-05-19 169.0 [{"iso_639_1": "en", "name": "English"}] Released At the end of the world, the adventure begins. Pirates of the Caribbean: At World's End 6.9 4500
7 Action 245000000 http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... [{"iso_3166_1": "GB", "name": "United Kingdom"... 2015-10-26 2015-10-26 148.0 [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},... Released A Plan No One Escapes Spectre 6.3 4466
8 Adventure 245000000 http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... [{"iso_3166_1": "GB", "name": "United Kingdom"... 2015-10-26 2015-10-26 148.0 [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},... Released A Plan No One Escapes Spectre 6.3 4466
9 Crime 245000000 http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... [{"iso_3166_1": "GB", "name": "United Kingdom"... 2015-10-26 2015-10-26 148.0 [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},... Released A Plan No One Escapes Spectre 6.3 4466

In [ ]:


In [ ]:


In [ ]: