In [2]:
## Gnani Beldhari
## Goal One
## Ingest a csv file as pure text, then as a list of lines, and then as a data frame.
In [1]:
import csv
import numpy as np
import pandas as pd
import json
In [2]:
# Text - Read the whole CSV file into one string.
# f.read() pulls the file in a single call; the original appended line-by-line
# with '+=', which rebuilds the string on every iteration (quadratic time).
with open('tmdb_5000_movies.csv', mode='r', encoding='utf8') as f:
    movielistString = f.read()
print(movielistString[:1000])
In [9]:
# List - Read the CSV file line by line, dropping each line's newline
# character before storing it in the list.
with open('tmdb_5000_movies.csv', mode='r', encoding='utf8') as f:
    movieList = [line.replace('\n', '') for line in f]
# Show the second entry (first data row after the header).
movieList[1]
Out[9]:
In [38]:
# DataFrame - Use pandas' read_csv to parse the CSV file directly into a
# DataFrame; pandas handles the quoting/embedded commas in the JSON-valued
# columns that a naive line split would break on.
df_movielist = pd.read_csv('tmdb_5000_movies.csv')
# Peek at the first two rows to confirm the load.
df_movielist.head(2)
Out[38]:
In [13]:
## Goal Two
## Right now, the file is in a 'narrow' format; in other words, several interesting bits are collapsed into a single field. Let's attempt to make the data frame a 'wide' format, with all the collapsed items expanded horizontally.
In [30]:
# Take a working copy of just the title and genres columns.
df_genres = df_movielist.loc[:, ['title', 'genres']].copy()
print(df_genres.shape)
df_genres.head()
Out[30]:
In [31]:
# Parse the JSON-formatted genres strings and keep only the genre names.
# json.loads is the direct tool here; applying pd.read_json to literal JSON
# strings is deprecated in modern pandas and needlessly builds a DataFrame
# per cell. An empty genre list ('[]') becomes an empty Python list, which
# iterates the same way downstream as the original '' sentinel did.
df_genres['genres'] = df_genres['genres'].apply(
    lambda s: [g['name'] for g in json.loads(s)]
)
df_genres.head()
Out[31]:
In [32]:
# Define a helper to get the sorted unique values appearing across a column
# whose cells are iterables (e.g. lists of genre names).
def get_unique(df, col_name):
    """Return the sorted unique items found across all cells of df[col_name].

    Parameters
    ----------
    df : pandas.DataFrame
    col_name : str
        Name of a column whose cells are iterables of hashable items.

    Returns
    -------
    numpy.ndarray
        Sorted unique items (same contract as the original np.unique call).
    """
    # Flatten every per-row iterable into one list, then dedupe + sort.
    # (The original used nested list comprehensions purely for their side
    # effects, which obscures the intent.)
    flattened = [item for cell in df[col_name] for item in cell]
    return np.unique(flattened)
In [33]:
# Get the unique genre names across all rows; these become the one-hot
# column labels for the wide-format frame built next.
unique_cols = get_unique(df_genres,'genres')
# Display the resulting array.
unique_cols
Out[33]:
In [34]:
# Build a one-hot frame with one column per unique genre: put a 1 in each
# (row, genre) cell where the movie lists that genre; other cells stay NaN.
# DataFrame.set_value was removed in pandas 1.0 and Series.iteritems in
# pandas 2.0; .at and .items are the supported replacements. A plain loop
# also reads better than nested comprehensions used only for side effects.
df_genres_temp = pd.DataFrame(columns=unique_cols)
for key, genre_list in df_genres['genres'].items():
    for genre in genre_list:
        df_genres_temp.at[key, genre] = 1
print(df_genres_temp.shape)
df_genres_temp.head()
Out[34]:
In [36]:
# Join the one-hot genre columns onto the original frame, aligned on
# df_genres' row index. The join_axes argument was removed from pd.concat
# in pandas 1.0; concatenating and then reindexing on df_genres.index
# reproduces the old behaviour.
df_genres_wide = pd.concat([df_genres, df_genres_temp], axis=1).reindex(df_genres.index)
print(df_genres_wide.shape)
df_genres_wide.head()
Out[36]:
In [ ]:
# # Wide data sets are good for exploration, but 'long' data sets are good for training.
# Let's attempt to expand all the collapsed field vertically instead of horizontally.
# Does this result in data duplication? What do you think about that? Yes and No are both correct -- but what's the context?
# Long data sets may result in data duplication and repetition, but they provide a simple and standardized way
# of structuring a dataset, which makes it easier for an analyst (or a computer) to extract the needed variables.
# Hadley Wickham described tidy data as having the following characteristics:
# 1. Each variable forms a column.
# 2. Each observation forms a row.
# 3. Each type of observational unit forms a table.
# The IMDB data set is untidy, with multiple values and observations packed into a single column (genres, production companies, etc.).
# We can treat each of these columns as an observational unit that can be separated into its own table for observation and analysis.
In [37]:
# Reshape to long format with melt: every genre column becomes a
# (genre_category, is_type) pair of rows per movie.
df_long_genres = df_genres_wide.melt(
    id_vars=['title', 'genres'],
    var_name='genre_category',
    value_name='is_type',
)
# Missing one-hot cells were NaN in the wide frame; treat them as 0.
df_long_genres.fillna(0, inplace=True)
df_long_genres = df_long_genres.sort_values(by=['title'], ascending=False)
df_long_genres.head(30)
Out[37]:
In [39]:
# Take a working copy of just the title and production_companies columns.
df_pcompany = df_movielist[['title', 'production_companies']].copy()
print(df_pcompany.shape)
# Parse the JSON-formatted strings with json.loads and keep only the company
# names. (Applying pd.read_json to literal JSON strings is deprecated in
# modern pandas; an empty list ('[]') simply yields an empty Python list.)
df_pcompany['production_companies'] = df_pcompany['production_companies'].apply(
    lambda s: [c['name'] for c in json.loads(s)]
)
df_pcompany.head()
Out[39]:
In [44]:
# Get the unique production-company names; these become the one-hot columns.
unique_cols = get_unique(df_pcompany, 'production_companies')
unique_cols
# Build the one-hot frame: 1 where a movie lists that company, NaN otherwise.
# (DataFrame.set_value and Series.iteritems were removed in modern pandas;
# .at and .items are the supported replacements.)
df_pcompany_temp = pd.DataFrame(columns=unique_cols)
for key, company_list in df_pcompany['production_companies'].items():
    for company in company_list:
        df_pcompany_temp.at[key, company] = 1
print(df_pcompany_temp.shape)
# Join the one-hot columns onto the original frame. join_axes was removed
# from pd.concat in pandas 1.0; reindexing on df_pcompany.index reproduces it.
df_pcompany_wide = pd.concat([df_pcompany, df_pcompany_temp], axis=1).reindex(df_pcompany.index)
print(df_pcompany_wide.shape)
df_pcompany_wide.head()
Out[44]:
In [59]:
# Melt to long format: each company column becomes (produced_by, is_from)
# rows, then keep only the pairs that actually apply and list them by title.
df_long_pcompany = pd.melt(frame=df_pcompany_wide,
                           id_vars=['title', 'production_companies'],
                           var_name='produced_by',
                           value_name='is_from')
df_long_pcompany.fillna(0, inplace=True)
df_sub_pcompany = df_long_pcompany[df_long_pcompany.is_from == 1]
# Bug fix: the original called df_sub_pcompany.set_index('title') and
# discarded the result (set_index is not in-place), so it was a no-op.
# It is removed rather than assigned back, because the selection below
# still needs 'title' as a column. A mid-cell .head() that displayed
# nothing (not the last expression) is also removed.
df_sub_pcompany[['title', 'produced_by']].sort_values(by=['title'], ascending=True)
Out[59]:
In [ ]: