Cleaning the dataset

Cleansing of the dataset: removal of unimportant features, NaN elements, parsing of some elements.



In [1]:

    
%matplotlib inline

import configparser
import os

import requests
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse, stats, spatial
import scipy.sparse.linalg
import statistics as stat
from sklearn import preprocessing, decomposition
import librosa
import IPython.display as ipd
import json

1. Merging dataframes

Merge the information from two separate datasets



In [7]:

    
all_movies_new =     pd.read_csv('350000-movies/AllMoviesDetailsCleaned.csv', sep=';', encoding='utf-8', low_memory=False,
                                 error_bad_lines=False)
all_movies_casting = pd.read_csv('350000-movies/AllMoviesCastingRaw.csv', sep=';', encoding='utf-8', low_memory=False,
                                 error_bad_lines=False)



In [8]:

    
print('There are {} movies'.format(len(all_movies_new)))
print('There are {} casting infos'.format(len(all_movies_casting)))









    



There are 329044 movies
There are 329044 casting infos



In [9]:

    
all_movies_casting.head(10)









    Out[9]:







  
    
      
      id
      actor1_name
      actor1_gender
      actor2_name
      actor2_gender
      actor3_name
      actor3_gender
      actor4_name
      actor4_gender
      actor5_name
      actor5_gender
      actor_number
      director_name
      director_gender
      director_number
      producer_name
      producer_number
      screeplay_name
      editor_name
    
  
  
    
      0
      2
      Turo Pajala
      0
      Susanna Haavisto
      0.0
      Matti Pellonpää
      2
      Eetu Hilkamo
      0
      none
      0
      4
      Aki Kaurismäki
      0.0
      1
      none
      0
      Aki Kaurismäki
      Raija Talvio
    
    
      1
      3
      Matti Pellonpää
      2
      Kati Outinen
      1.0
      Sakari Kuosmanen
      2
      Esko Nikkari
      2
      Kylli Köngäs
      0
      7
      Aki Kaurismäki
      0.0
      1
      Mika Kaurismäki
      1
      Aki Kaurismäki
      Raija Talvio
    
    
      2
      5
      Tim Roth
      2
      Antonio Banderas
      2.0
      Jennifer Beals
      1
      Madonna
      1
      Marisa Tomei
      1
      24
      Allison Anders
      1.0
      4
      Lawrence Bender
      1
      none
      Margaret Goodspeed
    
    
      3
      6
      Emilio Estevez
      2
      Cuba Gooding Jr.
      2.0
      Denis Leary
      2
      Jeremy Piven
      2
      Peter Greene
      2
      15
      Stephen Hopkins
      2.0
      1
      Gene Levy
      1
      Lewis Colick
      Tim Wellburn
    
    
      4
      8
      none
      0
      none
      0.0
      none
      0
      none
      0
      none
      0
      0
      Timo Novotny
      0.0
      1
      Timo Novotny
      2
      Michael Glawogger
      Timo Novotny
    
    
      5
      9
      Rita Lengyel
      1
      Milton Welsh
      2.0
      none
      0
      none
      0
      none
      0
      2
      Marc Meyer
      0.0
      2
      Marc Meyer
      1
      none
      Marc Meyer
    
    
      6
      11
      Mark Hamill
      2
      Harrison Ford
      2.0
      Carrie Fisher
      1
      Peter Cushing
      2
      Alec Guinness
      2
      106
      George Lucas
      2.0
      1
      Gary Kurtz
      2
      none
      Marcia Lucas
    
    
      7
      12
      Albert Brooks
      2
      Ellen DeGeneres
      1.0
      Alexander Gould
      2
      Willem Dafoe
      2
      Brad Garrett
      2
      24
      Andrew Stanton
      2.0
      1
      Graham Walters
      1
      Andrew Stanton
      David Ian Salter
    
    
      8
      13
      Tom Hanks
      2
      Robin Wright
      1.0
      Gary Sinise
      2
      Mykelti Williamson
      2
      Sally Field
      1
      67
      Robert Zemeckis
      2.0
      1
      Wendy Finerman
      3
      Eric Roth
      Arthur Schmidt
    
    
      9
      14
      Kevin Spacey
      2
      Annette Bening
      1.0
      Thora Birch
      1
      Wes Bentley
      2
      Mena Suvari
      1
      41
      Sam Mendes
      2.0
      1
      Bruce Cohen
      2
      Alan Ball
      Christopher Greenbury

Merge while taken into account duplicate columns



In [10]:

    
cols_to_use = all_movies_casting.columns.difference(all_movies_new.columns)
df = all_movies_new.merge(all_movies_casting, left_index=True, right_index=True, how='outer')
print('We have {} movies after merging'.format(len(df)))









    



We have 329044 movies after merging

2. Data cleaning

Remove columns with unimportant features, movies with NaN entries, zeros for the budget or the revenue. Non english films are also removed.



In [11]:

    
df.drop(['spoken_languages_number','production_countries_number', 'status', 'runtime', 'actor_number', 'actor5_gender',
         'actor4_gender', 'actor3_gender', 'actor2_gender', 'actor1_gender', 'director_number', 'producer_number',
         'director_gender', 'original_title', 'popularity', 'production_countries', 'spoken_languages', 'vote_count',
         'vote_average', 'producer_name', 'screeplay_name', 'editor_name','production_companies_number'],
         axis=1, inplace = True)

df = df.drop(df[df.budget < 1000].index)
df = df.drop(df[df.revenue == 0].index)
df = df.drop(df[df.original_language != 'en'].index)
df = df.drop(df[df.director_name == 'none'].index)
df = df.drop(df[df.production_companies == 'none'].index)
df = df.drop(df[df['imdb_id'].isnull()].index)
df = df.drop(df[df['genres'].isnull()].index)

df['imdb_id'] = df['imdb_id'].str.replace('tt', '') 
df.drop(['id_y'],axis=1, inplace = True)
df.rename(columns={'id_x': 'id'}, inplace=True)
df.drop(['original_language'],axis=1, inplace = True)
df['release_date'] = pd.to_datetime(df['release_date'])

print('There are {} movies left in the dataframe'.format(len(df)))









    



There are 4625 movies left in the dataframe

Retain only the movies between the year 2000 and 2017 (not included since we consider that the revenue of some of them are not yet fixed)



In [12]:

    
print('There are {} movies before 2000'.format(len(df[df.release_date <= '1999-12-31'])))
print('There are {} movies after 2016'.format(len(df[df.release_date >= '2017-01-01'])))
print('{} movies would be dropped'. format(len(df[df.release_date <= '1999-12-31']) + len(df[df.release_date >= '2017-06-01'])))









    



There are 1809 movies before 2000
There are 40 movies after 2016
1824 movies would be dropped



In [13]:

    
df = df.drop(df[df.release_date <= '1999-12-31'].index)
df = df.drop(df[df.release_date >= '2017-01-01'].index)
print('There are {} movies left in the dataset'.format(len(df)))









    



There are 2776 movies left in the dataset



In [14]:

    
df.head()









    Out[14]:







  
    
      
      id
      budget
      genres
      imdb_id
      overview
      production_companies
      release_date
      revenue
      tagline
      title
      actor1_name
      actor2_name
      actor3_name
      actor4_name
      actor5_name
      director_name
    
  
  
    
      7
      12
      94000000
      Animation|Family
      0266543
      Nemo, an adventurous young clownfish, is unexp...
      Pixar Animation Studios
      2003-05-30
      940335536
      There are 3.7 trillion fish in the ocean, they...
      Finding Nemo
      Albert Brooks
      Ellen DeGeneres
      Alexander Gould
      Willem Dafoe
      Brad Garrett
      Andrew Stanton
    
    
      11
      16
      12800000
      Drama|Crime|Music
      0168629
      Selma, a Czech immigrant on the verge of blind...
      Fine Line Features
      2000-05-17
      40031879
      You don't need eyes to see.
      Dancer in the Dark
      Björk
      Catherine Deneuve
      David Morse
      Peter Stormare
      Joel Grey
      Lars von Trier
    
    
      17
      22
      140000000
      Adventure|Fantasy|Action
      0325980
      Jack Sparrow, a freewheeling 17th-century pira...
      Walt Disney Pictures
      2003-09-07
      655011224
      Prepare to be blown out of the water.
      Pirates of the Caribbean: The Curse of the Bla...
      Johnny Depp
      Geoffrey Rush
      Orlando Bloom
      Keira Knightley
      Jack Davenport
      Gore Verbinski
    
    
      18
      24
      30000000
      Action|Crime
      0266697
      An assassin is shot at the altar by her ruthle...
      Miramax Films
      2003-10-10
      180949000
      Go for the kill.
      Kill Bill: Vol. 1
      Uma Thurman
      Lucy Liu
      Vivica A. Fox
      Daryl Hannah
      David Carradine
      Quentin Tarantino
    
    
      19
      25
      72000000
      Drama|War
      0418763
      Jarhead is a film about a US Marine Anthony Sw...
      Universal Pictures
      2005-04-11
      96889998
      Welcome to the suck.
      Jarhead
      Jamie Foxx
      Scott MacDonald
      none
      Lucas Black
      Peter Sarsgaard
      Sam Mendes



In [15]:

    
df.to_csv('Saved_Datasets/CleanDataset.csv', index=False)

3. Rearrange columns



In [3]:

    
df = pd.read_csv('Saved_Datasets/CleanDataset.csv')
len(df) #verification









    Out[3]:





2621

3.1 Give an ID for each actor / director



In [4]:

    
#generate dictionnaries assigning ids for each actor/director

actor_dict = {}
director_dict = {}

actors = pd.concat([df['actor1_name'], df['actor2_name'], df['actor3_name'], df['actor4_name'], df['actor5_name']])
actors = actors[actors != 'none']
actors = actors.unique()

directors = df['director_name'].unique()

for i in range(0,len(actors)):
    actor_dict[actors[i]] = i

for i in range(0,len(directors)):
    director_dict[directors[i]] = i



In [5]:

    
#function to create list of actor for each movie (and ignore 'none')
def merge_actors(actor_list,mode='name',actor_dict={}):
    ''' mode "name" : (default) return same list as actor_list, but without "none"
        mode "id"   :           return id of actor. id dictionnary must be provided in actor_dict.
    '''
    
    id_l = []
    for actor in actor_list:
        if actor != 'none':
            if (mode == 'id' and len(actor_dict)>0):
                id_l.append(actor_dict[actor])
            else:
                id_l.append(actor)
                
    return id_l



In [6]:

    
#add list of actor's id to each movie in dataset
df['director_id'] = df.apply(lambda row: director_dict[row['director_name']], axis=1)
df['actor_names'] = df.apply(lambda row: merge_actors([row['actor1_name'], row['actor2_name'],row['actor3_name'],
                                                          row['actor4_name'], row['actor5_name']],'name'), axis=1)
df['actor_ids']   = df.apply(lambda row: merge_actors([row['actor1_name'], row['actor2_name'],row['actor3_name'],
                                                          row['actor4_name'], row['actor5_name']],'id',actor_dict), axis=1)



In [7]:

    
#Remove actors columns
df.drop(['actor1_name','actor2_name','actor3_name','actor4_name','actor5_name'],axis=1, inplace = True)



In [8]:

    
#Remove movies without any actor
df = df[df['actor_ids'].str.len() != 0]

3.2 Add Metacritic ratings

Merge with csv created in Get_Metacritic_ratings



In [9]:

    
#load critics from external file
metacritic_ratings = pd.read_csv('Saved_Datasets/metacritic_ratings.csv')
len(metacritic_ratings)









    Out[9]:





2776



In [10]:

    
#merge dataframes
df = pd.merge(df,metacritic_ratings,on=['imdb_id','title'])



In [11]:

    
#Remove Movies without Metacritic rating
df = df.drop(df[df.Metacritic == 'Error'].index)

3.4 YouTube Views



In [3]:

    
#load views from external file
views = pd.read_csv('Saved_Datasets/YouTube_views.csv')
len(views)









    Out[3]:





2621



In [4]:

    
#merge dataframes
df = pd.merge(df,views,on=['imdb_id','title'])

3.5 Final Cleaning



In [ ]:

    
df.drop(['tagline','director_id','actor_ids'],axis=1, inplace = True)

3.6. Save CleanDataset



In [20]:

    
print(len(df))
df.head(50)









    



2621






    Out[20]:







  
    
      
      id
      budget
      genres
      imdb_id
      overview
      production_companies
      release_date
      revenue
      title
      director_name
      ...
      actors_ids
      actors_tenures
      total_tenure
      average_tenure
      Total_profitability_actors
      Metacritic
      YouTube_Mean
      Profitability
      ROI
      success
    
  
  
    
      0
      12
      94000000
      Animation|Family
      266543
      Nemo, an adventurous young clownfish, is unexp...
      Pixar Animation Studios
      2003-05-30
      940335536
      Finding Nemo
      Andrew Stanton
      ...
      [14, 5293, 12, 13, 18]
      [18, 24, 2, 28, 14]
      86
      17.20
      7310194071
      90
      0.218
      846335536
      2.639
      1
    
    
      1
      16
      12800000
      Drama|Crime|Music
      168629
      Selma, a Czech immigrant on the verge of blind...
      Fine Line Features
      2000-05-17
      40031879
      Dancer in the Dark
      Lars von Trier
      ...
      [6748, 47, 52, 50, 53]
      [49, 19, 21, 44, 15]
      148
      29.60
      294261790
      61
      Error
      27231879
      2.127
      1
    
    
      2
      22
      140000000
      Adventure|Fantasy|Action
      325980
      Jack Sparrow, a freewheeling 17th-century pira...
      Walt Disney Pictures
      2003-09-07
      655011224
      Pirates of the Caribbean: The Curse of the Bla...
      Gore Verbinski
      ...
      [1709, 116, 114, 118, 85]
      [7, 9, 7, 22, 20]
      65
      13.00
      15077223101
      63
      1.0
      515011224
      2.639
      1
    
    
      3
      24
      30000000
      Action|Crime
      266697
      An assassin is shot at the altar by her ruthle...
      Miramax Films
      2003-10-10
      180949000
      Kill Bill: Vol. 1
      Quentin Tarantino
      ...
      [140, 589, 2535, 139, 141]
      [12, 26, 15, 17, 39]
      109
      21.80
      1994329604
      69
      1.0
      150949000
      2.639
      1
    
    
      4
      25
      72000000
      Drama|War
      418763
      Jarhead is a film about a US Marine Anthony Sw...
      Universal Pictures
      2005-04-11
      96889998
      Jarhead
      Sam Mendes
      ...
      [133, 134, 1350483, 155]
      [11, 14, 0, 12]
      37
      9.25
      2367434946
      58
      1.0
      24889998
      0.346
      1
    
    
      5
      27
      1000000
      Drama|Music|Romance
      411705
      Matt, a young glaciologist, soars across the v...
      Revolution Films
      2004-07-16
      1574623
      9 Songs
      Michael Winterbottom
      ...
      [1357012, 177, 1759261, 1087657]
      [1, 1, 1, 18, 15]
      36
      7.20
      2298492
      43
      Error
      574623
      0.575
      1
    
    
      6
      35
      75000000
      Animation|Comedy|Family
      462538
      After Homer accidentally pollutes the town's w...
      Gracie Films
      2007-07-25
      527068851
      The Simpsons Movie
      David Silverman
      ...
      [199, 5587, 5586, 198, 200]
      [33, 18, 23, 22, 27]
      123
      24.60
      3427993239
      80
      Error
      452068851
      2.639
      1
    
    
      7
      38
      20000000
      Science Fiction|Drama|Romance
      338013
      Joel Barish, heartbroken that his girlfriend u...
      Anonymous Content
      2004-03-19
      72258126
      Eternal Sunshine of the Spotless Mind
      Michel Gondry
      ...
      [103, 206, 205, 109, 204]
      [11, 24, 16, 16, 11]
      78
      15.60
      10973717071
      89
      0.148
      52258126
      2.613
      1
    
    
      8
      58
      200000000
      Adventure|Fantasy|Action
      383574
      Captain Jack Sparrow works his way out of a bl...
      Walt Disney Pictures
      2006-06-20
      1065659812
      Pirates of the Caribbean: Dead Man's Chest
      Gore Verbinski
      ...
      [116, 114, 1640, 85, 2440]
      [12, 10, 35, 23, 27]
      107
      21.40
      18153570766
      53
      0.001
      865659812
      2.639
      1
    
    
      9
      59
      32000000
      Drama|Thriller|Crime
      399146
      An average family is thrust into the spotlight...
      New Line Cinema
      2005-09-23
      60740827
      A History of Violence
      David Cronenberg
      ...
      [110, 225, 49, 226, 227]
      [21, 9, 8, 1, 28]
      67
      13.40
      3223414068
      81
      0.255
      28740827
      0.898
      1
    
    
      10
      65
      41000000
      Drama
      298203
      The setting is Detroit in 1995. The city is di...
      Imagine Entertainment
      2002-08-11
      215000000
      8 Mile
      Curtis Hanson
      ...
      [326, 325, 335, 328, 327]
      [25, 4, 11, 10, 8]
      58
      11.60
      1844467360
      77
      0.082
      174000000
      2.639
      1
    
    
      11
      69
      28000000
      Drama|Music|Romance
      358273
      A chronicle of country music legend Johnny Cas...
      Tree Line Films
      2005-09-13
      186438883
      Walk the Line
      James Mangold
      ...
      [73421, 418, 368, 419, 417]
      [22, 20, 15, 1, 4]
      62
      12.40
      3773313959
      72
      Error
      158438883
      2.639
      1
    
    
      12
      70
      30000000
      Drama
      405159
      Despondent over a painful estrangement from hi...
      Lakeshore Entertainment
      2004-12-15
      216763646
      Million Dollar Baby
      Clint Eastwood
      ...
      [448, 190, 192, 449, 450]
      [13, 50, 32, 6, 1]
      102
      20.40
      3355708931
      86
      1.0
      186763646
      2.639
      1
    
    
      13
      71
      5000000
      Drama|Comedy|Music
      249462
      Set against the background of the 1984 Miner's...
      BBC Films
      2000-05-18
      110000000
      Billy Elliot
      Stephen Daldry
      ...
      [478, 477, 479, 480, 481]
      [1, 23, 1, 8, 28]
      61
      12.20
      1641383685
      74
      Error
      105000000
      2.639
      1
    
    
      14
      74
      132000000
      Adventure|Thriller|Science Fiction
      407304
      Ray Ferrier is a divorced dockworker and less-...
      Paramount Pictures
      2005-06-23
      591739379
      War of the Worlds
      Steven Spielberg
      ...
      [503, 502, 501, 500, 504]
      [2, 20, 5, 25, 23]
      75
      15.00
      6233708409
      73
      0.519
      459739379
      2.639
      1
    
    
      15
      77
      9000000
      Mystery|Thriller
      209144
      Suffering short-term memory loss after a head ...
      Summit Entertainment
      2000-11-10
      39723096
      Memento
      Christopher Nolan
      ...
      [529, 532, 530, 537, 534]
      [11, 22, 7, 25, 20]
      85
      17.00
      2654048997
      80
      0.006
      30723096
      2.639
      1
    
    
      16
      80
      2700000
      Drama|Romance
      381681
      Nine years ago two strangers met by chance and...
      Castle Rock Entertainment
      2004-10-02
      15992615
      Before Sunset
      Richard Linklater
      ...
      [649, 569, 651, 1146]
      [40, 20, 3, 21]
      84
      21.00
      466207939
      90
      1.0
      13292615
      2.639
      1
    
    
      17
      82
      135000000
      Action|Adventure|Crime|Thriller
      430357
      Miami Vice is a feature film based on the 1980...
      Universal Pictures
      2006-07-27
      163794509
      Miami Vice
      Michael Mann
      ...
      [16867, 2038, 134, 643, 72466]
      [9, 5, 15, 19, 12]
      60
      12.00
      4569270530
      65
      0.928
      28794509
      0.213
      1
    
    
      18
      83
      130000
      Drama|Thriller
      374102
      Two divers are left out at sea without a boat....
      Plunge Pictures LLC
      2004-06-08
      54667954
      Open Water
      Chris Kentis
      ...
      [1420204, 644, 1187, 590483, 646]
      [1, 7, 20, 1, 1]
      30
      6.00
      272689770
      0
      0.683
      54537954
      2.639
      1
    
    
      19
      98
      103000000
      Action|Drama|Adventure
      172495
      In the year 180, the death of emperor Marcus A...
      DreamWorks SKG
      2000-01-05
      457640427
      Gladiator
      Ridley Scott
      ...
      [73421, 935, 934, 194, 936]
      [17, 17, 11, 42, 41]
      128
      25.60
      3888338936
      67
      0.272
      354640427
      2.639
      1
    
    
      20
      107
      10000000
      Thriller|Crime
      208092
      The second film from British director Guy Ritc...
      Columbia Pictures Corporation
      2000-01-09
      83557872
      Snatch
      Guy Ritchie
      ...
      [287, 1121, 976, 980, 1117]
      [14, 13, 3, 11, 20]
      61
      12.20
      5118165411
      45
      0.049
      73557872
      2.639
      1
    
    
      21
      116
      15000000
      Drama|Thriller|Crime|Romance
      416320
      Match Point is Woody Allen’s satire of the Bri...
      DreamWorks
      2005-10-26
      85306374
      Match Point
      Woody Allen
      ...
      [1244, 1249, 1246, 1245, 1248]
      [10, 29, 11, 12, 38]
      100
      20.00
      8271690710
      72
      Error
      70306374
      2.639
      1
    
    
      22
      118
      150000000
      Adventure|Comedy|Family|Fantasy
      367594
      A young boy wins a tour through the most magni...
      Village Roadshow Pictures
      2005-07-13
      474968763
      Charlie and the Chocolate Factory
      Tim Burton
      ...
      [1286, 1285, 1282, 85, 1281]
      [1, 2, 42, 22, 5]
      72
      14.40
      6700463144
      72
      1.0
      324968763
      2.166
      1
    
    
      23
      120
      93000000
      Adventure|Fantasy|Action
      120737
      Young hobbit Frodo Baggins, after inheriting a...
      WingNut Films
      2001-12-18
      871368364
      The Lord of the Rings: The Fellowship of the Ring
      Peter Jackson
      ...
      [48, 114, 1327, 112, 109]
      [17, 5, 33, 8, 13]
      76
      15.20
      19390142196
      92
      Error
      778368364
      2.639
      1
    
    
      24
      121
      79000000
      Adventure|Fantasy|Action
      167261
      Frodo and Sam are trekking to Mordor to destro...
      WingNut Films
      2002-12-18
      926287400
      The Lord of the Rings: The Two Towers
      Peter Jackson
      ...
      [110, 114, 1327, 109, 882]
      [18, 6, 34, 14, 9]
      81
      16.20
      18248254694
      87
      1.0
      847287400
      2.639
      1
    
    
      25
      122
      94000000
      Adventure|Fantasy|Action
      167260
      Aragorn is revealed as the heir to the ancient...
      WingNut Films
      2003-01-12
      1118888979
      The Lord of the Rings: The Return of the King
      Peter Jackson
      ...
      [110, 114, 1327, 109, 882]
      [19, 7, 35, 15, 10]
      86
      17.20
      18248254694
      94
      0.34
      1024888979
      2.639
      1
    
    
      26
      134
      26000000
      Action|Adventure|Comedy
      190590
      In the deep south during the 1930s, three esca...
      Universal Pictures
      2000-08-30
      71000000
      O Brother, Where Art Thou?
      Joel Coen
      ...
      [1462, 1230, 18686, 1241, 1461]
      [9, 21, 20, 21, 18]
      89
      17.80
      5854038212
      69
      0.73
      45000000
      1.731
      1
    
    
      27
      141
      6000000
      Fantasy|Drama|Mystery
      246578
      After narrowly escaping a bizarre accident, a ...
      Pandora Cinema
      2001-01-18
      1270522
      Donnie Darko
      Richard Kelly
      ...
      [131, 69597, 20089, 723, 1579]
      [11, 22, 6, 23, 9]
      71
      14.20
      2440332869
      71
      0.12
      -4729478
      -0.788
      0
    
    
      28
      142
      14000000
      Drama|Romance
      388795
      Brokeback Mountain is an Ang Lee film about tw...
      River Road Entertainment
      2005-02-09
      178043761
      Brokeback Mountain
      Ang Lee
      ...
      [131, 1810, 1812, 1813, 1811]
      [15, 9, 12, 5, 35]
      76
      15.20
      7540424599
      87
      Error
      164043761
      2.639
      1
    
    
      29
      153
      4000000
      Drama
      335266
      Two lost souls visiting Tokyo -- the young, ne...
      American Zoetrope
      2003-08-31
      119723856
      Lost in Translation
      Sofia Coppola
      ...
      [1771, 1770, 1532, 1772, 1245]
      [16, 1, 28, 8, 10]
      63
      12.60
      9452080296
      89
      0.8
      115723856
      2.639
      1
    
    
      30
      155
      185000000
      Drama|Action|Crime|Thriller
      468569
      Batman raises the stakes in his war on crime. ...
      DC Comics
      2008-07-16
      1004558444
      The Dark Knight
      Christopher Nolan
      ...
      [64, 1810, 3895, 3894, 6383]
      [27, 12, 53, 23, 17]
      132
      26.40
      13471550057
      82
      0.444
      819558444
      2.639
      1
    
    
      31
      161
      85000000
      Thriller|Crime
      240772
      Less than 24 hours into his parole, charismati...
      Village Roadshow Pictures
      2001-07-12
      450717150
      Ocean's Eleven
      Steven Soderbergh
      ...
      [1271, 287, 1204, 1892, 1461]
      [19, 15, 15, 14, 19]
      82
      16.40
      11981835752
      74
      1.0
      365717150
      2.639
      1
    
    
      32
      163
      110000000
      Thriller|Crime
      349903
      Danny Ocean reunites with his old flame and th...
      Village Roadshow Pictures
      2004-09-12
      362744280
      Ocean's Twelve
      Steven Soderbergh
      ...
      [1271, 287, 1204, 1922, 1461]
      [22, 18, 18, 15, 22]
      95
      19.00
      9465243532
      58
      1.0
      252744280
      2.298
      1
    
    
      33
      167
      48000000
      Drama|Science Fiction
      272152
      Prot is a patient at a mental hospital who cla...
      Intermedia Films
      2001-10-22
      50315140
      K-PAX
      Iain Softley
      ...
      [1980, 1229, 1979, 1982, 1981]
      [65, 51, 16, 16, 24]
      172
      34.40
      2075385244
      49
      0.268
      2315140
      0.048
      1
    
    
      34
      170
      5000000
      Horror|Thriller|Science Fiction
      289043
      Twenty-eight days after a killer virus was acc...
      DNA Films
      2002-10-31
      82719885
      28 Days Later
      Danny Boyle
      ...
      [2038, 2054, 2050, 2052, 2037]
      [1, 0, 11, 13, 5]
      30
      6.00
      1537918992
      73
      1.0
      77719885
      2.639
      1
    
    
      35
      176
      1200000
      Horror|Mystery|Crime
      387564
      Obsessed with teaching his victims the value o...
      Lions Gate Films
      2004-01-10
      103911669
      Saw
      James Wan
      ...
      [2047, 2131, 2140, 2136, 2130]
      [26, 10, 9, 15, 26]
      86
      17.20
      875698824
      46
      0.476
      102711669
      2.639
      1
    
    
      36
      179
      80000000
      Crime|Thriller
      373926
      After Silvia Broome, an interpreter at United ...
      Universal Pictures
      2005-08-04
      162944923
      The Interpreter
      Sydney Pollack
      ...
      [2227, 2245, 2229, 2228, 2244]
      [23, 17, 17, 25, 30]
      112
      22.40
      2651124728
      62
      0.365
      82944923
      1.037
      1
    
    
      37
      180
      102000000
      Action|Thriller|Science Fiction|Mystery
      181689
      John Anderton is a top 'Precrime' cop in the l...
      DreamWorks SKG
      2002-06-20
      358372926
      Minority Report
      Steven Spielberg
      ...
      [2206, 2201, 2207, 72466, 500]
      [7, 54, 48, 8, 22]
      139
      27.80
      6472905683
      80
      1.0
      256372926
      2.513
      1
    
    
      38
      182
      32000000
      War|Crime|Drama|Mystery|Romance|Thriller
      452624
      An American journalist played by George Cloone...
      Warner Bros.
      2006-08-12
      5914908
      The Good German
      Steven Soderbergh
      ...
      [2219, 112, 2221, 2220, 1461]
      [18, 13, 12, 21, 24]
      88
      17.60
      8040918411
      49
      Error
      -26085092
      -0.815
      0
    
    
      39
      186
      27000000
      Drama|Thriller|Crime|Mystery
      425210
      Slevin is mistakenly put in the middle of a pe...
      The Weinstein Company
      2006-02-24
      56308881
      Lucky Number Slevin
      Paul McGuigan
      ...
      [140, 2282, 2299, 62, 192]
      [15, 34, 9, 27, 34]
      119
      23.80
      7860410464
      53
      0.368
      29308881
      1.086
      1
    
    
      40
      187
      40000000
      Action|Thriller|Crime
      401792
      Welcome to Sin City. This town beckons to the ...
      Dimension Films
      2005-03-31
      158733820
      Sin City
      Robert Rodriguez
      ...
      [56731, 5916, 6278, 6280, 6279]
      [12, 11, 3, 26, 8]
      60
      12.00
      1347402319
      46
      0.8
      118733820
      2.639
      1
    
    
      41
      189
      65000000
      Crime|Thriller
      458481
      Some of Sin City's most hard-boiled citizens c...
      Miramax Films
      2014-08-20
      39407616
      Sin City: A Dame to Kill For
      Robert Rodriguez
      ...
      [56731, 5916, 2295, 16851, 24045]
      [21, 20, 36, 30, 27]
      134
      26.80
      3512611332
      46
      0.383
      -25592384
      -0.394
      0
    
    
      42
      201
      60000000
      Science Fiction|Action|Adventure|Thriller
      253754
      En route to the honeymoon of William Riker to ...
      Paramount Pictures
      2002-12-13
      67312826
      Star Trek: Nemesis
      Stuart Baird
      ...
      [2390, 2387, 2388, 2392, 1213786]
      [26, 30, 23, 19, 21]
      119
      23.80
      1118894447
      51
      Error
      7312826
      0.122
      1
    
    
      43
      205
      17500000
      Drama|History|War
      395169
      Inspired by true events, this film takes place...
      Lions Gate Films
      2004-11-09
      38000000
      Hotel Rwanda
      Terry George
      ...
      [2598, 1733, 2607, 73421, 1896]
      [14, 32, 11, 21, 20]
      98
      19.60
      3251345729
      79
      0.404
      20500000
      1.171
      1
    
    
      44
      214
      10000000
      Horror|Thriller|Crime
      489270
      Jigsaw has disappeared. Along with his new app...
      Lions Gate Films
      2006-10-27
      163876815
      Saw III
      Darren Lynn Bousman
      ...
      [2677, 2138, 2464, 2144, 2133]
      [3, 22, 15, 26, 12]
      78
      15.60
      1339628004
      48
      0.671
      153876815
      2.639
      1
    
    
      45
      215
      4000000
      Horror
      432348
      When a new murder victim is discovered with al...
      Lions Gate Films
      2005-10-28
      152925093
      Saw II
      Darren Lynn Bousman
      ...
      [2682, 2138, 2680, 2683, 2144]
      [4, 21, 10, 18, 25]
      78
      15.60
      1346302158
      48
      1.0
      148925093
      2.639
      1
    
    
      46
      217
      185000000
      Adventure|Action
      367882
      Set during the Cold War, the Soviets – led by ...
      Lucasfilm
      2008-05-21
      786636033
      Indiana Jones and the Kingdom of the Crystal S...
      Steven Spielberg
      ...
      [650, 112, 5538, 3, 10959]
      [31, 15, 33, 43, 11]
      133
      26.60
      10537417498
      65
      Error
      601636033
      2.639
      1
    
    
      47
      231
      50000000
      Drama|Thriller
      365737
      The Middle Eastern oil industry is the backdro...
      Section Eight
      2005-11-23
      94000000
      Syriana
      Stephen Gaghan
      ...
      [2954, 2955, 2956, 1892, 1461]
      [16, 19, 11, 18, 23]
      87
      17.40
      8006815262
      76
      0.469
      44000000
      0.880
      1
    
    
      48
      237
      6400000
      Drama|Thriller|Crime|Romance
      289635
      A young drifter working on a river barge disru...
      Recorded Picture Company (RPC)
      2003-09-26
      2500000
      Young Adam
      David Mackenzie
      ...
      [3061, 3063, 3065, 1246, 3064]
      [11, 18, 1, 9, 16]
      55
      11.00
      3188011025
      67
      Error
      -3900000
      -0.609
      0
    
    
      49
      243
      30000000
      Comedy|Drama|Romance|Music
      146882
      When record store owner Rob Gordon gets dumped...
      Buena Vista
      2000-03-17
      47126295
      High Fidelity
      Stephen Frears
      ...
      [70851, 1562, 3230, 3036, 3232]
      [9, 5, 9, 18, 18]
      59
      11.80
      3796611818
      79
      Error
      17126295
      0.571
      1
    
  

50 rows × 21 columns



In [7]:

    
df.to_csv('Saved_Datasets/CleanDataset.csv', encoding='utf-8', index=False)

4. Add New Features



In [2]:

    
df = pd.read_csv('Saved_Datasets/CleanDataset.csv')
len(df) #verification









    Out[2]:





2621

4.1 Compute ROI



In [3]:

    
df['ROI'] = df.apply(lambda row: np.round((row['revenue']-row['budget'])/row['budget'],3), axis=1)

Saturate rows with ROI too high: saturates the 25% highest ROIs



In [4]:

    
quartile = np.round(np.percentile(df.ROI,75),3)
print("Quartile = "+str(quartile))

df.loc[df.ROI>quartile,'ROI'] = quartile









    



Quartile = 2.639



In [5]:

    
plt.hist(df.ROI,bins='auto');

4.2 Compute Success

Add "success" column: a movie is successful if its ROI belongs to the top 75%



In [4]:

    
def classify_success(roi):
    #if (roi >= np.round(np.percentile(df.ROI,75),3)): return 1
    #if (roi >= np.round(np.percentile(df.ROI,50),3)): return 3
    if (roi >= np.round(np.percentile(df.ROI,25),3)): return 1
    else: return 0



In [5]:

    
df['success'] = df.apply(lambda row: classify_success(row['ROI']), axis=1)

4.3 Associate actors tmdb ids to movies of the dataset



In [9]:

    
Actors = pd.read_csv('Saved_Datasets/Actorsv4Dataset.csv')



In [10]:

    
#Assign actors TMDb ids to a movie in the dataset
def one_movies_actor_id(movies):
    actor_index = [i for i, item in enumerate(Actors['Name']) if item in df['actor_names'][movies]]
    #print(index_movie)
    one_movie_actor_id = []
    for i in actor_index:
        one_movie_actor_id.append(Actors['tmdb_id'][i])
    #print(one_actor_movie_date)    
    return one_movie_actor_id #ATTENTION ORDRE ACTTEUR MOVIE PAS GARDE



In [11]:

    
#Apply actors TMDb ids to the whole dataset
movies_actors_ids = list(map(one_movies_actor_id,np.arange(len(df))))



In [12]:

    
idx = df.columns.get_loc('actor_names')
df.insert(idx+1,'actors_ids',movies_actors_ids)

4.4 Associate actors tenures to movies of the dataset



In [13]:

    
#Forming data in column actor_names
df['actor_names']=df['actor_names'].str.replace('"','')
df['actor_names'] = df['actor_names'].str.replace("'",'')
df['actor_names'] = df['actor_names'].str.replace(", ",',')



In [14]:

    
#Convert string in dataset['actor_names'] into list
df['actor_names'] = df.actor_names.apply(lambda x: x[1:-1].split(","))



In [15]:

    
Actors['date'] = Actors['date'].str.replace("'",'')



In [16]:

    
Actors['date'] = Actors.date.apply(lambda x: x[1:-1].split(","))



In [17]:

    
def one_actor_tenure_movie(movies):
    actor_index = [i for i, item in enumerate(Actors['Name']) if item in df['actor_names'][movies]]
    tenure=[]
    for i in actor_index:
        if Actors['date'][i][0] !='':
            ten = int(df['release_date'][movies][0:4])-int(Actors['date'][i][0])+1
            #print(ten)
            if (ten <0):
                ten = 0
            if (ten > 72):
                ten = 72
            tenure.append (ten)
        else:
            tenure.append(0) 
    return tenure



In [18]:

    
movie_tenures = list(map(one_actor_tenure_movie,np.arange(len(df))))



In [19]:

    
idx = df.columns.get_loc('actors_ids')
df.insert(idx+1,'actors_tenures',movie_tenures)

4.5 Compute total tenures and averaged tenures of movies in the dataset



In [20]:

    
#Compute total tenure of each movie
movie_tenure_sum = []
for i in range(0,len(df)):
    movie_tenure_sum.append(sum(df['actors_tenures'][i]))



In [21]:

    
idx = df.columns.get_loc('actors_tenures')
df.insert(idx+1,'total_tenure',movie_tenure_sum)



In [22]:

    
#Compute average tenure of each movie
movie_tenure_average = []
for i in range(0,len(df)):
    #print(movie_tenures[i])
    if df['actors_tenures'][i] != []:
        movie_tenure_average.append(stat.mean(df['actors_tenures'][i]))
    else:
        movie_tenure_average.append(0)



In [23]:

    
idx = df.columns.get_loc('total_tenure')
df.insert(idx+1,'average_tenure',movie_tenure_average)

4.6 Compute Profitability



In [24]:

    
prof = df['revenue']-df['budget']



In [25]:

    
idx = df.columns.get_loc('Metacritic')
df.insert(idx+1,'Profitability',prof)

4.7 Compute Profitability of Actors in movies



In [26]:

    
### Compute actor profitabtility to one movie of the dataset
def one_movies_actor_prof(movies):
    #print(movies_actors_ids[movies])
    #print(len(Actors['tmdb_id']))
    #print(Act_prof['actors_ids'])
    actor_index = [i for i, item in enumerate(Actors['tmdb_id']) if item in movies_actors_ids[movies]]
    #print(actor_index)
    one_movie_actor_prof = []
    for i in actor_index:
        #print(i)
        one_movie_actor_prof.append(Actors['Profitability'][i])
    prof = sum(one_movie_actor_prof)  
    return prof



In [27]:

    
test = one_movies_actor_prof(0)
print(test)

7310194071



In [28]:

    
movies_actor_prof = list(map(one_movies_actor_prof,np.arange(len(df))))



In [29]:

    
idx = df.columns.get_loc('average_tenure')
df.insert(idx+1,'Total_profitability_actors',movies_actor_prof)

5. Save NewFeatures Dataset



In [21]:

    
df.head(40)









    Out[21]:







  
    
      
      id
      budget
      genres
      imdb_id
      overview
      production_companies
      release_date
      revenue
      title
      director_name
      ...
      actors_ids
      actors_tenures
      total_tenure
      average_tenure
      Total_profitability_actors
      Metacritic
      YouTube_Mean
      Profitability
      ROI
      success
    
  
  
    
      0
      12
      94000000
      Animation|Family
      266543
      Nemo, an adventurous young clownfish, is unexp...
      Pixar Animation Studios
      2003-05-30
      940335536
      Finding Nemo
      Andrew Stanton
      ...
      [14, 5293, 12, 13, 18]
      [18, 24, 2, 28, 14]
      86
      17.20
      7310194071
      90
      0.218
      846335536
      2.639
      1
    
    
      1
      16
      12800000
      Drama|Crime|Music
      168629
      Selma, a Czech immigrant on the verge of blind...
      Fine Line Features
      2000-05-17
      40031879
      Dancer in the Dark
      Lars von Trier
      ...
      [6748, 47, 52, 50, 53]
      [49, 19, 21, 44, 15]
      148
      29.60
      294261790
      61
      Error
      27231879
      2.127
      1
    
    
      2
      22
      140000000
      Adventure|Fantasy|Action
      325980
      Jack Sparrow, a freewheeling 17th-century pira...
      Walt Disney Pictures
      2003-09-07
      655011224
      Pirates of the Caribbean: The Curse of the Bla...
      Gore Verbinski
      ...
      [1709, 116, 114, 118, 85]
      [7, 9, 7, 22, 20]
      65
      13.00
      15077223101
      63
      1.0
      515011224
      2.639
      1
    
    
      3
      24
      30000000
      Action|Crime
      266697
      An assassin is shot at the altar by her ruthle...
      Miramax Films
      2003-10-10
      180949000
      Kill Bill: Vol. 1
      Quentin Tarantino
      ...
      [140, 589, 2535, 139, 141]
      [12, 26, 15, 17, 39]
      109
      21.80
      1994329604
      69
      1.0
      150949000
      2.639
      1
    
    
      4
      25
      72000000
      Drama|War
      418763
      Jarhead is a film about a US Marine Anthony Sw...
      Universal Pictures
      2005-04-11
      96889998
      Jarhead
      Sam Mendes
      ...
      [133, 134, 1350483, 155]
      [11, 14, 0, 12]
      37
      9.25
      2367434946
      58
      1.0
      24889998
      0.346
      1
    
    
      5
      27
      1000000
      Drama|Music|Romance
      411705
      Matt, a young glaciologist, soars across the v...
      Revolution Films
      2004-07-16
      1574623
      9 Songs
      Michael Winterbottom
      ...
      [1357012, 177, 1759261, 1087657]
      [1, 1, 1, 18, 15]
      36
      7.20
      2298492
      43
      Error
      574623
      0.575
      1
    
    
      6
      35
      75000000
      Animation|Comedy|Family
      462538
      After Homer accidentally pollutes the town's w...
      Gracie Films
      2007-07-25
      527068851
      The Simpsons Movie
      David Silverman
      ...
      [199, 5587, 5586, 198, 200]
      [33, 18, 23, 22, 27]
      123
      24.60
      3427993239
      80
      Error
      452068851
      2.639
      1
    
    
      7
      38
      20000000
      Science Fiction|Drama|Romance
      338013
      Joel Barish, heartbroken that his girlfriend u...
      Anonymous Content
      2004-03-19
      72258126
      Eternal Sunshine of the Spotless Mind
      Michel Gondry
      ...
      [103, 206, 205, 109, 204]
      [11, 24, 16, 16, 11]
      78
      15.60
      10973717071
      89
      0.148
      52258126
      2.613
      1
    
    
      8
      58
      200000000
      Adventure|Fantasy|Action
      383574
      Captain Jack Sparrow works his way out of a bl...
      Walt Disney Pictures
      2006-06-20
      1065659812
      Pirates of the Caribbean: Dead Man's Chest
      Gore Verbinski
      ...
      [116, 114, 1640, 85, 2440]
      [12, 10, 35, 23, 27]
      107
      21.40
      18153570766
      53
      0.001
      865659812
      2.639
      1
    
    
      9
      59
      32000000
      Drama|Thriller|Crime
      399146
      An average family is thrust into the spotlight...
      New Line Cinema
      2005-09-23
      60740827
      A History of Violence
      David Cronenberg
      ...
      [110, 225, 49, 226, 227]
      [21, 9, 8, 1, 28]
      67
      13.40
      3223414068
      81
      0.255
      28740827
      0.898
      1
    
    
      10
      65
      41000000
      Drama
      298203
      The setting is Detroit in 1995. The city is di...
      Imagine Entertainment
      2002-08-11
      215000000
      8 Mile
      Curtis Hanson
      ...
      [326, 325, 335, 328, 327]
      [25, 4, 11, 10, 8]
      58
      11.60
      1844467360
      77
      0.082
      174000000
      2.639
      1
    
    
      11
      69
      28000000
      Drama|Music|Romance
      358273
      A chronicle of country music legend Johnny Cas...
      Tree Line Films
      2005-09-13
      186438883
      Walk the Line
      James Mangold
      ...
      [73421, 418, 368, 419, 417]
      [22, 20, 15, 1, 4]
      62
      12.40
      3773313959
      72
      Error
      158438883
      2.639
      1
    
    
      12
      70
      30000000
      Drama
      405159
      Despondent over a painful estrangement from hi...
      Lakeshore Entertainment
      2004-12-15
      216763646
      Million Dollar Baby
      Clint Eastwood
      ...
      [448, 190, 192, 449, 450]
      [13, 50, 32, 6, 1]
      102
      20.40
      3355708931
      86
      1.0
      186763646
      2.639
      1
    
    
      13
      71
      5000000
      Drama|Comedy|Music
      249462
      Set against the background of the 1984 Miner's...
      BBC Films
      2000-05-18
      110000000
      Billy Elliot
      Stephen Daldry
      ...
      [478, 477, 479, 480, 481]
      [1, 23, 1, 8, 28]
      61
      12.20
      1641383685
      74
      Error
      105000000
      2.639
      1
    
    
      14
      74
      132000000
      Adventure|Thriller|Science Fiction
      407304
      Ray Ferrier is a divorced dockworker and less-...
      Paramount Pictures
      2005-06-23
      591739379
      War of the Worlds
      Steven Spielberg
      ...
      [503, 502, 501, 500, 504]
      [2, 20, 5, 25, 23]
      75
      15.00
      6233708409
      73
      0.519
      459739379
      2.639
      1
    
    
      15
      77
      9000000
      Mystery|Thriller
      209144
      Suffering short-term memory loss after a head ...
      Summit Entertainment
      2000-11-10
      39723096
      Memento
      Christopher Nolan
      ...
      [529, 532, 530, 537, 534]
      [11, 22, 7, 25, 20]
      85
      17.00
      2654048997
      80
      0.006
      30723096
      2.639
      1
    
    
      16
      80
      2700000
      Drama|Romance
      381681
      Nine years ago two strangers met by chance and...
      Castle Rock Entertainment
      2004-10-02
      15992615
      Before Sunset
      Richard Linklater
      ...
      [649, 569, 651, 1146]
      [40, 20, 3, 21]
      84
      21.00
      466207939
      90
      1.0
      13292615
      2.639
      1
    
    
      17
      82
      135000000
      Action|Adventure|Crime|Thriller
      430357
      Miami Vice is a feature film based on the 1980...
      Universal Pictures
      2006-07-27
      163794509
      Miami Vice
      Michael Mann
      ...
      [16867, 2038, 134, 643, 72466]
      [9, 5, 15, 19, 12]
      60
      12.00
      4569270530
      65
      0.928
      28794509
      0.213
      1
    
    
      18
      83
      130000
      Drama|Thriller
      374102
      Two divers are left out at sea without a boat....
      Plunge Pictures LLC
      2004-06-08
      54667954
      Open Water
      Chris Kentis
      ...
      [1420204, 644, 1187, 590483, 646]
      [1, 7, 20, 1, 1]
      30
      6.00
      272689770
      0
      0.683
      54537954
      2.639
      1
    
    
      19
      98
      103000000
      Action|Drama|Adventure
      172495
      In the year 180, the death of emperor Marcus A...
      DreamWorks SKG
      2000-01-05
      457640427
      Gladiator
      Ridley Scott
      ...
      [73421, 935, 934, 194, 936]
      [17, 17, 11, 42, 41]
      128
      25.60
      3888338936
      67
      0.272
      354640427
      2.639
      1
    
    
      20
      107
      10000000
      Thriller|Crime
      208092
      The second film from British director Guy Ritc...
      Columbia Pictures Corporation
      2000-01-09
      83557872
      Snatch
      Guy Ritchie
      ...
      [287, 1121, 976, 980, 1117]
      [14, 13, 3, 11, 20]
      61
      12.20
      5118165411
      45
      0.049
      73557872
      2.639
      1
    
    
      21
      116
      15000000
      Drama|Thriller|Crime|Romance
      416320
      Match Point is Woody Allen’s satire of the Bri...
      DreamWorks
      2005-10-26
      85306374
      Match Point
      Woody Allen
      ...
      [1244, 1249, 1246, 1245, 1248]
      [10, 29, 11, 12, 38]
      100
      20.00
      8271690710
      72
      Error
      70306374
      2.639
      1
    
    
      22
      118
      150000000
      Adventure|Comedy|Family|Fantasy
      367594
      A young boy wins a tour through the most magni...
      Village Roadshow Pictures
      2005-07-13
      474968763
      Charlie and the Chocolate Factory
      Tim Burton
      ...
      [1286, 1285, 1282, 85, 1281]
      [1, 2, 42, 22, 5]
      72
      14.40
      6700463144
      72
      1.0
      324968763
      2.166
      1
    
    
      23
      120
      93000000
      Adventure|Fantasy|Action
      120737
      Young hobbit Frodo Baggins, after inheriting a...
      WingNut Films
      2001-12-18
      871368364
      The Lord of the Rings: The Fellowship of the Ring
      Peter Jackson
      ...
      [48, 114, 1327, 112, 109]
      [17, 5, 33, 8, 13]
      76
      15.20
      19390142196
      92
      Error
      778368364
      2.639
      1
    
    
      24
      121
      79000000
      Adventure|Fantasy|Action
      167261
      Frodo and Sam are trekking to Mordor to destro...
      WingNut Films
      2002-12-18
      926287400
      The Lord of the Rings: The Two Towers
      Peter Jackson
      ...
      [110, 114, 1327, 109, 882]
      [18, 6, 34, 14, 9]
      81
      16.20
      18248254694
      87
      1.0
      847287400
      2.639
      1
    
    
      25
      122
      94000000
      Adventure|Fantasy|Action
      167260
      Aragorn is revealed as the heir to the ancient...
      WingNut Films
      2003-01-12
      1118888979
      The Lord of the Rings: The Return of the King
      Peter Jackson
      ...
      [110, 114, 1327, 109, 882]
      [19, 7, 35, 15, 10]
      86
      17.20
      18248254694
      94
      0.34
      1024888979
      2.639
      1
    
    
      26
      134
      26000000
      Action|Adventure|Comedy
      190590
      In the deep south during the 1930s, three esca...
      Universal Pictures
      2000-08-30
      71000000
      O Brother, Where Art Thou?
      Joel Coen
      ...
      [1462, 1230, 18686, 1241, 1461]
      [9, 21, 20, 21, 18]
      89
      17.80
      5854038212
      69
      0.73
      45000000
      1.731
      1
    
    
      27
      141
      6000000
      Fantasy|Drama|Mystery
      246578
      After narrowly escaping a bizarre accident, a ...
      Pandora Cinema
      2001-01-18
      1270522
      Donnie Darko
      Richard Kelly
      ...
      [131, 69597, 20089, 723, 1579]
      [11, 22, 6, 23, 9]
      71
      14.20
      2440332869
      71
      0.12
      -4729478
      -0.788
      0
    
    
      28
      142
      14000000
      Drama|Romance
      388795
      Brokeback Mountain is an Ang Lee film about tw...
      River Road Entertainment
      2005-02-09
      178043761
      Brokeback Mountain
      Ang Lee
      ...
      [131, 1810, 1812, 1813, 1811]
      [15, 9, 12, 5, 35]
      76
      15.20
      7540424599
      87
      Error
      164043761
      2.639
      1
    
    
      29
      153
      4000000
      Drama
      335266
      Two lost souls visiting Tokyo -- the young, ne...
      American Zoetrope
      2003-08-31
      119723856
      Lost in Translation
      Sofia Coppola
      ...
      [1771, 1770, 1532, 1772, 1245]
      [16, 1, 28, 8, 10]
      63
      12.60
      9452080296
      89
      0.8
      115723856
      2.639
      1
    
    
      30
      155
      185000000
      Drama|Action|Crime|Thriller
      468569
      Batman raises the stakes in his war on crime. ...
      DC Comics
      2008-07-16
      1004558444
      The Dark Knight
      Christopher Nolan
      ...
      [64, 1810, 3895, 3894, 6383]
      [27, 12, 53, 23, 17]
      132
      26.40
      13471550057
      82
      0.444
      819558444
      2.639
      1
    
    
      31
      161
      85000000
      Thriller|Crime
      240772
      Less than 24 hours into his parole, charismati...
      Village Roadshow Pictures
      2001-07-12
      450717150
      Ocean's Eleven
      Steven Soderbergh
      ...
      [1271, 287, 1204, 1892, 1461]
      [19, 15, 15, 14, 19]
      82
      16.40
      11981835752
      74
      1.0
      365717150
      2.639
      1
    
    
      32
      163
      110000000
      Thriller|Crime
      349903
      Danny Ocean reunites with his old flame and th...
      Village Roadshow Pictures
      2004-09-12
      362744280
      Ocean's Twelve
      Steven Soderbergh
      ...
      [1271, 287, 1204, 1922, 1461]
      [22, 18, 18, 15, 22]
      95
      19.00
      9465243532
      58
      1.0
      252744280
      2.298
      1
    
    
      33
      167
      48000000
      Drama|Science Fiction
      272152
      Prot is a patient at a mental hospital who cla...
      Intermedia Films
      2001-10-22
      50315140
      K-PAX
      Iain Softley
      ...
      [1980, 1229, 1979, 1982, 1981]
      [65, 51, 16, 16, 24]
      172
      34.40
      2075385244
      49
      0.268
      2315140
      0.048
      1
    
    
      34
      170
      5000000
      Horror|Thriller|Science Fiction
      289043
      Twenty-eight days after a killer virus was acc...
      DNA Films
      2002-10-31
      82719885
      28 Days Later
      Danny Boyle
      ...
      [2038, 2054, 2050, 2052, 2037]
      [1, 0, 11, 13, 5]
      30
      6.00
      1537918992
      73
      1.0
      77719885
      2.639
      1
    
    
      35
      176
      1200000
      Horror|Mystery|Crime
      387564
      Obsessed with teaching his victims the value o...
      Lions Gate Films
      2004-01-10
      103911669
      Saw
      James Wan
      ...
      [2047, 2131, 2140, 2136, 2130]
      [26, 10, 9, 15, 26]
      86
      17.20
      875698824
      46
      0.476
      102711669
      2.639
      1
    
    
      36
      179
      80000000
      Crime|Thriller
      373926
      After Silvia Broome, an interpreter at United ...
      Universal Pictures
      2005-08-04
      162944923
      The Interpreter
      Sydney Pollack
      ...
      [2227, 2245, 2229, 2228, 2244]
      [23, 17, 17, 25, 30]
      112
      22.40
      2651124728
      62
      0.365
      82944923
      1.037
      1
    
    
      37
      180
      102000000
      Action|Thriller|Science Fiction|Mystery
      181689
      John Anderton is a top 'Precrime' cop in the l...
      DreamWorks SKG
      2002-06-20
      358372926
      Minority Report
      Steven Spielberg
      ...
      [2206, 2201, 2207, 72466, 500]
      [7, 54, 48, 8, 22]
      139
      27.80
      6472905683
      80
      1.0
      256372926
      2.513
      1
    
    
      38
      182
      32000000
      War|Crime|Drama|Mystery|Romance|Thriller
      452624
      An American journalist played by George Cloone...
      Warner Bros.
      2006-08-12
      5914908
      The Good German
      Steven Soderbergh
      ...
      [2219, 112, 2221, 2220, 1461]
      [18, 13, 12, 21, 24]
      88
      17.60
      8040918411
      49
      Error
      -26085092
      -0.815
      0
    
    
      39
      186
      27000000
      Drama|Thriller|Crime|Mystery
      425210
      Slevin is mistakenly put in the middle of a pe...
      The Weinstein Company
      2006-02-24
      56308881
      Lucky Number Slevin
      Paul McGuigan
      ...
      [140, 2282, 2299, 62, 192]
      [15, 34, 9, 27, 34]
      119
      23.80
      7860410464
      53
      0.368
      29308881
      1.086
      1
    
  

40 rows × 21 columns



In [22]:

    
df.to_csv('Saved_Datasets/NewFeaturesDataset.csv', encoding='utf-8', index=False)



In [ ]:

	id	actor1_name	actor1_gender	actor2_name	actor2_gender	actor3_name	actor3_gender	actor4_name	actor4_gender	actor5_name	actor5_gender	actor_number	director_name	director_gender	director_number	producer_name	producer_number	screeplay_name	editor_name
0	2	Turo Pajala	0	Susanna Haavisto	0.0	Matti Pellonpää	2	Eetu Hilkamo	0	none	0	4	Aki Kaurismäki	0.0	1	none	0	Aki Kaurismäki	Raija Talvio
1	3	Matti Pellonpää	2	Kati Outinen	1.0	Sakari Kuosmanen	2	Esko Nikkari	2	Kylli Köngäs	0	7	Aki Kaurismäki	0.0	1	Mika Kaurismäki	1	Aki Kaurismäki	Raija Talvio
2	5	Tim Roth	2	Antonio Banderas	2.0	Jennifer Beals	1	Madonna	1	Marisa Tomei	1	24	Allison Anders	1.0	4	Lawrence Bender	1	none	Margaret Goodspeed
3	6	Emilio Estevez	2	Cuba Gooding Jr.	2.0	Denis Leary	2	Jeremy Piven	2	Peter Greene	2	15	Stephen Hopkins	2.0	1	Gene Levy	1	Lewis Colick	Tim Wellburn
4	8	none	0	none	0.0	none	0	none	0	none	0	0	Timo Novotny	0.0	1	Timo Novotny	2	Michael Glawogger	Timo Novotny
5	9	Rita Lengyel	1	Milton Welsh	2.0	none	0	none	0	none	0	2	Marc Meyer	0.0	2	Marc Meyer	1	none	Marc Meyer
6	11	Mark Hamill	2	Harrison Ford	2.0	Carrie Fisher	1	Peter Cushing	2	Alec Guinness	2	106	George Lucas	2.0	1	Gary Kurtz	2	none	Marcia Lucas
7	12	Albert Brooks	2	Ellen DeGeneres	1.0	Alexander Gould	2	Willem Dafoe	2	Brad Garrett	2	24	Andrew Stanton	2.0	1	Graham Walters	1	Andrew Stanton	David Ian Salter
8	13	Tom Hanks	2	Robin Wright	1.0	Gary Sinise	2	Mykelti Williamson	2	Sally Field	1	67	Robert Zemeckis	2.0	1	Wendy Finerman	3	Eric Roth	Arthur Schmidt
9	14	Kevin Spacey	2	Annette Bening	1.0	Thora Birch	1	Wes Bentley	2	Mena Suvari	1	41	Sam Mendes	2.0	1	Bruce Cohen	2	Alan Ball	Christopher Greenbury

	id	budget	genres	imdb_id	overview	production_companies	release_date	revenue	tagline	title	actor1_name	actor2_name	actor3_name	actor4_name	actor5_name	director_name
7	12	94000000	Animation\|Family	0266543	Nemo, an adventurous young clownfish, is unexp...	Pixar Animation Studios	2003-05-30	940335536	There are 3.7 trillion fish in the ocean, they...	Finding Nemo	Albert Brooks	Ellen DeGeneres	Alexander Gould	Willem Dafoe	Brad Garrett	Andrew Stanton
11	16	12800000	Drama\|Crime\|Music	0168629	Selma, a Czech immigrant on the verge of blind...	Fine Line Features	2000-05-17	40031879	You don't need eyes to see.	Dancer in the Dark	Björk	Catherine Deneuve	David Morse	Peter Stormare	Joel Grey	Lars von Trier
17	22	140000000	Adventure\|Fantasy\|Action	0325980	Jack Sparrow, a freewheeling 17th-century pira...	Walt Disney Pictures	2003-09-07	655011224	Prepare to be blown out of the water.	Pirates of the Caribbean: The Curse of the Bla...	Johnny Depp	Geoffrey Rush	Orlando Bloom	Keira Knightley	Jack Davenport	Gore Verbinski
18	24	30000000	Action\|Crime	0266697	An assassin is shot at the altar by her ruthle...	Miramax Films	2003-10-10	180949000	Go for the kill.	Kill Bill: Vol. 1	Uma Thurman	Lucy Liu	Vivica A. Fox	Daryl Hannah	David Carradine	Quentin Tarantino
19	25	72000000	Drama\|War	0418763	Jarhead is a film about a US Marine Anthony Sw...	Universal Pictures	2005-04-11	96889998	Welcome to the suck.	Jarhead	Jamie Foxx	Scott MacDonald	none	Lucas Black	Peter Sarsgaard	Sam Mendes

	id	budget	genres	imdb_id	overview	production_companies	release_date	revenue	title	director_name	...	actors_ids	actors_tenures	total_tenure	average_tenure	Total_profitability_actors	Metacritic	YouTube_Mean	Profitability	ROI	success
0	12	94000000	Animation\|Family	266543	Nemo, an adventurous young clownfish, is unexp...	Pixar Animation Studios	2003-05-30	940335536	Finding Nemo	Andrew Stanton	...	[14, 5293, 12, 13, 18]	[18, 24, 2, 28, 14]	86	17.20	7310194071	90	0.218	846335536	2.639	1
1	16	12800000	Drama\|Crime\|Music	168629	Selma, a Czech immigrant on the verge of blind...	Fine Line Features	2000-05-17	40031879	Dancer in the Dark	Lars von Trier	...	[6748, 47, 52, 50, 53]	[49, 19, 21, 44, 15]	148	29.60	294261790	61	Error	27231879	2.127	1
2	22	140000000	Adventure\|Fantasy\|Action	325980	Jack Sparrow, a freewheeling 17th-century pira...	Walt Disney Pictures	2003-09-07	655011224	Pirates of the Caribbean: The Curse of the Bla...	Gore Verbinski	...	[1709, 116, 114, 118, 85]	[7, 9, 7, 22, 20]	65	13.00	15077223101	63	1.0	515011224	2.639	1
3	24	30000000	Action\|Crime	266697	An assassin is shot at the altar by her ruthle...	Miramax Films	2003-10-10	180949000	Kill Bill: Vol. 1	Quentin Tarantino	...	[140, 589, 2535, 139, 141]	[12, 26, 15, 17, 39]	109	21.80	1994329604	69	1.0	150949000	2.639	1
4	25	72000000	Drama\|War	418763	Jarhead is a film about a US Marine Anthony Sw...	Universal Pictures	2005-04-11	96889998	Jarhead	Sam Mendes	...	[133, 134, 1350483, 155]	[11, 14, 0, 12]	37	9.25	2367434946	58	1.0	24889998	0.346	1
5	27	1000000	Drama\|Music\|Romance	411705	Matt, a young glaciologist, soars across the v...	Revolution Films	2004-07-16	1574623	9 Songs	Michael Winterbottom	...	[1357012, 177, 1759261, 1087657]	[1, 1, 1, 18, 15]	36	7.20	2298492	43	Error	574623	0.575	1
6	35	75000000	Animation\|Comedy\|Family	462538	After Homer accidentally pollutes the town's w...	Gracie Films	2007-07-25	527068851	The Simpsons Movie	David Silverman	...	[199, 5587, 5586, 198, 200]	[33, 18, 23, 22, 27]	123	24.60	3427993239	80	Error	452068851	2.639	1
7	38	20000000	Science Fiction\|Drama\|Romance	338013	Joel Barish, heartbroken that his girlfriend u...	Anonymous Content	2004-03-19	72258126	Eternal Sunshine of the Spotless Mind	Michel Gondry	...	[103, 206, 205, 109, 204]	[11, 24, 16, 16, 11]	78	15.60	10973717071	89	0.148	52258126	2.613	1
8	58	200000000	Adventure\|Fantasy\|Action	383574	Captain Jack Sparrow works his way out of a bl...	Walt Disney Pictures	2006-06-20	1065659812	Pirates of the Caribbean: Dead Man's Chest	Gore Verbinski	...	[116, 114, 1640, 85, 2440]	[12, 10, 35, 23, 27]	107	21.40	18153570766	53	0.001	865659812	2.639	1
9	59	32000000	Drama\|Thriller\|Crime	399146	An average family is thrust into the spotlight...	New Line Cinema	2005-09-23	60740827	A History of Violence	David Cronenberg	...	[110, 225, 49, 226, 227]	[21, 9, 8, 1, 28]	67	13.40	3223414068	81	0.255	28740827	0.898	1
10	65	41000000	Drama	298203	The setting is Detroit in 1995. The city is di...	Imagine Entertainment	2002-08-11	215000000	8 Mile	Curtis Hanson	...	[326, 325, 335, 328, 327]	[25, 4, 11, 10, 8]	58	11.60	1844467360	77	0.082	174000000	2.639	1
11	69	28000000	Drama\|Music\|Romance	358273	A chronicle of country music legend Johnny Cas...	Tree Line Films	2005-09-13	186438883	Walk the Line	James Mangold	...	[73421, 418, 368, 419, 417]	[22, 20, 15, 1, 4]	62	12.40	3773313959	72	Error	158438883	2.639	1
12	70	30000000	Drama	405159	Despondent over a painful estrangement from hi...	Lakeshore Entertainment	2004-12-15	216763646	Million Dollar Baby	Clint Eastwood	...	[448, 190, 192, 449, 450]	[13, 50, 32, 6, 1]	102	20.40	3355708931	86	1.0	186763646	2.639	1
13	71	5000000	Drama\|Comedy\|Music	249462	Set against the background of the 1984 Miner's...	BBC Films	2000-05-18	110000000	Billy Elliot	Stephen Daldry	...	[478, 477, 479, 480, 481]	[1, 23, 1, 8, 28]	61	12.20	1641383685	74	Error	105000000	2.639	1
14	74	132000000	Adventure\|Thriller\|Science Fiction	407304	Ray Ferrier is a divorced dockworker and less-...	Paramount Pictures	2005-06-23	591739379	War of the Worlds	Steven Spielberg	...	[503, 502, 501, 500, 504]	[2, 20, 5, 25, 23]	75	15.00	6233708409	73	0.519	459739379	2.639	1
15	77	9000000	Mystery\|Thriller	209144	Suffering short-term memory loss after a head ...	Summit Entertainment	2000-11-10	39723096	Memento	Christopher Nolan	...	[529, 532, 530, 537, 534]	[11, 22, 7, 25, 20]	85	17.00	2654048997	80	0.006	30723096	2.639	1
16	80	2700000	Drama\|Romance	381681	Nine years ago two strangers met by chance and...	Castle Rock Entertainment	2004-10-02	15992615	Before Sunset	Richard Linklater	...	[649, 569, 651, 1146]	[40, 20, 3, 21]	84	21.00	466207939	90	1.0	13292615	2.639	1
17	82	135000000	Action\|Adventure\|Crime\|Thriller	430357	Miami Vice is a feature film based on the 1980...	Universal Pictures	2006-07-27	163794509	Miami Vice	Michael Mann	...	[16867, 2038, 134, 643, 72466]	[9, 5, 15, 19, 12]	60	12.00	4569270530	65	0.928	28794509	0.213	1
18	83	130000	Drama\|Thriller	374102	Two divers are left out at sea without a boat....	Plunge Pictures LLC	2004-06-08	54667954	Open Water	Chris Kentis	...	[1420204, 644, 1187, 590483, 646]	[1, 7, 20, 1, 1]	30	6.00	272689770	0	0.683	54537954	2.639	1
19	98	103000000	Action\|Drama\|Adventure	172495	In the year 180, the death of emperor Marcus A...	DreamWorks SKG	2000-01-05	457640427	Gladiator	Ridley Scott	...	[73421, 935, 934, 194, 936]	[17, 17, 11, 42, 41]	128	25.60	3888338936	67	0.272	354640427	2.639	1
20	107	10000000	Thriller\|Crime	208092	The second film from British director Guy Ritc...	Columbia Pictures Corporation	2000-01-09	83557872	Snatch	Guy Ritchie	...	[287, 1121, 976, 980, 1117]	[14, 13, 3, 11, 20]	61	12.20	5118165411	45	0.049	73557872	2.639	1
21	116	15000000	Drama\|Thriller\|Crime\|Romance	416320	Match Point is Woody Allen’s satire of the Bri...	DreamWorks	2005-10-26	85306374	Match Point	Woody Allen	...	[1244, 1249, 1246, 1245, 1248]	[10, 29, 11, 12, 38]	100	20.00	8271690710	72	Error	70306374	2.639	1
22	118	150000000	Adventure\|Comedy\|Family\|Fantasy	367594	A young boy wins a tour through the most magni...	Village Roadshow Pictures	2005-07-13	474968763	Charlie and the Chocolate Factory	Tim Burton	...	[1286, 1285, 1282, 85, 1281]	[1, 2, 42, 22, 5]	72	14.40	6700463144	72	1.0	324968763	2.166	1
23	120	93000000	Adventure\|Fantasy\|Action	120737	Young hobbit Frodo Baggins, after inheriting a...	WingNut Films	2001-12-18	871368364	The Lord of the Rings: The Fellowship of the Ring	Peter Jackson	...	[48, 114, 1327, 112, 109]	[17, 5, 33, 8, 13]	76	15.20	19390142196	92	Error	778368364	2.639	1
24	121	79000000	Adventure\|Fantasy\|Action	167261	Frodo and Sam are trekking to Mordor to destro...	WingNut Films	2002-12-18	926287400	The Lord of the Rings: The Two Towers	Peter Jackson	...	[110, 114, 1327, 109, 882]	[18, 6, 34, 14, 9]	81	16.20	18248254694	87	1.0	847287400	2.639	1
25	122	94000000	Adventure\|Fantasy\|Action	167260	Aragorn is revealed as the heir to the ancient...	WingNut Films	2003-01-12	1118888979	The Lord of the Rings: The Return of the King	Peter Jackson	...	[110, 114, 1327, 109, 882]	[19, 7, 35, 15, 10]	86	17.20	18248254694	94	0.34	1024888979	2.639	1
26	134	26000000	Action\|Adventure\|Comedy	190590	In the deep south during the 1930s, three esca...	Universal Pictures	2000-08-30	71000000	O Brother, Where Art Thou?	Joel Coen	...	[1462, 1230, 18686, 1241, 1461]	[9, 21, 20, 21, 18]	89	17.80	5854038212	69	0.73	45000000	1.731	1
27	141	6000000	Fantasy\|Drama\|Mystery	246578	After narrowly escaping a bizarre accident, a ...	Pandora Cinema	2001-01-18	1270522	Donnie Darko	Richard Kelly	...	[131, 69597, 20089, 723, 1579]	[11, 22, 6, 23, 9]	71	14.20	2440332869	71	0.12	-4729478	-0.788	0
28	142	14000000	Drama\|Romance	388795	Brokeback Mountain is an Ang Lee film about tw...	River Road Entertainment	2005-02-09	178043761	Brokeback Mountain	Ang Lee	...	[131, 1810, 1812, 1813, 1811]	[15, 9, 12, 5, 35]	76	15.20	7540424599	87	Error	164043761	2.639	1
29	153	4000000	Drama	335266	Two lost souls visiting Tokyo -- the young, ne...	American Zoetrope	2003-08-31	119723856	Lost in Translation	Sofia Coppola	...	[1771, 1770, 1532, 1772, 1245]	[16, 1, 28, 8, 10]	63	12.60	9452080296	89	0.8	115723856	2.639	1
30	155	185000000	Drama\|Action\|Crime\|Thriller	468569	Batman raises the stakes in his war on crime. ...	DC Comics	2008-07-16	1004558444	The Dark Knight	Christopher Nolan	...	[64, 1810, 3895, 3894, 6383]	[27, 12, 53, 23, 17]	132	26.40	13471550057	82	0.444	819558444	2.639	1
31	161	85000000	Thriller\|Crime	240772	Less than 24 hours into his parole, charismati...	Village Roadshow Pictures	2001-07-12	450717150	Ocean's Eleven	Steven Soderbergh	...	[1271, 287, 1204, 1892, 1461]	[19, 15, 15, 14, 19]	82	16.40	11981835752	74	1.0	365717150	2.639	1
32	163	110000000	Thriller\|Crime	349903	Danny Ocean reunites with his old flame and th...	Village Roadshow Pictures	2004-09-12	362744280	Ocean's Twelve	Steven Soderbergh	...	[1271, 287, 1204, 1922, 1461]	[22, 18, 18, 15, 22]	95	19.00	9465243532	58	1.0	252744280	2.298	1
33	167	48000000	Drama\|Science Fiction	272152	Prot is a patient at a mental hospital who cla...	Intermedia Films	2001-10-22	50315140	K-PAX	Iain Softley	...	[1980, 1229, 1979, 1982, 1981]	[65, 51, 16, 16, 24]	172	34.40	2075385244	49	0.268	2315140	0.048	1
34	170	5000000	Horror\|Thriller\|Science Fiction	289043	Twenty-eight days after a killer virus was acc...	DNA Films	2002-10-31	82719885	28 Days Later	Danny Boyle	...	[2038, 2054, 2050, 2052, 2037]	[1, 0, 11, 13, 5]	30	6.00	1537918992	73	1.0	77719885	2.639	1
35	176	1200000	Horror\|Mystery\|Crime	387564	Obsessed with teaching his victims the value o...	Lions Gate Films	2004-01-10	103911669	Saw	James Wan	...	[2047, 2131, 2140, 2136, 2130]	[26, 10, 9, 15, 26]	86	17.20	875698824	46	0.476	102711669	2.639	1
36	179	80000000	Crime\|Thriller	373926	After Silvia Broome, an interpreter at United ...	Universal Pictures	2005-08-04	162944923	The Interpreter	Sydney Pollack	...	[2227, 2245, 2229, 2228, 2244]	[23, 17, 17, 25, 30]	112	22.40	2651124728	62	0.365	82944923	1.037	1
37	180	102000000	Action\|Thriller\|Science Fiction\|Mystery	181689	John Anderton is a top 'Precrime' cop in the l...	DreamWorks SKG	2002-06-20	358372926	Minority Report	Steven Spielberg	...	[2206, 2201, 2207, 72466, 500]	[7, 54, 48, 8, 22]	139	27.80	6472905683	80	1.0	256372926	2.513	1
38	182	32000000	War\|Crime\|Drama\|Mystery\|Romance\|Thriller	452624	An American journalist played by George Cloone...	Warner Bros.	2006-08-12	5914908	The Good German	Steven Soderbergh	...	[2219, 112, 2221, 2220, 1461]	[18, 13, 12, 21, 24]	88	17.60	8040918411	49	Error	-26085092	-0.815	0
39	186	27000000	Drama\|Thriller\|Crime\|Mystery	425210	Slevin is mistakenly put in the middle of a pe...	The Weinstein Company	2006-02-24	56308881	Lucky Number Slevin	Paul McGuigan	...	[140, 2282, 2299, 62, 192]	[15, 34, 9, 27, 34]	119	23.80	7860410464	53	0.368	29308881	1.086	1
40	187	40000000	Action\|Thriller\|Crime	401792	Welcome to Sin City. This town beckons to the ...	Dimension Films	2005-03-31	158733820	Sin City	Robert Rodriguez	...	[56731, 5916, 6278, 6280, 6279]	[12, 11, 3, 26, 8]	60	12.00	1347402319	46	0.8	118733820	2.639	1
41	189	65000000	Crime\|Thriller	458481	Some of Sin City's most hard-boiled citizens c...	Miramax Films	2014-08-20	39407616	Sin City: A Dame to Kill For	Robert Rodriguez	...	[56731, 5916, 2295, 16851, 24045]	[21, 20, 36, 30, 27]	134	26.80	3512611332	46	0.383	-25592384	-0.394	0
42	201	60000000	Science Fiction\|Action\|Adventure\|Thriller	253754	En route to the honeymoon of William Riker to ...	Paramount Pictures	2002-12-13	67312826	Star Trek: Nemesis	Stuart Baird	...	[2390, 2387, 2388, 2392, 1213786]	[26, 30, 23, 19, 21]	119	23.80	1118894447	51	Error	7312826	0.122	1
43	205	17500000	Drama\|History\|War	395169	Inspired by true events, this film takes place...	Lions Gate Films	2004-11-09	38000000	Hotel Rwanda	Terry George	...	[2598, 1733, 2607, 73421, 1896]	[14, 32, 11, 21, 20]	98	19.60	3251345729	79	0.404	20500000	1.171	1
44	214	10000000	Horror\|Thriller\|Crime	489270	Jigsaw has disappeared. Along with his new app...	Lions Gate Films	2006-10-27	163876815	Saw III	Darren Lynn Bousman	...	[2677, 2138, 2464, 2144, 2133]	[3, 22, 15, 26, 12]	78	15.60	1339628004	48	0.671	153876815	2.639	1
45	215	4000000	Horror	432348	When a new murder victim is discovered with al...	Lions Gate Films	2005-10-28	152925093	Saw II	Darren Lynn Bousman	...	[2682, 2138, 2680, 2683, 2144]	[4, 21, 10, 18, 25]	78	15.60	1346302158	48	1.0	148925093	2.639	1
46	217	185000000	Adventure\|Action	367882	Set during the Cold War, the Soviets – led by ...	Lucasfilm	2008-05-21	786636033	Indiana Jones and the Kingdom of the Crystal S...	Steven Spielberg	...	[650, 112, 5538, 3, 10959]	[31, 15, 33, 43, 11]	133	26.60	10537417498	65	Error	601636033	2.639	1
47	231	50000000	Drama\|Thriller	365737	The Middle Eastern oil industry is the backdro...	Section Eight	2005-11-23	94000000	Syriana	Stephen Gaghan	...	[2954, 2955, 2956, 1892, 1461]	[16, 19, 11, 18, 23]	87	17.40	8006815262	76	0.469	44000000	0.880	1
48	237	6400000	Drama\|Thriller\|Crime\|Romance	289635	A young drifter working on a river barge disru...	Recorded Picture Company (RPC)	2003-09-26	2500000	Young Adam	David Mackenzie	...	[3061, 3063, 3065, 1246, 3064]	[11, 18, 1, 9, 16]	55	11.00	3188011025	67	Error	-3900000	-0.609	0
49	243	30000000	Comedy\|Drama\|Romance\|Music	146882	When record store owner Rob Gordon gets dumped...	Buena Vista	2000-03-17	47126295	High Fidelity	Stephen Frears	...	[70851, 1562, 3230, 3036, 3232]	[9, 5, 9, 18, 18]	59	11.80	3796611818	79	Error	17126295	0.571	1