notebook.community

Edit and run



In [1]:

    
import json
import json_helper as jhlp
import os
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from sklearn.cluster import KMeans



In [2]:

    
plt.style.use('ggplot')



In [3]:

    
json_path = os.getcwd() + '/database_characterinteractions'
movie_jsons = [os.getcwd() + '/database_characterinteractions/' + json_name for json_name in os.listdir(json_path)]
script_dicts_with_name = {}
for movie_json in movie_jsons:
    with open(movie_json) as f:
        script_dicts_with_name[movie_json.split('/')[-1].split('.')[0]] = json.loads(f.read())



In [4]:

    
len(movie_jsons)









    Out[4]:





953



In [5]:

    
def get_chars_for_scene2(script_dict, scene_id):
    return script_dict[str(scene_id)]

def get_all_char_from_script2(script_dict):
    chars = set()
    for scene in script_dict:
        for char in script_dict[scene]:
            chars.add(char)
    return list(chars)



In [6]:

    
chars = get_all_char_from_script2(script_dicts_with_name['A Few Good Men'])
print chars
print "\n"
print get_chars_for_scene2(script_dicts_with_name['A Few Good Men'], 10)









    



[u'LUTHER', u'MCGUIRE', u'SAM', u'HOWARD', u'KAFFEE', u'WEST', u'SPRADLING', u'STONE', u'SERGEANT AT ARMS', u'HAMMAKER', u'JO', u'DOWNEY', u'MARKINSON', u'GIBBS', u'KENDRICK', u'JESSEP', u'DAWSON', u'WHITAKER', u'RANDOLPH', u'ROSS']


[u'KAFFEE', u'SPRADLING', u'KAFFEE', u'SPRADLING', u'KAFFEE', u'SPRADLING', u'KAFFEE']



In [7]:

    
len(script_dicts_with_name)









    Out[7]:





952



In [8]:

    
#graph formation method I
#register a character interaction
#if two characters co-occur in a scen
graphs_I = []
graph_names_I = []
for name in script_dicts_with_name:
    script_dict = script_dicts_with_name[name]
    all_char_list = get_all_char_from_script2(script_dict)
    graph = nx.Graph()
    for scene_id in script_dict:
        scene_chars = get_chars_for_scene2(script_dict, scene_id)
        for i in range(len(scene_chars)-1):
            for j in range(i+1,len(scene_chars)):
                char1 = scene_chars[i]
                char2 = scene_chars[j]
                if char1 == char2:
                    continue
                if not graph.has_edge(char1, char2):
                    graph.add_edge(char1, char2, weight=1)
                else:
                    graph[char1][char2]['weight'] += 1
    graphs_I.append(graph)
    graph_names_I.append(name)



In [9]:

    
#for graph formation method II
def get_char_interactions_for_scene_new(script_dict, scene_id):
    interactions = []
    chars_in_scene = get_chars_for_scene2(script_dict, scene_id)
    if len(chars_in_scene) < 2: #if a scene has less than two characters
        return interactions
    for i in range(len(chars_in_scene)-1):
        for j in range(i+1, len(chars_in_scene)):
            if j-i > 3:
                continue
            m = j-i
            weight = 4-m
            char1 = chars_in_scene[i]
            char2 = chars_in_scene[j]
            if char1 == char2:
                continue
            interactions.append((char1, char2, weight))
    return interactions



In [10]:

    
#graph formation method II
#register a character interaction if two characters
#speak within a certain distance of each other
graphs_II = []
graph_names_II = []
for name in script_dicts_with_name:
    script_dict = script_dicts_with_name[name]
    graph = nx.Graph()
    for scene_id in script_dict:
        char_interactions = get_char_interactions_for_scene_new(script_dict, scene_id)
        for interaction in char_interactions:
            char1 = interaction[0]
            char2 = interaction[1]
            wt = interaction[2]
            if not graph.has_edge(char1, char2):
                    graph.add_edge(char1, char2, weight=wt)
            else:
                graph[char1][char2]['weight'] += wt

    graphs_II.append(graph)
    graph_names_II.append(name)



In [11]:

    
print len(graphs_I)
print len(graph_names_I)
print len(graphs_II)
print len(graph_names_II)



In [12]:

    
def get_graph_metrics(graph):
    try:
        num_chars = len(graph.nodes()) #1
        num_relations = len(graph.edges()) #2
        avg_clustering = nx.average_clustering(graph) #3
        per_node_centrality = sorted(np.array(nx.degree_centrality(graph).values()), reverse=True)
        centralization = sum(per_node_centrality[0] - per_node_centrality)/((num_chars-1)*(num_chars-2)) #4
        sing_char_centrality = (per_node_centrality[0] - per_node_centrality[1])/sum(per_node_centrality) #5
        edge_weights = sorted([float(graph[u][v]['weight'])/5 for u,v in graph.edges()], reverse=True)
        single_relationship_centrality = (edge_weights[0]-edge_weights[1])/sum(edge_weights) #6
        degrees = graph.degree(graph.nodes(), weight='weight').values()
        degrees = np.array(degrees)/sum(edge_weights)
        k = min(10, num_chars)
        top_char_degree_var = np.var(sorted(degrees, reverse=True)[:k]) #7
        k = min(10, len(edge_weights))
        top_relationship_var = np.var(edge_weights[:k]) #8
    except Exception as e:
        return None
    metrics = np.array([num_chars, num_relations, avg_clustering, centralization, sing_char_centrality, 
                       single_relationship_centrality, top_char_degree_var, top_relationship_var])
    if float('inf') in metrics:
        return None
    else:
        return metrics



In [ ]:

    
movie_names_I = []
movie_features_I = []

for i,graph in enumerate(graphs_I):
    metrics = get_graph_metrics(graph)
    if metrics != None:
        movie_names_I.append(graph_names_I[i])
        movie_features_I.append(metrics)
        
movie_names_II = []
movie_features_II = []

for i,graph in enumerate(graphs_II):
    metrics = get_graph_metrics(graph)
    if metrics != None:
        movie_names_II.append(graph_names_II[i])
        movie_features_II.append(metrics)



In [14]:

    
print len(movie_names_I)
print len(movie_features_I)

print len(movie_names_II)
print len(movie_features_II)



In [15]:

    
import pandas as pd
feats = [[movie_names_I[i]] + [f for f in mf] for i,mf in enumerate(movie_features_I)]
df = pd.DataFrame(feats)
df









    Out[15]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
    
  
  
    
      0
      Funny People
      18.0
      38.0
      0.627066
      0.038062
      0.013158
      0.228325
      1.114880
      7568.6256
    
    
      1
      Road, The
      11.0
      28.0
      0.832035
      0.060000
      0.000000
      0.236688
      1.372372
      6195.4324
    
    
      2
      Limitless
      19.0
      42.0
      0.859649
      0.046841
      0.011905
      0.014257
      1.287618
      709.2756
    
    
      3
      Yes Man
      27.0
      70.0
      0.651552
      0.031657
      0.107143
      0.004606
      1.213875
      1003.2880
    
    
      4
      English Patient, The
      20.0
      49.0
      0.601429
      0.018775
      0.020408
      0.104373
      0.269430
      604.4000
    
    
      5
      Gremlins
      23.0
      76.0
      0.644488
      0.032566
      0.065789
      0.002179
      0.763040
      18.8484
    
    
      6
      Crow, The
      20.0
      56.0
      0.500198
      0.028932
      0.053571
      0.045455
      0.374572
      19.8116
    
    
      7
      Moon
      9.0
      19.0
      0.544444
      0.035714
      0.000000
      0.194133
      1.177243
      1997.0080
    
    
      8
      Smokin' Aces
      26.0
      71.0
      0.492703
      0.013067
      0.014085
      0.015005
      0.021428
      192.3796
    
    
      9
      Human Nature
      12.0
      34.0
      0.699074
      0.033058
      0.000000
      0.191882
      1.451974
      3996.6304
    
    
      10
      Cradle 2 the Grave
      17.0
      51.0
      0.564180
      0.022135
      0.019608
      0.057832
      0.417705
      235.8836
    
    
      11
      Snatch
      26.0
      81.0
      0.586736
      0.013467
      0.000000
      0.001347
      0.120621
      124.5284
    
    
      12
      Sherlock Holmes
      17.0
      41.0
      0.718009
      0.040625
      0.024390
      0.350877
      1.655823
      808.8096
    
    
      13
      Star Trek II: The Wrath of Khan
      21.0
      101.0
      0.775082
      0.023158
      0.004950
      0.016935
      0.646164
      1029.2916
    
    
      14
      Ginger Snaps
      13.0
      38.0
      0.705883
      0.050505
      0.026316
      0.363497
      1.409181
      2102.7216
    
    
      15
      Harold and Kumar Go to White Castle
      28.0
      64.0
      0.782262
      0.030178
      0.000000
      0.420779
      1.680751
      1563.3876
    
    
      16
      Sideways
      16.0
      33.0
      0.743479
      0.055238
      0.045455
      0.308030
      1.973953
      16684.0036
    
    
      17
      Big White, The
      16.0
      34.0
      0.676515
      0.034286
      0.000000
      0.086073
      0.474138
      1278.0276
    
    
      18
      Cirque du Freak: The Vampire's Assistan
      15.0
      52.0
      0.766040
      0.041601
      0.038462
      0.223978
      1.121962
      3925.1504
    
    
      19
      Eight Legged Freaks
      14.0
      61.0
      0.872982
      0.029586
      0.000000
      0.024307
      0.292936
      114.1380
    
    
      20
      Citizen Kane
      29.0
      102.0
      0.697335
      0.015023
      0.000000
      0.053893
      0.585699
      2216.3924
    
    
      21
      Ghost Rider
      20.0
      65.0
      0.759872
      0.038473
      0.053846
      0.006575
      0.362362
      121.7316
    
    
      22
      Newsies
      25.0
      123.0
      0.757743
      0.021060
      0.004065
      0.114693
      0.562832
      114.2564
    
    
      23
      Wall-E
      14.0
      48.0
      0.758761
      0.028600
      0.000000
      0.022670
      0.602018
      152.1284
    
    
      24
      Precious
      17.0
      39.0
      0.726144
      0.046094
      0.064103
      0.133663
      1.368561
      206.0036
    
    
      25
      Hudson Hawk
      17.0
      60.0
      0.699223
      0.035156
      0.025000
      0.033471
      0.824970
      344.6340
    
    
      26
      Rush Hour 2
      13.0
      28.0
      0.794872
      0.063131
      0.017857
      0.480677
      2.083454
      7429.1796
    
    
      27
      Dances with Wolves
      18.0
      44.0
      0.681393
      0.027682
      0.022727
      0.064723
      0.323648
      163.7716
    
    
      28
      Win Win
      17.0
      71.0
      0.807427
      0.033854
      0.000000
      0.019219
      0.714111
      654.5776
    
    
      29
      1492: Conquest of Paradise
      20.0
      48.0
      0.729789
      0.043706
      0.114583
      0.082028
      1.184544
      112.4160
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      895
      Metro
      17.0
      30.0
      0.460912
      0.033073
      0.033333
      0.111736
      1.456276
      2026.3584
    
    
      896
      Hanna
      20.0
      50.0
      0.589084
      0.030779
      0.010000
      0.014151
      0.723225
      829.3200
    
    
      897
      Twilight: New Moon
      25.0
      85.0
      0.778115
      0.030571
      0.047059
      0.151158
      1.333289
      10729.8416
    
    
      898
      Land of the Dead
      27.0
      173.0
      0.791709
      0.017870
      0.005780
      0.009871
      0.280822
      22.2500
    
    
      899
      Drive
      13.0
      30.0
      0.653175
      0.044192
      0.033333
      0.044248
      0.668038
      95.3604
    
    
      900
      Game, The
      19.0
      37.0
      0.504280
      0.048656
      0.148649
      0.248891
      2.001254
      9948.7540
    
    
      901
      Tourist, The
      11.0
      32.0
      0.843434
      0.038889
      0.015625
      0.210048
      1.204128
      1566.9056
    
    
      902
      Larry Crowne
      28.0
      83.0
      0.608488
      0.029651
      0.072289
      0.081390
      0.534309
      925.7664
    
    
      903
      Planet of the Apes, The
      13.0
      40.0
      0.833794
      0.039773
      0.012500
      0.082212
      0.548466
      382.8324
    
    
      904
      Beach, The
      11.0
      20.0
      0.736652
      0.077778
      0.075000
      0.157210
      1.638903
      1007.3124
    
    
      905
      500 Days of Summer
      16.0
      39.0
      0.773214
      0.051429
      0.076923
      0.274842
      1.614581
      10024.4464
    
    
      906
      Deep Cover
      16.0
      38.0
      0.700832
      0.041905
      0.039474
      0.074451
      0.612710
      1134.0016
    
    
      907
      American Shaolin: King of Kickboxers II
      20.0
      76.0
      0.810174
      0.035088
      0.039474
      0.024289
      0.834218
      165.9876
    
    
      908
      Duck Soup
      11.0
      44.0
      0.858009
      0.024444
      0.011364
      0.116906
      1.009203
      8639.0144
    
    
      909
      Disturbia
      10.0
      19.0
      0.697778
      0.080247
      0.078947
      0.062784
      1.797648
      1085.4624
    
    
      910
      Boogie Nights
      33.0
      87.0
      0.582368
      0.015310
      0.011494
      0.051095
      0.434025
      2863.2884
    
    
      911
      Sugar and Spice
      25.0
      95.0
      0.728497
      0.021513
      0.015789
      0.006047
      0.508536
      142.3296
    
    
      912
      Do The Right Thing
      37.0
      198.0
      0.682062
      0.011662
      0.002525
      0.069682
      0.245499
      594.7184
    
    
      913
      Starman
      19.0
      35.0
      0.694069
      0.021786
      0.000000
      0.469358
      1.349386
      7747.3284
    
    
      914
      Good Girl, The
      9.0
      36.0
      1.000000
      0.000000
      0.000000
      0.021994
      0.928856
      79099.6336
    
    
      915
      Year One
      24.0
      88.0
      0.729840
      0.028184
      0.017045
      0.141398
      0.934894
      1316.5716
    
    
      916
      Pi
      9.0
      16.0
      0.709524
      0.089286
      0.093750
      0.086321
      1.761079
      1783.8144
    
    
      917
      Blood Simple
      23.0
      78.0
      0.673792
      0.014069
      0.012821
      0.044146
      0.311160
      801.5024
    
    
      918
      Mariachi, El
      10.0
      25.0
      0.678730
      0.061728
      0.040000
      0.230825
      0.899861
      522.0916
    
    
      919
      Other Boleyn Girl, The
      30.0
      225.0
      0.781593
      0.015288
      0.000000
      0.017032
      0.305388
      343.3296
    
    
      920
      Mimic
      15.0
      38.0
      0.722496
      0.040816
      0.000000
      0.092096
      0.935956
      338.7984
    
    
      921
      Buried
      13.0
      25.0
      0.766434
      0.066919
      0.140000
      0.188379
      1.572420
      144699.8544
    
    
      922
      Seventh Seal, The
      14.0
      75.0
      0.903152
      0.015779
      0.000000
      0.001217
      0.355084
      12541.7504
    
    
      923
      Two For The Money
      18.0
      38.0
      0.696898
      0.049740
      0.052632
      0.194619
      1.651640
      7729.4564
    
    
      924
      Black Rain
      14.0
      36.0
      0.700031
      0.054241
      0.055556
      0.085296
      1.289786
      519.6560
    
  

925 rows × 9 columns



In [16]:

    
import cPickle as pickle
with open('char_net_final_I.pkl', 'w') as fp:
    pickle.dump(df, fp)



In [17]:

    
feats = [[movie_names_II[i]] + [f for f in mf] for i,mf in enumerate(movie_features_II)]
df = pd.DataFrame(feats)
df

with open('char_net_final_II.pkl', 'w') as fp:
    pickle.dump(df, fp)



In [18]:

    
from sklearn.manifold import TSNE
tsne_model = TSNE(n_components=2, random_state=0)
tsne_op = tsne_model.fit_transform(movie_features_I)
plt.figure(figsize=(10,10))
plt.scatter(tsne_op[:,0], tsne_op[:,1], alpha=0.5)
plt.show()



In [19]:

    
tsne_model = TSNE(n_components=2, random_state=0)
tsne_op = tsne_model.fit_transform(movie_features_II)
plt.figure(figsize=(10,10))
plt.scatter(tsne_op[:,0], tsne_op[:,1], alpha=0.5)
plt.show()



In [20]:

    
#clustering in tsne reduced dimensions
kmeans = KMeans(n_clusters=7, random_state=0)
tsne_clusters = kmeans.fit_predict(tsne_op)
#c = ['r', 'g', 'b', 'y', 'k', 'c', 'm', 'w']
c = ['r', 'g', 'b', 'y', 'k', 'w', 'c']
#c = np.linspace(0,0.6,7)
colors = [c[t] for t in tsne_clusters]
plt.figure(figsize=(10,10))
plt.scatter(tsne_op[:,0], tsne_op[:,1], c=colors, alpha=0.5)
plt.show()



In [21]:

    
#lists of movies falling in different clusters
movie_clusters = [[] for _ in range(7)]
for i,tc in enumerate(tsne_clusters):
    movie_clusters[tc].append(movie_names_II[i])
    
for cluster in movie_clusters:
    print cluster[:10]









    



['Limitless', 'Precious', 'Hudson Hawk', 'Blade Runner', 'Panic Room', 'Watchmen', 'Trainspotting', 'Orphan', "One Flew Over the Cuckoo's Nes", 'Bachelor Party, The']
['Star Trek II: The Wrath of Khan', 'Big White, The', 'Citizen Kane', 'Twiligh', 'Erin Brockovich', 'Matrix, The', 'Family Man, The', "Ocean's Eleven", 'Broken Arrow', 'Angels & Demons']
['Funny People', 'Road, The', 'Human Nature', 'Harold and Kumar Go to White Castle', 'Sideways', 'Rush Hour 2', 'Three Kings (Spoils of War)', 'Paul', 'Up in the Air', 'Fabulous Baker Boys, The']
['Gremlins', 'Crow, The', "Smokin' Aces", 'Eight Legged Freaks', 'Wall-E', 'Dances with Wolves', '1492: Conquest of Paradise', 'Hannibal', 'Pacifier, The', 'Princess Bride, The']
['Yes Man', 'English Patient, The', 'Cold Mountain', 'Crazy, Stupid, Love', "All the King's Men", 'Prophecy, The', 'Men in Black', "All the President's Men", 'Austin Powers - The Spy Who Shagged Me', 'Midnight Cowboy']
['Moon', 'Sherlock Holmes', 'Ginger Snaps', "Cirque du Freak: The Vampire's Assistan", 'Win Win', 'An Education', 'Descendants, The', 'Jacket, The', 'Wag the Dog', 'Lake Placid']
['Cradle 2 the Grave', 'Snatch', 'Ghost Rider', 'Newsies', 'Fair Game', 'Total Recall', 'Marty', 'John Q', 'Adventures of Buckaroo Banzai Across the Eighth Dimension, The', 'Island, The']



In [22]:

    
#find out which cluster the given movie falls in
def get_movie_cluster(name):
    for i,cluster in enumerate(movie_clusters):
        if name in cluster:
            return i



In [23]:

    
print get_movie_cluster('Terminator')
print get_movie_cluster('Terminator 2: Judgement Day')
print get_movie_cluster('Terminator Salvation')

print get_movie_cluster('Godfather')
print get_movie_cluster('Godfather Part II')
print get_movie_cluster('Godfather Part III, The')

print get_movie_cluster('Kill Bill Volume 1 & 2')
print get_movie_cluster('Reservoir Dogs')



In [24]:

    
import cPickle as pickle
with open('movie_data.pkl', 'r') as fp:
    movie_data = pickle.load(fp)



In [25]:

    
movie_data['ratings'][movie_clusters[0][0]]









    Out[25]:





7.9



In [26]:

    
cluster_score_means = []
cluster_score_deviations = []
cluster_genres = []

for cluster in movie_clusters:
    cluster_scores = [movie_data['ratings'][name] for name in cluster if name in movie_data['ratings']]
    cluster_score_means.append(np.mean(cluster_scores))
    cluster_score_deviations.append(np.std(cluster_scores))
    genres = []
    for name in cluster:
        if name in movie_data['genres']:
            genres += movie_data['genres'][name]
    g = set(genres)
    cluster_genres.append(sorted(g, key=lambda m: genres.count(m)))



In [27]:

    
for i in range(len(movie_clusters)):
    print [g for g in cluster_genres[i] if g != 'genres'][:5]
    print cluster_score_means[i]
    print cluster_score_deviations[i]









    



[u'Musical', u'Animation', u'Music', u'Sport', u'War']
6.8902173913
1.1401811243
[u'Film-Noir', u'Talk-Show', u'Animation', u'Western', u'Musical']
7.06022099448
0.904969677341
[u'Film-Noir', u'Musical', u'War', u'Sport', u'Music']
7.04344262295
0.865834923967
[u'Short', u'Musical', u'Music', u'Western', u'Sport']
6.74110429448
1.13411840096
[u'Western', u'Documentary', u'Musical', u'Film-Noir', u'Animation']
7.10275229358
0.88757568129
[u'Animation', u'War', u'Film-Noir', u'Short', u'Documentary']
7.01052631579
0.865223093304
[u'War', u'Western', u'Short', u'Sport', u'Music']
6.99736842105
1.01682862037



In [28]:

    
lls = [[] for _ in range(5)]
lls[0].append(5)
lls[3].append(66)
print lls









    



[[5], [], [], [66], []]



In [29]:

    
print graph_names_I.index('Girl with the Dragon Tattoo, The')
print graph_names_II.index('Girl with the Dragon Tattoo, The')



In [30]:

    
graph_new = graphs_II[graph_names_II.index('Lord of the Rings: Fellowship of the Ring, The')]
graph_new.remove_node('BUTTERBUR')
graph_new.remove_node('FADE TO BLACK')
edge_weights = [float(graph_new[u][v]['weight'])/5 for u,v in graph_new.edges()]
pos = nx.circular_layout(graph_new)
plt.figure(figsize=(15,15))
nx.draw_networkx(graph_new, with_labels=True, width=[e/4 for e in edge_weights], alpha=0.5); plt.show()

	0	1	2	3	4	5	6	7	8
0	Funny People	18.0	38.0	0.627066	0.038062	0.013158	0.228325	1.114880	7568.6256
1	Road, The	11.0	28.0	0.832035	0.060000	0.000000	0.236688	1.372372	6195.4324
2	Limitless	19.0	42.0	0.859649	0.046841	0.011905	0.014257	1.287618	709.2756
3	Yes Man	27.0	70.0	0.651552	0.031657	0.107143	0.004606	1.213875	1003.2880
4	English Patient, The	20.0	49.0	0.601429	0.018775	0.020408	0.104373	0.269430	604.4000
5	Gremlins	23.0	76.0	0.644488	0.032566	0.065789	0.002179	0.763040	18.8484
6	Crow, The	20.0	56.0	0.500198	0.028932	0.053571	0.045455	0.374572	19.8116
7	Moon	9.0	19.0	0.544444	0.035714	0.000000	0.194133	1.177243	1997.0080
8	Smokin' Aces	26.0	71.0	0.492703	0.013067	0.014085	0.015005	0.021428	192.3796
9	Human Nature	12.0	34.0	0.699074	0.033058	0.000000	0.191882	1.451974	3996.6304
10	Cradle 2 the Grave	17.0	51.0	0.564180	0.022135	0.019608	0.057832	0.417705	235.8836
11	Snatch	26.0	81.0	0.586736	0.013467	0.000000	0.001347	0.120621	124.5284
12	Sherlock Holmes	17.0	41.0	0.718009	0.040625	0.024390	0.350877	1.655823	808.8096
13	Star Trek II: The Wrath of Khan	21.0	101.0	0.775082	0.023158	0.004950	0.016935	0.646164	1029.2916
14	Ginger Snaps	13.0	38.0	0.705883	0.050505	0.026316	0.363497	1.409181	2102.7216
15	Harold and Kumar Go to White Castle	28.0	64.0	0.782262	0.030178	0.000000	0.420779	1.680751	1563.3876
16	Sideways	16.0	33.0	0.743479	0.055238	0.045455	0.308030	1.973953	16684.0036
17	Big White, The	16.0	34.0	0.676515	0.034286	0.000000	0.086073	0.474138	1278.0276
18	Cirque du Freak: The Vampire's Assistan	15.0	52.0	0.766040	0.041601	0.038462	0.223978	1.121962	3925.1504
19	Eight Legged Freaks	14.0	61.0	0.872982	0.029586	0.000000	0.024307	0.292936	114.1380
20	Citizen Kane	29.0	102.0	0.697335	0.015023	0.000000	0.053893	0.585699	2216.3924
21	Ghost Rider	20.0	65.0	0.759872	0.038473	0.053846	0.006575	0.362362	121.7316
22	Newsies	25.0	123.0	0.757743	0.021060	0.004065	0.114693	0.562832	114.2564
23	Wall-E	14.0	48.0	0.758761	0.028600	0.000000	0.022670	0.602018	152.1284
24	Precious	17.0	39.0	0.726144	0.046094	0.064103	0.133663	1.368561	206.0036
25	Hudson Hawk	17.0	60.0	0.699223	0.035156	0.025000	0.033471	0.824970	344.6340
26	Rush Hour 2	13.0	28.0	0.794872	0.063131	0.017857	0.480677	2.083454	7429.1796
27	Dances with Wolves	18.0	44.0	0.681393	0.027682	0.022727	0.064723	0.323648	163.7716
28	Win Win	17.0	71.0	0.807427	0.033854	0.000000	0.019219	0.714111	654.5776
29	1492: Conquest of Paradise	20.0	48.0	0.729789	0.043706	0.114583	0.082028	1.184544	112.4160
...	...	...	...	...	...	...	...	...	...
895	Metro	17.0	30.0	0.460912	0.033073	0.033333	0.111736	1.456276	2026.3584
896	Hanna	20.0	50.0	0.589084	0.030779	0.010000	0.014151	0.723225	829.3200
897	Twilight: New Moon	25.0	85.0	0.778115	0.030571	0.047059	0.151158	1.333289	10729.8416
898	Land of the Dead	27.0	173.0	0.791709	0.017870	0.005780	0.009871	0.280822	22.2500
899	Drive	13.0	30.0	0.653175	0.044192	0.033333	0.044248	0.668038	95.3604
900	Game, The	19.0	37.0	0.504280	0.048656	0.148649	0.248891	2.001254	9948.7540
901	Tourist, The	11.0	32.0	0.843434	0.038889	0.015625	0.210048	1.204128	1566.9056
902	Larry Crowne	28.0	83.0	0.608488	0.029651	0.072289	0.081390	0.534309	925.7664
903	Planet of the Apes, The	13.0	40.0	0.833794	0.039773	0.012500	0.082212	0.548466	382.8324
904	Beach, The	11.0	20.0	0.736652	0.077778	0.075000	0.157210	1.638903	1007.3124
905	500 Days of Summer	16.0	39.0	0.773214	0.051429	0.076923	0.274842	1.614581	10024.4464
906	Deep Cover	16.0	38.0	0.700832	0.041905	0.039474	0.074451	0.612710	1134.0016
907	American Shaolin: King of Kickboxers II	20.0	76.0	0.810174	0.035088	0.039474	0.024289	0.834218	165.9876
908	Duck Soup	11.0	44.0	0.858009	0.024444	0.011364	0.116906	1.009203	8639.0144
909	Disturbia	10.0	19.0	0.697778	0.080247	0.078947	0.062784	1.797648	1085.4624
910	Boogie Nights	33.0	87.0	0.582368	0.015310	0.011494	0.051095	0.434025	2863.2884
911	Sugar and Spice	25.0	95.0	0.728497	0.021513	0.015789	0.006047	0.508536	142.3296
912	Do The Right Thing	37.0	198.0	0.682062	0.011662	0.002525	0.069682	0.245499	594.7184
913	Starman	19.0	35.0	0.694069	0.021786	0.000000	0.469358	1.349386	7747.3284
914	Good Girl, The	9.0	36.0	1.000000	0.000000	0.000000	0.021994	0.928856	79099.6336
915	Year One	24.0	88.0	0.729840	0.028184	0.017045	0.141398	0.934894	1316.5716
916	Pi	9.0	16.0	0.709524	0.089286	0.093750	0.086321	1.761079	1783.8144
917	Blood Simple	23.0	78.0	0.673792	0.014069	0.012821	0.044146	0.311160	801.5024
918	Mariachi, El	10.0	25.0	0.678730	0.061728	0.040000	0.230825	0.899861	522.0916
919	Other Boleyn Girl, The	30.0	225.0	0.781593	0.015288	0.000000	0.017032	0.305388	343.3296
920	Mimic	15.0	38.0	0.722496	0.040816	0.000000	0.092096	0.935956	338.7984
921	Buried	13.0	25.0	0.766434	0.066919	0.140000	0.188379	1.572420	144699.8544
922	Seventh Seal, The	14.0	75.0	0.903152	0.015779	0.000000	0.001217	0.355084	12541.7504
923	Two For The Money	18.0	38.0	0.696898	0.049740	0.052632	0.194619	1.651640	7729.4564
924	Black Rain	14.0	36.0	0.700031	0.054241	0.055556	0.085296	1.289786	519.6560