In [1]:
import json
import json_helper as jhlp  # project-local helpers for parsing the script JSONs
import os
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import scipy.stats as scistat
plt.style.use('ggplot')
In [2]:
movies = ['Kill Bill Volume 1 & 2', 'A Few Good Men', 'Girl with the Dragon Tattoo, The', 'Avengers, The', 'Fight Club', 'Twilight',
          'Pirates of the Caribbean', 'Pirates of the Caribbean: Dead Man\'s Chest']
json_path = "path to json files"
movie_jsons = [json_path + '/' + movie + '.json' for movie in movies]
script_dicts = []
for movie_json in movie_jsons:
    with open(movie_json) as f:
        script_dicts.append(json.load(f))
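For reference, the cells below assume a per-scene JSON layout roughly like this hypothetical sketch; the field names and the position of the speaker inside each dialogue entry are inferred from how the code accesses them (`scene['char_dialogues']`, `d[1]`), not from any dataset documentation.

# hypothetical sketch of the assumed script JSON structure: scenes keyed
# by id, each with a description and dialogue entries whose index 1 is
# the speaker name
example_script = {
    "0": {
        "description": "INT. BAR - NIGHT. ALICE waits as BOB enters.",
        "char_dialogues": [
            [0, "ALICE", "You're late."],
            [1, "BOB", "Traffic."],
        ],
    },
}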
In [3]:
# find the characters present in a scene, both from the scene
# description and from the dialogue lines
def get_chars_for_scene(scene, all_char_list):
    chars_from_desc = set()
    chars_from_dialogue = set()
    desc = jhlp.get_description_for_scene(scene)
    for c in all_char_list:
        # plain substring matching, so a short name can also match
        # inside a longer one (e.g. 'ANN' inside 'ANNIE')
        if c in desc:
            chars_from_desc.add(c)
    for d in scene['char_dialogues']:
        if d[1] in all_char_list:  # index 1 holds the speaker name
            chars_from_dialogue.add(d[1])
    return list(chars_from_desc), list(chars_from_dialogue)
In [4]:
# graph formation method I: connect every pair of characters that speak
# in the same scene; edge weight counts the number of shared scenes
import itertools

graphs = []
for script_dict in script_dicts:
    all_char_list = jhlp.get_all_char_from_script(script_dict)
    graph = nx.Graph()
    for key in script_dict:
        scene = script_dict[key]
        desc_chars, dialogue_chars = get_chars_for_scene(scene, all_char_list)
        # iterate over unordered pairs so each co-occurrence counts exactly once
        for char1, char2 in itertools.combinations(dialogue_chars, 2):
            if not graph.has_edge(char1, char2):
                graph.add_edge(char1, char2, weight=1)
            else:
                graph[char1][char2]['weight'] += 1
    graphs.append(graph)
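As a quick sanity check, here is the same co-occurrence idiom on two hand-made toy scenes (the names are made up, not from the dataset):

# toy demo: ALICE and BOB share both scenes, CAROL appears in one
toy_scenes = [['ALICE', 'BOB'], ['ALICE', 'BOB', 'CAROL']]
g = nx.Graph()
for chars in toy_scenes:
    for c1, c2 in itertools.combinations(chars, 2):
        if not g.has_edge(c1, c2):
            g.add_edge(c1, c2, weight=1)
        else:
            g[c1][c2]['weight'] += 1
print g['ALICE']['BOB']['weight']    # 2: they share two scenes
print g['ALICE']['CAROL']['weight']  # 1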
In [10]:
for graph in graphs:
    edge_weights = [graph[u][v]['weight'] for u, v in graph.edges()]
    pos = nx.spectral_layout(graph)
    plt.figure(figsize=(10, 10))
    nx.draw_networkx(graph, pos=pos, with_labels=True, width=edge_weights, alpha=0.5)
    plt.show()
In [7]:
# graph formation method II: weight an interaction by how close two
# dialogue lines are within a scene (nearer lines interact more strongly)
def get_char_interactions_for_scene_new(scene):
    interactions = []
    char_dialogues = scene['char_dialogues']
    if len(char_dialogues) < 2:  # a scene with fewer than two dialogue lines
        return interactions
    for i in range(len(char_dialogues)):
        for j in range(i + 1, len(char_dialogues)):
            if j - i > 3:  # only pair dialogue lines at most 3 apart
                continue
            weight = 4 - (j - i)  # adjacent lines weigh 3, then 2, then 1
            char1 = char_dialogues[i][1]
            char2 = char_dialogues[j][1]
            if char1 == char2:  # same character speaking again; skip self-loops
                continue
            interactions.append((char1, char2, weight))
    return interactions

graphs_new = []
for script_dict in script_dicts:
    graph_new = nx.Graph()
    for scene_id in script_dict:
        scene = script_dict[scene_id]
        scene_interactions = get_char_interactions_for_scene_new(scene)
        for char1, char2, wt in scene_interactions:
            if not graph_new.has_edge(char1, char2):
                graph_new.add_edge(char1, char2, weight=wt)
            else:
                graph_new[char1][char2]['weight'] += wt
    graphs_new.append(graph_new)
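A quick check of the proximity weighting on a hand-made scene (the dialogue entries below follow the assumed layout with the speaker at index 1):

toy_scene = {'char_dialogues': [[0, 'ALICE', '...'], [1, 'BOB', '...'],
                                [2, 'ALICE', '...'], [3, 'CAROL', '...']]}
print get_char_interactions_for_scene_new(toy_scene)
# with weight = 4 - distance, this prints:
# [('ALICE', 'BOB', 3), ('ALICE', 'CAROL', 1),
#  ('BOB', 'ALICE', 3), ('BOB', 'CAROL', 2), ('ALICE', 'CAROL', 3)]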
In [9]:
for graph_new in graphs_new:
    edge_weights = [float(graph_new[u][v]['weight']) / 5 for u, v in graph_new.edges()]
    pos = nx.spectral_layout(graph_new)
    plt.figure()
    nx.draw_networkx(graph_new, pos=pos, with_labels=True, width=edge_weights, alpha=0.5)
    plt.show()
In [29]:
def get_graph_metrics(graph):
    num_chars = len(graph.nodes())                 # 1. number of characters
    num_relations = len(graph.edges())             # 2. number of relationships
    avg_clustering = nx.average_clustering(graph)  # 3. average clustering coefficient
    per_node_centrality = np.array(sorted(nx.degree_centrality(graph).values(), reverse=True))
    # 4. degree centralization (Freeman)
    centralization = sum(per_node_centrality[0] - per_node_centrality) / ((num_chars - 1) * (num_chars - 2))
    # 5. single character centrality: gap between the two most central characters
    sing_char_centrality = (per_node_centrality[0] - per_node_centrality[1]) / sum(per_node_centrality)
    edge_weights = sorted([float(graph[u][v]['weight']) / 5 for u, v in graph.edges()], reverse=True)
    # 6. single relationship centrality: gap between the two strongest relationships
    single_relationship_centrality = (edge_weights[0] - edge_weights[1]) / sum(edge_weights)
    degrees = graph.degree(graph.nodes(), weight='weight').values()
    degrees = np.array(degrees) / sum(edge_weights)  # normalize by total relationship strength
    k = min(10, num_chars)
    top_char_degree_var = np.var(sorted(degrees, reverse=True)[:k])  # 7. top character weight variance
    e_weights = np.array(edge_weights) / sum(edge_weights)
    k = min(10, len(e_weights))
    top_relationship_var = np.var(e_weights[:k])  # 8. top relationship strength variance
    return [num_chars, num_relations, avg_clustering, centralization, sing_char_centrality,
            single_relationship_centrality, top_char_degree_var, top_relationship_var]
In [30]:
print get_graph_metrics(graph_new)  # metrics for the last movie's graph
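A natural follow-up is to tabulate the metrics for every film; a minimal sketch (the titles and graphs line up because both lists were built by iterating script_dicts in the order of movies):

for movie, g in zip(movies, graphs_new):
    print movie, get_graph_metrics(g)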
In [23]:
# graph metrics, computed step by step on the last graph built above
avg_clustering = nx.average_clustering(graph_new)
degree_centrality = nx.degree_centrality(graph_new)
print "average clustering :", avg_clustering
per_node_cent = np.array(degree_centrality.values())
n = len(graph_new.nodes())
centralization = sum(max(per_node_cent) - per_node_cent) / ((n - 1) * (n - 2))
print "centralization :", centralization
In [28]:
# metrics as given in the paper
# single character centrality: gap between the two most central characters
temp = sorted(per_node_cent, reverse=True)
sing_char_cent = (temp[0] - temp[1]) / sum(per_node_cent)
print sing_char_cent
In [31]:
# single relationship centrality: gap between the two strongest relationships
top_weights = sorted(edge_weights, reverse=True)
sing_rel_cent = (top_weights[0] - top_weights[1]) / sum(edge_weights)
print sing_rel_cent
In [32]:
# top character weight variance: variance of the (normalized) weighted
# degrees of the top-10 characters
degrees = graph_new.degree(graph_new.nodes(), weight='weight').values()
degrees = np.array(degrees) / sum(edge_weights)  # normalize by total relationship strength
k = min(10, len(graph_new.nodes()))
top_degrees = sorted(degrees, reverse=True)[:k]
char_degree_var = np.var(top_degrees)
print char_degree_var
# open question: should this instead be normalized by the sum of degrees?
In [33]:
# top relationship strength variance: variance of the top-10 edge
# weights after normalizing them to sum to 1
e_weights = np.array(edge_weights) / sum(edge_weights)
k = min(10, len(e_weights))
top_e_weights = sorted(e_weights, reverse=True)[:k]
relationship_variance = np.var(top_e_weights)
print relationship_variance
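To see what this variance captures, compare a film where one relationship dominates with an ensemble of evenly weighted relationships (toy numbers, a sketch only):

dominated = np.array([10.0, 1, 1, 1, 1])  # one central relationship
ensemble = np.array([3.0, 3, 3, 3, 2])    # evenly spread relationships
for w in (dominated, ensemble):
    w = w / sum(w)
    print np.var(sorted(w, reverse=True)[:10])
# the dominated distribution has the (much) larger variance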