In [1]:
%matplotlib inline

In [2]:
import networkx as nx
import pandas as pd
import projx as px
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as clrs

In [3]:
# Default figure size (width, height) for the plots that follow.
plt.rcParams['figure.figsize'] = (12, 7)

In [4]:
def prob_dist(itrbl):
    """Return the empirical probability distribution of the values in ``itrbl``.

    Parameters
    ----------
    itrbl : iterable of hashable
        Observations to tally. Any iterable is accepted, including one-shot
        iterators and generators; it is traversed exactly once.

    Returns
    -------
    pandas.Series
        Indexed by the distinct observed values; each entry is that value's
        count divided by the total number of observations. An empty float
        Series is returned when ``itrbl`` yields nothing.
    """
    count = {}
    total = 0
    for item in itrbl:
        count[item] = count.get(item, 0) + 1
        # Track the size here rather than calling len() afterwards: iterators
        # and generators have no len() and are exhausted by this loop.
        total += 1
    if total == 0:
        return pd.Series(dtype=float)
    # Dividing by a float keeps true division under Python 2 as well.
    return pd.Series(count) / float(total)


def basic_graph_stats(g):
    """Collect whole-graph summary measures of ``g`` into a pandas Series.

    The Series is keyed by measure name: ``num_nodes``, ``num_edges``,
    ``density``, ``diameter``, ``avg_short_path``, ``avg_clust`` and
    ``transitivity``. Every value comes straight from the corresponding
    networkx call (or ``len``) on ``g``.

    NOTE(review): the path-based measures (diameter, average shortest path
    length) are expected to need a connected graph; the notebook only passes
    a single connected component. Confirm before calling on other graphs.
    """
    node_count = len(g)
    edge_count = len(g.edges())
    density = nx.density(g)
    diameter = nx.diameter(g)
    mean_path_length = nx.average_shortest_path_length(g)
    mean_clustering = nx.average_clustering(g)
    transitivity = nx.transitivity(g)
    return pd.Series(dict(
        num_nodes=node_count,
        num_edges=edge_count,
        density=density,
        diameter=diameter,
        avg_short_path=mean_path_length,
        avg_clust=mean_clustering,
        transitivity=transitivity,
    ))

In [5]:
# Load the graph from its GEXF file (path is relative to the working directory).
graph = nx.read_gexf("projections/fourteen_percent_cut.gexf")

In [6]:
# Split the graph into one subgraph per connected component and list the
# number of nodes in each.
subgraphs = list(nx.connected_component_subgraphs(graph))
print([len(sub) for sub in subgraphs])


[288, 5, 8]

In [7]:
# Pick the main component by size instead of by position: the recorded
# component sizes ([288, 5, 8]) are not sorted, so the order in which the
# components come back should not be relied on.
g = max(subgraphs, key=len)
# The remaining components, kept in their original relative order.
small_components = [sub for sub in subgraphs if sub is not g]
g1 = small_components[0]
g2 = small_components[1]

In [8]:
# Save the main component (g) to its own GEXF file.
nx.write_gexf(g, "projections/subgraph_fourteen_cut.gexf")

In [9]:
# Summary statistics for the main component.
basic_graph_stats(g)


Out[9]:
avg_clust            0.800419
avg_short_path       2.648495
density              0.045708
diameter             6.000000
num_edges         1889.000000
num_nodes          288.000000
transitivity         0.318226
dtype: float64

In [10]:
# Draw the first of the two small components with the projx helper.
px.draw_simple_graph(g1)



In [11]:
# Draw the second small component.
# NOTE(review): edge_label_attr="" presumably turns edge labels off - confirm
# against the projx documentation.
px.draw_simple_graph(g2, edge_label_attr="")



In [12]:
# Node count, edge count and density of the main component.
print(len(g), len(g.edges()), nx.density(g))


(288, 1889, 0.045707510646535034)

Centrality


In [13]:
# Node-level centrality measures on the main component. Betweenness,
# eigenvector centrality and PageRank use the "weight" edge attribute;
# closeness and degree are computed without weights.
# NOTE(review): betweenness uses "weight" for shortest-path lengths (i.e. as
# a distance), while closeness ignores it - confirm both choices are intended
# for these edge weights.
bc = nx.betweenness_centrality(g, weight="weight")
ec = nx.eigenvector_centrality(g, weight="weight", max_iter=500)
cc = nx.closeness_centrality(g)
deg = nx.degree(g)
pr = nx.pagerank(g, max_iter=500, weight="weight")

In [14]:
def _top_ten(scores):
    """Return (node id, node label, score) tuples for the ten best-scoring nodes."""
    ranked = sorted(scores, key=scores.get, reverse=True)
    return [(k, g.node[k]["label"], scores[k]) for k in ranked[0:10]]


# One column per centrality measure; row i holds the node ranked i-th on it.
cent_10_df = pd.DataFrame({
    "bc": _top_ten(bc),
    "ec": _top_ten(ec),
    "cc": _top_ten(cc),
    "dc": _top_ten(deg),
    "pr": _top_ten(pr)
})

In [15]:
# Top-ten table as plain text; wide columns wrap and long cells are truncated.
print(cent_10_df)


                                                  bc  \
0   (440, Felix Lope de Vega Carpio, 0.241528189965)   
1  (184, Pedro Fernandez de Castro, Conde de Lemo...   
2  (100, Gutierre de Cetina, Vicario General, 0.1...   
3                (70, Mateo Aleman, 0.0861877214357)   
4  (215, Francisco Murcia de la Llana, 0.08169622...   
5            (207, Jorge Rodriguez, 0.0801671566518)   
6             (39, Pedro Crasbeeck, 0.0641219920692)   
7               (437, Alonso Perez, 0.0635877642991)   
8               (18, Roger Velpius, 0.0606340905615)   
9          (420, Juan de Bruschere, 0.0509328070304)   

                                                  cc  \
0   (440, Felix Lope de Vega Carpio, 0.577464788732)   
1   (215, Francisco Murcia de la Llana, 0.560546875)   
2  (100, Gutierre de Cetina, Vicario General, 0.5...   
3                (18, Roger Velpius, 0.508865248227)   
4  (184, Pedro Fernandez de Castro, Conde de Lemo...   
5  (208, Miguel de Cervantes Saavedra, 0.50350877...   
6         (331, Jose de Valdivielso, 0.491438356164)   
7                (437, Alonso Perez, 0.482352941176)   
8      (153, Sebastian de Cormellas, 0.480737018425)   
9    (259, Miguel de Ondarza Zabala, 0.474380165289)   

                                                  dc  \
0              (440, Felix Lope de Vega Carpio, 123)   
1           (215, Francisco Murcia de la Llana, 107)   
2     (100, Gutierre de Cetina, Vicario General, 95)   
3  (184, Pedro Fernandez de Castro, Conde de Lemo...   
4            (208, Miguel de Cervantes Saavedra, 65)   
5                (259, Miguel de Ondarza Zabala, 54)   
6                     (331, Jose de Valdivielso, 51)   
7                       (212, Juan de la Cuesta, 50)   
8                            (437, Alonso Perez, 43)   
9                          (257, Jorge de Tovar, 42)   

                                                  ec  \
0  (215, Francisco Murcia de la Llana, 0.43270149...   
1   (440, Felix Lope de Vega Carpio, 0.322317739816)   
2  (208, Miguel de Cervantes Saavedra, 0.30495839...   
3  (100, Gutierre de Cetina, Vicario General, 0.2...   
4           (212, Juan de la Cuesta, 0.256428667979)   
5  (184, Pedro Fernandez de Castro, Conde de Lemo...   
6         (331, Jose de Valdivielso, 0.189422238188)   
7     (259, Miguel de Ondarza Zabala, 0.14941691391)   
8         (214, Hernando de Vallejo, 0.147329469546)   
9              (257, Jorge de Tovar, 0.145305622202)   

                                                  pr  
0  (440, Felix Lope de Vega Carpio, 0.0398834426429)  
1  (215, Francisco Murcia de la Llana, 0.03377190...  
2  (208, Miguel de Cervantes Saavedra, 0.02459741...  
3  (100, Gutierre de Cetina, Vicario General, 0.0...  
4  (184, Pedro Fernandez de Castro, Conde de Lemo...  
5        (331, Jose de Valdivielso, 0.0142910834114)  
6          (212, Juan de la Cuesta, 0.0140054286736)  
7                (70, Mateo Aleman, 0.0134068554931)  
8    (259, Miguel de Ondarza Zabala, 0.012444953533)  
9           (433, Alonso de Barros, 0.0122789286849)  

In [32]:
# Histogram of node degrees.
pd.Series(deg.values()).hist()


Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd3376c1150>

In [33]:
# Empirical degree distribution: fraction of nodes (y) having each degree (x).
deg_prob = prob_dist(deg.values())
plt.scatter(deg_prob.index, deg_prob)


Out[33]:
<matplotlib.collections.PathCollection at 0x7fd33740c4d0>

In [34]:
# Histogram of betweenness centrality values.
pd.Series(bc.values()).hist()


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd3376c1550>

In [35]:
# Histogram of closeness centrality values.
pd.Series(cc.values()).hist()


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd3373b7fd0>

In [36]:
# Histogram of eigenvector centrality values.
pd.Series(ec.values()).hist()


Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd337686dd0>

In [37]:
# Histogram of PageRank values.
pd.Series(pr.values()).hist()


Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd33713c190>

Assortativity

Degree


In [38]:
# Degree assortativity coefficient of the main component, at full precision.
nx.degree_assortativity_coefficient(g)


Out[38]:
-0.13397654720435853

In [39]:
# Keep the coefficient in r (it is reused in the assortativity summary) and
# print it rounded to one decimal place.
r = nx.degree_assortativity_coefficient(g)
print("%3.1f"%r)


-0.1

In [40]:
# Adjacency matrix whose rows and columns are ordered from the highest-degree
# node to the lowest-degree node.
nodes_by_deg = sorted(deg, key=deg.get, reverse=True)
mtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)

In [41]:
# Summary statistics of the "weight" attribute over all edges.
# NOTE(review): the recorded count (1233) does not match the 1889 edges
# reported in the earlier outputs - the outputs may come from different
# kernel states; re-run top to bottom to confirm.
weight_sr = pd.Series([attrs["weight"] for s, t, attrs in g.edges(data=True)])
weight_sr.describe()


Out[41]:
count    1233.000000
mean        0.281060
std         0.233581
min         0.142857
25%         0.166667
50%         0.200000
75%         0.309524
max         2.602381
dtype: float64

In [42]:
# 75th percentile of the edge weights; used as the upper colour limit of the
# adjacency heatmaps.
quant = weight_sr.quantile(.75)

In [43]:
# Larger default figure size for the heatmaps that follow.
plt.rcParams['figure.figsize'] = (17, 12)

In [44]:
# Disabled experiment, kept for reference: a custom colormap that puts
# 'purple' first, followed by the jet colours 1..255.
#colors = [('purple')] + [(cm.jet(i)) for i in xrange(1,256)]
#new_map = clrs.LinearSegmentedColormap.from_list('new_map', colors, N=256)
# Heatmap of the degree-ordered adjacency matrix. The colour scale is capped
# at the 75th-percentile edge weight (presumably so that the few heaviest
# edges do not dominate the scale), and the figure is saved under img/.
heatmap = plt.imshow(mtrx, interpolation='nearest')
heatmap.set_clim(0.0, quant)
plt.colorbar()
plt.savefig("img/cutlines_deg_assort.png")



In [45]:
# All rows against the first 100 columns, i.e. the 100 highest-degree nodes.
stripmtrx = mtrx[:, :100]

In [46]:
# Heatmap of the 100-column strip, using the same colour limits as the full
# matrix heatmap.
heatmap = plt.imshow(stripmtrx)
heatmap.set_clim(0.0, quant)
plt.colorbar()


Out[46]:
<matplotlib.colorbar.Colorbar instance at 0x7fd335556518>

In [47]:
# Top-left 50x50 corner of the degree-ordered adjacency matrix, i.e. the
# links among the 50 highest-degree nodes. Slices the matrix that was
# already built (as stripmtrx does) instead of rebuilding it from the graph.
zoommtrx = mtrx[:50, :50]

In [48]:
# 90th percentile over every cell of the zoomed matrix (zeros included);
# used as the upper colour limit of the zoomed heatmap.
zoom_cells = zoommtrx.A1
zoomquant = pd.Series(zoom_cells).quantile(0.9)

In [49]:
# Heatmap of the 50x50 zoom, with the colour scale capped at zoomquant.
heatmap = plt.imshow(zoommtrx)
heatmap.set_clim(0.0, zoomquant)
plt.colorbar()


Out[49]:
<matplotlib.colorbar.Colorbar instance at 0x7fd335438ef0>

Attribute


In [50]:
# Assortativity of the main component by the "top_place" node attribute.
place = nx.attribute_assortativity_coefficient(g, "top_place")

In [51]:
# Assortativity of the main component by the "top_genre" node attribute.
genre = nx.attribute_assortativity_coefficient(g, "top_genre")

In [52]:
# Assortativity of the main component by the "role" node attribute.
role = nx.attribute_assortativity_coefficient(g, "role")

In [53]:
# Gather the degree and attribute assortativity coefficients side by side.
# Despite its name, assort_df is a pandas Series, not a DataFrame.
assort_df = pd.Series({
    "deg": r,
    "role": role,
    "place": place,
    "genre": genre
})
assort_df


Out[53]:
deg     -0.133977
genre    0.397611
place    0.251043
role    -0.121747
dtype: float64

Clustering


In [54]:
# Average clustering coefficient of the main component.
# NOTE(review): the recorded value (0.8036) differs from avg_clust in the
# earlier summary output (0.8004) - the graph may have changed between runs;
# re-run top to bottom to confirm.
nx.average_clustering(g)


Out[54]:
0.8035973273010204

In [55]:
# Histogram of the per-node clustering coefficients.
pd.Series(nx.clustering(g).values()).hist()


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd33545df50>

In [56]:
# Transitivity of the main component.
# NOTE(review): the recorded value (0.3291) differs from the transitivity in
# the earlier summary output (0.3182) - re-run top to bottom to confirm.
nx.transitivity(g)


Out[56]:
0.3290931989924433

Paths


In [57]:
# Diameter of the main component.
nx.diameter(g)


Out[57]:
6

In [58]:
# Average shortest path length of the main component (unweighted).
# NOTE(review): the recorded value (2.8097) differs from avg_short_path in
# the earlier summary output (2.6485) - re-run top to bottom to confirm.
nx.average_shortest_path_length(g)


Out[58]:
2.809700685178507