notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [2]:

    
import networkx as nx
import pandas as pd
import projx as px
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as clrs



In [3]:

    
# Default figure size (width, height) in inches for the plots that follow.
plt.rcParams['figure.figsize'] = (12, 7)



In [4]:

    
def prob_dist(itrbl):
    """Return the empirical probability of each distinct value in ``itrbl``.

    Parameters
    ----------
    itrbl : iterable of hashable
        Observations to tally. Any iterable is accepted, including
        one-shot iterators and generators.

    Returns
    -------
    pandas.Series
        Indexed by distinct value; each entry is that value's share of
        all observations. Empty input gives an empty Series.
    """
    count = {}
    total = 0
    for i in itrbl:
        count[i] = count.get(i, 0) + 1
        # Tally the total here rather than calling len(itrbl) afterwards,
        # which fails on iterators that have no length.
        total += 1
    sr = pd.Series(count, dtype="float64")
    if total == 0:
        # Nothing observed: return the empty Series instead of dividing by zero.
        return sr
    return sr / total


def basic_graph_stats(g):
    """Collect whole-graph summary statistics into one labelled Series.

    Parameters
    ----------
    g : networkx graph
        Graph to summarise.
        NOTE(review): networkx's diameter and average shortest path length
        are presumably only defined for connected graphs -- confirm callers
        always pass a single connected component.

    Returns
    -------
    pandas.Series
        Entries ``num_nodes``, ``num_edges``, ``density``, ``diameter``,
        ``avg_short_path``, ``avg_clust`` and ``transitivity``.
    """
    stats = {}
    stats["num_nodes"] = len(g)
    stats["num_edges"] = len(g.edges())
    stats["density"] = nx.density(g)
    stats["diameter"] = nx.diameter(g)
    stats["avg_short_path"] = nx.average_shortest_path_length(g)
    stats["avg_clust"] = nx.average_clustering(g)
    stats["transitivity"] = nx.transitivity(g)
    return pd.Series(stats)



In [5]:

    
# Load the projected network from its GEXF file (path is relative to the working directory).
graph = nx.read_gexf("projections/fourteen_percent_cut.gexf")



In [6]:

    
# Materialise one subgraph per connected component and print each component's node count.
# NOTE(review): the components are not size-ordered (the saved output is 288, 5, 8),
# yet the following cell indexes this list by position.
subgraphs = list(nx.connected_component_subgraphs(graph))
print([len(sub) for sub in subgraphs])









    



[288, 5, 8]



In [7]:

    
# Name the first three components by position: `g` is the one analysed in the rest of
# the notebook, `g1` and `g2` are only drawn.
g, g1, g2 = subgraphs[0], subgraphs[1], subgraphs[2]



In [8]:

    
# Save the main component `g` to its own GEXF file.
nx.write_gexf(g, "projections/subgraph_fourteen_cut.gexf")



In [9]:

    
# Summary statistics for the main component.
basic_graph_stats(g)









    Out[9]:





avg_clust            0.800419
avg_short_path       2.648495
density              0.045708
diameter             6.000000
num_edges         1889.000000
num_nodes          288.000000
transitivity         0.318226
dtype: float64



In [10]:

    
# Draw the second component (`subgraphs[1]`) with projx's default drawing options.
px.draw_simple_graph(g1)



In [11]:

    
# Draw the third component (`subgraphs[2]`).
# NOTE(review): the empty edge_label_attr presumably suppresses edge labels -- confirm against projx.
px.draw_simple_graph(g2, edge_label_attr="")



In [12]:

    
# Quick size check of the main component: node count, edge count, density.
print(len(g), len(g.edges()), nx.density(g))









    



(288, 1889, 0.045707510646535034)

Centrality



In [13]:

    
# Node-level centrality scores for the main component, each keyed by node id.
# NOTE(review): betweenness is computed with the edge "weight" attribute while closeness
# is unweighted; confirm the weights are meant to be read as path lengths there.
bc = nx.betweenness_centrality(g, weight="weight")
ec = nx.eigenvector_centrality(g, weight="weight", max_iter=500)
cc = nx.closeness_centrality(g)
# dict() keeps `deg` a plain {node: degree} mapping even when networkx returns a degree
# view instead of a dict, so the later `deg.get` / `deg.values()` calls keep working.
deg = dict(nx.degree(g))
pr = nx.pagerank(g, max_iter=500, weight="weight")



In [14]:

    
def _top_ten(scores):
    """Return (node id, node label, score) tuples for the ten highest-scoring nodes.

    ``scores`` is a {node id: score} dict; labels are read from the "label"
    attribute of the matching node in ``g``.
    """
    ranked = sorted(scores, key=scores.get, reverse=True)[:10]
    return [(k, g.node[k]["label"], scores[k]) for k in ranked]


# One column per centrality measure; row i holds the node ranked i-th by that measure.
cent_10_df = pd.DataFrame({
    "bc": _top_ten(bc),
    "ec": _top_ten(ec),
    "cc": _top_ten(cc),
    # dict() so this also works when `deg` is a networkx degree view without `.get`.
    "dc": _top_ten(dict(deg)),
    "pr": _top_ten(pr)
})



In [15]:

    
# Show the top-ten table; each cell is a (node id, label, score) tuple.
print(cent_10_df)









    



                                                  bc  \
0   (440, Felix Lope de Vega Carpio, 0.241528189965)   
1  (184, Pedro Fernandez de Castro, Conde de Lemo...   
2  (100, Gutierre de Cetina, Vicario General, 0.1...   
3                (70, Mateo Aleman, 0.0861877214357)   
4  (215, Francisco Murcia de la Llana, 0.08169622...   
5            (207, Jorge Rodriguez, 0.0801671566518)   
6             (39, Pedro Crasbeeck, 0.0641219920692)   
7               (437, Alonso Perez, 0.0635877642991)   
8               (18, Roger Velpius, 0.0606340905615)   
9          (420, Juan de Bruschere, 0.0509328070304)   

                                                  cc  \
0   (440, Felix Lope de Vega Carpio, 0.577464788732)   
1   (215, Francisco Murcia de la Llana, 0.560546875)   
2  (100, Gutierre de Cetina, Vicario General, 0.5...   
3                (18, Roger Velpius, 0.508865248227)   
4  (184, Pedro Fernandez de Castro, Conde de Lemo...   
5  (208, Miguel de Cervantes Saavedra, 0.50350877...   
6         (331, Jose de Valdivielso, 0.491438356164)   
7                (437, Alonso Perez, 0.482352941176)   
8      (153, Sebastian de Cormellas, 0.480737018425)   
9    (259, Miguel de Ondarza Zabala, 0.474380165289)   

                                                  dc  \
0              (440, Felix Lope de Vega Carpio, 123)   
1           (215, Francisco Murcia de la Llana, 107)   
2     (100, Gutierre de Cetina, Vicario General, 95)   
3  (184, Pedro Fernandez de Castro, Conde de Lemo...   
4            (208, Miguel de Cervantes Saavedra, 65)   
5                (259, Miguel de Ondarza Zabala, 54)   
6                     (331, Jose de Valdivielso, 51)   
7                       (212, Juan de la Cuesta, 50)   
8                            (437, Alonso Perez, 43)   
9                          (257, Jorge de Tovar, 42)   

                                                  ec  \
0  (215, Francisco Murcia de la Llana, 0.43270149...   
1   (440, Felix Lope de Vega Carpio, 0.322317739816)   
2  (208, Miguel de Cervantes Saavedra, 0.30495839...   
3  (100, Gutierre de Cetina, Vicario General, 0.2...   
4           (212, Juan de la Cuesta, 0.256428667979)   
5  (184, Pedro Fernandez de Castro, Conde de Lemo...   
6         (331, Jose de Valdivielso, 0.189422238188)   
7     (259, Miguel de Ondarza Zabala, 0.14941691391)   
8         (214, Hernando de Vallejo, 0.147329469546)   
9              (257, Jorge de Tovar, 0.145305622202)   

                                                  pr  
0  (440, Felix Lope de Vega Carpio, 0.0398834426429)  
1  (215, Francisco Murcia de la Llana, 0.03377190...  
2  (208, Miguel de Cervantes Saavedra, 0.02459741...  
3  (100, Gutierre de Cetina, Vicario General, 0.0...  
4  (184, Pedro Fernandez de Castro, Conde de Lemo...  
5        (331, Jose de Valdivielso, 0.0142910834114)  
6          (212, Juan de la Cuesta, 0.0140054286736)  
7                (70, Mateo Aleman, 0.0134068554931)  
8    (259, Miguel de Ondarza Zabala, 0.012444953533)  
9           (433, Alonso de Barros, 0.0122789286849)



In [32]:

    
# Histogram of node degrees. Going through dict(deg) works whether `deg` is a plain
# dict or a networkx degree view, which has no `.values()`.
pd.Series(dict(deg)).hist()









    Out[32]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fd3376c1150>



In [33]:

    
# Empirical degree distribution: x is the degree, y is the share of nodes with that degree.
# dict(deg) is used because a networkx degree view has no `.values()`.
deg_prob = prob_dist(dict(deg).values())
plt.scatter(deg_prob.index, deg_prob)









    Out[33]:





<matplotlib.collections.PathCollection at 0x7fd33740c4d0>



In [34]:

    
# Histogram of betweenness centrality across nodes.
pd.Series(bc).hist()









    Out[34]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fd3376c1550>



In [35]:

    
# Histogram of closeness centrality across nodes.
pd.Series(cc).hist()









    Out[35]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fd3373b7fd0>



In [36]:

    
# Histogram of eigenvector centrality across nodes.
pd.Series(ec).hist()









    Out[36]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fd337686dd0>



In [37]:

    
# Histogram of PageRank scores across nodes.
pd.Series(pr).hist()









    Out[37]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fd33713c190>

Assortativity

Degree



In [38]:

    
# Degree assortativity coefficient of the main component.
nx.degree_assortativity_coefficient(g)









    Out[38]:





-0.13397654720435853



In [39]:

    
# Keep the degree assortativity coefficient in `r` and print it rounded to one decimal place.
r = nx.degree_assortativity_coefficient(g)
print("%3.1f"%r)



In [40]:

    
# Order nodes from highest to lowest degree, then build the adjacency matrix with rows
# and columns in that order.
# dict() gives a plain mapping with `.get` whether `deg` is a dict or a networkx degree view.
_deg_lookup = dict(deg)
nodes_by_deg = sorted(_deg_lookup, key=_deg_lookup.get, reverse=True)
mtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)



In [41]:

    
# Gather every edge's "weight" attribute into a Series and summarise its distribution.
weight_sr = pd.Series([data["weight"] for _u, _v, data in g.edges(data=True)])
weight_sr.describe()









    Out[41]:





count    1233.000000
mean        0.281060
std         0.233581
min         0.142857
25%         0.166667
50%         0.200000
75%         0.309524
max         2.602381
dtype: float64



In [42]:

    
# 75th-percentile edge weight; used as the upper colour limit of the heatmaps below.
quant = weight_sr.quantile(q=0.75)



In [43]:

    
# Enlarge the default figure size (width, height) in inches for the heatmaps that follow.
plt.rcParams['figure.figsize'] = (17, 12)



In [44]:

    
# Disabled experiment: a custom colormap whose first colour is 'purple', followed by
# entries 1..255 of `cm.jet` (note it uses `xrange`).
#colors = [('purple')] + [(cm.jet(i)) for i in xrange(1,256)]
#new_map = clrs.LinearSegmentedColormap.from_list('new_map', colors, N=256)
# Heatmap of the degree-ordered adjacency matrix; the colour scale is clipped to
# [0, quant] so entries above `quant` all take the top colour.
heatmap = plt.imshow(mtrx, interpolation='nearest')
heatmap.set_clim(0.0, quant)
plt.colorbar()
# Writes into img/ relative to the working directory.
plt.savefig("img/cutlines_deg_assort.png")



In [45]:

    
# Keep every row but only the first 100 columns, i.e. the 100 highest-degree nodes
# given the degree-descending node order used to build `mtrx`.
stripmtrx = mtrx[:, :100]



In [46]:

    
# Heatmap of the 100-column strip, with the same [0, quant] colour limits as the full matrix.
heatmap = plt.imshow(stripmtrx)
heatmap.set_clim(0.0, quant)
plt.colorbar()









    Out[46]:





<matplotlib.colorbar.Colorbar instance at 0x7fd335556518>



In [47]:

    
# Top-left 50x50 corner of the degree-ordered matrix: entries among the 50 highest-degree nodes.
# Sliced from `mtrx` (built from the same graph and node order) instead of rebuilding the matrix.
zoommtrx = mtrx[:50, :50]



In [48]:

    
# 90th percentile of all entries in the zoomed matrix; `.A1` is the matrix flattened to 1-D.
zoomquant = pd.Series(zoommtrx.A1).quantile(0.9)



In [49]:

    
# Heatmap of the 50x50 zoom, with the colour scale clipped to [0, zoomquant].
heatmap = plt.imshow(zoommtrx)
heatmap.set_clim(0.0, zoomquant)
plt.colorbar()









    Out[49]:





<matplotlib.colorbar.Colorbar instance at 0x7fd335438ef0>

Attribute



In [50]:

    
# Assortativity of the main component with respect to the "top_place" node attribute.
place = nx.attribute_assortativity_coefficient(g, "top_place")



In [51]:

    
# Assortativity of the main component with respect to the "top_genre" node attribute.
genre = nx.attribute_assortativity_coefficient(g, "top_genre")



In [52]:

    
# Assortativity of the main component with respect to the "role" node attribute.
role = nx.attribute_assortativity_coefficient(g, "role")



In [53]:

    
# Collect the four assortativity coefficients into one labelled Series (despite the
# `_df` suffix this is a Series, not a DataFrame).
assort_df = pd.Series(dict(deg=r, role=role, place=place, genre=genre))
assort_df









    Out[53]:





deg     -0.133977
genre    0.397611
place    0.251043
role    -0.121747
dtype: float64

Clustering



In [54]:

    
# Average clustering coefficient of the main component.
# NOTE(review): the saved output here (0.8036) differs from the avg_clust value in the
# saved basic_graph_stats(g) output (0.8004); the cells appear to have run against
# different states of `g` -- re-run the notebook top to bottom.
nx.average_clustering(g)









    Out[54]:





0.8035973273010204



In [55]:

    
# Histogram of per-node clustering coefficients.
pd.Series(nx.clustering(g)).hist()









    Out[55]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fd33545df50>



In [56]:

    
# Transitivity of the main component.
# NOTE(review): the saved output (0.3291) differs from the transitivity value in the
# saved basic_graph_stats(g) output (0.3182) -- likely stale output from another run.
nx.transitivity(g)









    Out[56]:





0.3290931989924433

Paths



In [57]:

    
# Diameter of the main component.
nx.diameter(g)









    Out[57]:





6



In [58]:

    
# Average shortest path length of the main component (no weight attribute is passed).
# NOTE(review): the saved output (2.8097) differs from avg_short_path in the saved
# basic_graph_stats(g) output (2.6485) -- likely stale output from another run.
nx.average_shortest_path_length(g)









    Out[58]:





2.809700685178507