In [1]:
%matplotlib inline

In [2]:
import networkx as nx
import numpy as np
import pandas as pd
# import projx as px
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
plt.rcParams['figure.figsize'] = (12, 7)

In [4]:
def prob_dist(itrbl):
    count = {}
    for i in itrbl:
        count.setdefault(i, 0)
        count[i] += 1
    sr = pd.Series(count)
    prob = sr.apply(lambda x: float(x) / len(itrbl))
    return prob

def basic_graph_stats(g):
    stats = {
        "num_nodes": len(g),
        "num_edges": len(g.edges()),
        "density": nx.density(g),
        "diameter": nx.diameter(g),
        "avg_short_path": nx.average_shortest_path_length(g),
        "avg_clust": nx.average_clustering(g)
    }
    return pd.Series(stats)

In [5]:
graph = nx.read_gexf("projections/onemode.gexf")

In [6]:
subgraphs = list(nx.connected_component_subgraphs(graph))
print([len(sub) for sub in subgraphs])


[305, 5, 8]

In [7]:
g = subgraphs[0]
g1 = subgraphs[1]
g2 = subgraphs[2]

for js graphviz


In [8]:
import json
from networkx.readwrite import json_graph

In [9]:
peru = json_graph.node_link_data(g1)
china = json_graph.node_link_data(g2)

In [10]:
with open("js_viz/graphs/peru.json", "w") as f:
    json.dump(peru, f)

In [11]:
with open("js_viz/graphs/china.json", "w") as f:
    json.dump(china, f)

In [12]:
nx.write_gexf(g, "projections/subgraph_onemode_gc.gexf")
nx.write_gexf(g1, "projections/subgraph_onemode_sub1.gexf")
nx.write_gexf(g2, "projections/subgraph_onemode_sub2.gexf")

In [13]:
print(basic_graph_stats(g))


avg_clust            0.798456
avg_short_path       2.635656
density              0.046894
diameter             6.000000
num_edges         2174.000000
num_nodes          305.000000
dtype: float64

In [14]:
roles = [a.get("role", "") for (n, a) in g.nodes(data=True)]  # Hmm.
places = [a["top_place"] for (n, a) in g.nodes(data=True)]
genres = [a["top_genre"] for (n, a) in g.nodes(data=True)]
sr = pd.Series({"role": prob_dist(roles), "place": prob_dist(places), "genre": prob_dist(genres)})

In [15]:
prob_dist(roles).plot(kind="bar")
plt.savefig("img/roles.png")



In [16]:
prob_dist(places).plot(kind="bar")
plt.savefig("img/places.png")



In [17]:
prob_dist(genres).plot(kind="bar")
plt.savefig("img/genres.png")



In [18]:
px.draw_simple_graph(g1)
#plt.savefig("img/china_component.png")


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-7f6160112ac7> in <module>()
----> 1 px.draw_simple_graph(g1)
      2 #plt.savefig("img/china_component.png")

NameError: name 'px' is not defined

In [ ]:
px.draw_simple_graph(g2)
#plt.savefig("img/peru_component.png")

In [ ]:
print(len(g), len(g.edges()), nx.density(g))

Centrality


In [ ]:
bc = nx.betweenness_centrality(g, weight="weight")
ec = nx.eigenvector_centrality(g, weight="weight", max_iter=500)
cc = nx.closeness_centrality(g)
deg = nx.degree(g)
pr = nx.pagerank(g, max_iter=500, weight="weight")

In [ ]:
cent_10_df = pd.DataFrame({
    "bc": [(k, g.node[k]["label"], bc[k]) for k in sorted(bc, key=bc.get, reverse=True)[0:10]],
    "ec": [(k, g.node[k]["label"], ec[k]) for k in sorted(ec, key=ec.get, reverse=True)[0:10]],
    "cc": [(k, g.node[k]["label"], cc[k]) for k in sorted(cc, key=cc.get, reverse=True)[0:10]],
    "dc": [(k, g.node[k]["label"], deg[k]) for k in sorted(deg, key=deg.get, reverse=True)[0:10]],
    #"pr": [(k, g.node[k]["label"], pr[k]) for k in sorted(pr, key=pr.get, reverse=True)[0:10]]
})

In [ ]:
print(cent_10_df)

In [ ]:
pd.Series(deg.values()).hist()

In [ ]:
deg_prob = prob_dist(deg.values())
plt.scatter(deg_prob.index, deg_prob)
plt.savefig("img/deg_dist.png")

Degree by role


In [ ]:
authors = pd.Series(nx.degree(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "author"]).values())
patrons = pd.Series(nx.degree(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "patron"]).values())
printers = pd.Series(nx.degree(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "printer/editor"]).values())
signatories = pd.Series(nx.degree(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "signatory"]).values())

In [ ]:
# plt.plot(authors.index, authors, color="r")
# plt.plot(patrons.index, patrons, color="b")
# plt.plot(printers.index, printers, color="y")
# plt.plot(signatories.index, signatories, color="g")
df = pd.concat([authors, patrons, printers, signatories], axis=1, keys=["author", "patron", "printer", "signatory"])
desc = df.describe()

In [ ]:
desc.to_clipboard()

In [ ]:
a = [auth for auth in df["author"] if not np.isnan(auth)]
p = [auth for auth in df["patron"] if not np.isnan(auth)]
pr = [auth for auth in df["printer"] if not np.isnan(auth)]
s= [auth for auth in df["signatory"] if not np.isnan(auth)]
sns.boxplot([a,p,pr,s], names=["author", "patron", "printer", "signatory"])

In [ ]:
authors = pd.Series(nx.betweenness_centrality(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "author"]).values())
patrons = pd.Series(nx.betweenness_centrality(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "patron"]).values())
printers = pd.Series(nx.betweenness_centrality(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "printer/editor"]).values())
signatories = pd.Series(nx.betweenness_centrality(g, nbunch=[n for (n, a) in g.nodes(data=True) if a.get("role") == "signatory"]).values())

In [ ]:
df = pd.concat([authors, patrons, printers, signatories], axis=1, keys=["author", "patron", "printer", "signatory"])
desc = df.describe()

In [ ]:
a = [auth for auth in df["author"] if not np.isnan(auth)]
p = [auth for auth in df["patron"] if not np.isnan(auth)]
pr = [auth for auth in df["printer"] if not np.isnan(auth)]
s= [auth for auth in df["signatory"] if not np.isnan(auth)]
sns.boxplot([a,p,pr,s], names=["author", "patron", "printer", "signatory"])

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
g.nodes(data=True)

In [ ]:
cent_df = pd.DataFrame({
    "dc": nx.degree_centrality(g).values(),    
    "bc": bc.values(),
    "cc": cc.values(),
    "ec": ec.values(),
})

In [ ]:
cent_df.hist(bins=20)

In [ ]:
pd.Series(bc.values()).hist()

In [ ]:
pd.Series(cc.values()).hist()

In [ ]:
pd.Series(ec.values()).hist()

In [ ]:
pd.Series(pr.values()).hist()

Assortativity

Degree


In [ ]:
nx.degree_assortativity_coefficient(g)

In [ ]:
r = nx.degree_assortativity_coefficient(g)
print("%3.1f"%r)

In [ ]:
nodes_by_deg = sorted(deg, key=deg.get, reverse=True)
mtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)
sns.heatmap(mtrx)

In [ ]:
weight_sr = pd.Series([attrs["weight"] for s, t, attrs in g.edges(data=True)])
weight_sr.describe()

In [ ]:
quant = weight_sr.quantile(.75)

In [ ]:
plt.rcParams['figure.figsize'] = (17, 12)

In [ ]:
heatmap = plt.imshow(mtrx)
heatmap.set_clim(0.0, quant)
plt.colorbar()

In [ ]:
stripmtrx = mtrx[:, :100]

In [ ]:
heatmap = plt.imshow(stripmtrx)
heatmap.set_clim(0.0, quant)
plt.colorbar()

In [ ]:
zoommtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)[:50, :50]
sns.heatmap(zoommtrx)

In [ ]:
zoomquant = pd.Series(zoommtrx.flatten().tolist()[0]).quantile(0.9)

In [ ]:
# heatmap = plt.imshow(zoommtrx)
# heatmap.set_clim(0.0, zoomquant)
# plt.colorbar()

Attribute


In [ ]:
nx.attribute_assortativity_coefficient(g, "top_place")

In [ ]:
nx.attribute_assortativity_coefficient(g, "top_genre")

In [ ]:
nx.attribute_assortativity_coefficient(g, "role")

In [ ]:
len(g.edges())

In [ ]:
def edge_types(g):
    tps = {}
    for s, t, attrs in g.edges(data=True):
        srole = g.node[s].get("role", "")
        trole = g.node[t].get("role", "")
        if srole and trole:
            if (trole, srole) in tps:
                tps[(trole, srole)].append(attrs["weight"])
            else:
                tps.setdefault((srole, trole), [])
                tps[(srole, trole)].append(attrs["weight"])
    return tps

def edge_aggs(tps):
    aggs = {}
    for k, v in tps.items():
        aggs[k] = (len(v) / 2174.0, sum(v) / len(v))
    return aggs

In [ ]:
etps = edge_types(g)
aggs = edge_aggs(etps)

In [ ]:
aggs

In [ ]:
2174 * 0.025

In [ ]:
pd.Series(etps[(u'patron', u'author')]).plot(kind="box")

Clustering


In [ ]:
nx.average_clustering(g)

In [ ]:
pd.Series(nx.clustering(g).values()).hist()

In [ ]:
nx.transitivity(g)

Paths


In [ ]:
nx.diameter(g)

In [ ]:
nx.average_shortest_path_length(g)