In [1]:
%matplotlib inline

In [2]:
import networkx as nx
import pandas as pd
# import projx as px fails in python3
import matplotlib.pyplot as plt
import as cm
import matplotlib.colors as clrs
import seaborn as sns

In [3]:
plt.rcParams['figure.figsize'] = (12, 7)

In [4]:
def prob_dist(itrbl):
    count = {}
    for i in itrbl:
        count.setdefault(i, 0)
        count[i] += 1
    sr = pd.Series(count)
    prob = sr.apply(lambda x: float(x) / len(itrbl))
    return prob

def basic_graph_stats(g):
    stats = {
        "num_nodes": len(g),
        "num_edges": len(g.edges()),
        "density": nx.density(g),
        "diameter": nx.diameter(g),
        "avg_short_path": nx.average_shortest_path_length(g),
        "avg_clust": nx.average_clustering(g),
        "transitivity": nx.transitivity(g)
    return pd.Series(stats)

In [5]:
graph = nx.read_gexf("projections/fifty_percent_cut.gexf")

In [7]:
subgraphs = list(nx.connected_component_subgraphs(graph))
[len(sub) for sub in subgraphs]


In [8]:
g = subgraphs[0]

In [9]:
nx.write_gexf(g, "projections/subgraph_fifty_cut.gexf")

In [10]:

avg_clust            0.803597
avg_short_path       2.809701
density              0.044464
diameter             6.000000
num_edges         1233.000000
num_nodes          236.000000
transitivity         0.329093
dtype: float64


In [11]:
bc = nx.betweenness_centrality(g, weight="weight")
ec = nx.eigenvector_centrality(g, weight="weight", max_iter=500)
cc = nx.closeness_centrality(g)
deg =
pr = nx.pagerank(g, max_iter=500, weight="weight")

In [12]:
cent_10_df = pd.DataFrame({
    "bc": [(k, g.node[k]["label"], bc[k]) for k in sorted(bc, key=bc.get, reverse=True)[0:10]],
    "ec": [(k, g.node[k]["label"], ec[k]) for k in sorted(ec, key=ec.get, reverse=True)[0:10]],
    "cc": [(k, g.node[k]["label"], cc[k]) for k in sorted(cc, key=cc.get, reverse=True)[0:10]],
    "dc": [(k, g.node[k]["label"], deg[k]) for k in sorted(deg, key=deg.get, reverse=True)[0:10]],
    "pr": [(k, g.node[k]["label"], pr[k]) for k in sorted(pr, key=pr.get, reverse=True)[0:10]]

In [13]:

                                                  bc  \
0   (440, Felix Lope de Vega Carpio, 0.197106716329)   
1  (184, Pedro Fernandez de Castro, Conde de Lemo...   
2                (437, Alonso Perez, 0.141436668139)   
3      (153, Sebastian de Cormellas, 0.128245367804)   
4                 (70, Mateo Aleman, 0.102287983172)   
5              (3, Bernardo Grassa, 0.0998645172895)   
6  (208, Miguel de Cervantes Saavedra, 0.09597349...   
7            (207, Jorge Rodriguez, 0.0809602103162)   
8            (267, Juan de Bonilla, 0.0580469176214)   
9  (215, Francisco Murcia de la Llana, 0.05423760...   

                                                  cc  \
0    (440, Felix Lope de Vega Carpio, 0.54524361949)   
1  (215, Francisco Murcia de la Llana, 0.51762114...   
2  (100, Gutierre de Cetina, Vicario General, 0.4...   
3  (208, Miguel de Cervantes Saavedra, 0.47667342...   
4          (209, Ruy Pirez da Veiga, 0.473790322581)   
5      (153, Sebastian de Cormellas, 0.470941883768)   
6             (352, Marcos Teixiera, 0.469061876248)   
7       (147, Tomas Gracian Dantisco, 0.46442687747)   
8                (437, Alonso Perez, 0.461689587426)   
9                (207, Jorge Rodriguez, 0.458984375)   

                                                  dc  \
0               (440, Felix Lope de Vega Carpio, 88)   
1            (215, Francisco Murcia de la Llana, 63)   
2  (184, Pedro Fernandez de Castro, Conde de Lemo...   
3            (208, Miguel de Cervantes Saavedra, 55)   
4     (100, Gutierre de Cetina, Vicario General, 47)   
5                       (212, Juan de la Cuesta, 36)   
6                             (70, Mateo Aleman, 36)   
7                     (331, Jose de Valdivielso, 34)   
8                          (257, Jorge de Tovar, 32)   
9                      (209, Ruy Pirez da Veiga, 32)   

                                                  ec  \
0  (215, Francisco Murcia de la Llana, 0.43885770...   
1   (440, Felix Lope de Vega Carpio, 0.321814008236)   
2  (208, Miguel de Cervantes Saavedra, 0.31870776...   
3  (100, Gutierre de Cetina, Vicario General, 0.2...   
4           (212, Juan de la Cuesta, 0.261972128741)   
5  (184, Pedro Fernandez de Castro, Conde de Lemo...   
6          (331, Jose de Valdivielso, 0.18776568372)   
7         (214, Hernando de Vallejo, 0.144271507257)   
8              (257, Jorge de Tovar, 0.144003233588)   
9    (259, Miguel de Ondarza Zabala, 0.143252046672)   

0   (440, Felix Lope de Vega Carpio, 0.044227642188)  
1  (215, Francisco Murcia de la Llana, 0.03293063...  
2  (208, Miguel de Cervantes Saavedra, 0.02790403...  
3  (184, Pedro Fernandez de Castro, Conde de Lemo...  
4  (100, Gutierre de Cetina, Vicario General, 0.0...  
5                (70, Mateo Aleman, 0.0160517314959)  
6           (433, Alonso de Barros, 0.0146365198798)  
7        (331, Jose de Valdivielso, 0.0143919231841)  
8          (212, Juan de la Cuesta, 0.0143389550352)  
9          (209, Ruy Pirez da Veiga, 0.014244069222)  

In [14]:

In [15]:
deg_prob = prob_dist(list(deg.values()))
plt.scatter(deg_prob.index, deg_prob)

In [16]:

In [17]:

In [18]:

In [19]:

In [20]:


In [21]:
r = nx.degree_assortativity_coefficient(g)


In [22]:
nodes_by_deg = sorted(deg, key=deg.get, reverse=True)
mtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)

In [23]:
weight_sr = pd.Series([attrs["weight"] for s, t, attrs in g.edges(data=True)])

count    1233.000000
mean        0.281060
std         0.233581
min         0.142857
25%         0.166667
50%         0.200000
75%         0.309524
max         2.602381
dtype: float64

In [24]:
quant = weight_sr.quantile(.75)

In [25]:
plt.rcParams['figure.figsize'] = (12, 7)

matrix([[ 0.        ,  1.67604428,  0.        , ...,  0.17763158,
          0.        ,  0.        ],
        [ 1.67604428,  0.        ,  0.77738095, ...,  0.17763158,
          0.        ,  0.19444444],
        [ 0.        ,  0.77738095,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.17763158,  0.17763158,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.19444444,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [26]:
#colors = [('purple')] + [(cm.jet(i)) for i in xrange(1,256)]
#new_map = clrs.LinearSegmentedColormap.from_list('new_map', colors, N=256)
# heatmap = plt.imshow(mtrx, interpolation='nearest')
# heatmap.set_clim(0.0, quant)
# plt.colorbar()
# sns.heatmap(mtrx)
# plt.savefig("img/cutlines_deg_assort.png")

In [27]:
stripmtrx = mtrx[:100, :50]

In [28]:
# heatmap = plt.imshow(stripmtrx)
# heatmap.set_clim(0.0, quant)
# plt.colorbar()
# sns.heatmap(stripmtrx)

In [29]:
zoommtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)[:50, :10]

In [30]:
zoomquant = pd.Series(zoommtrx.flatten().tolist()[0]).quantile(0.9)

In [31]:
# heatmap = plt.imshow(zoommtrx)
# heatmap.set_clim(0.0, zoomquant)
# plt.colorbar()

In [32]:
place = nx.attribute_assortativity_coefficient(g, "top_place")

In [33]:
genre = nx.attribute_assortativity_coefficient(g, "top_genre")

In [34]:
role = nx.attribute_assortativity_coefficient(g, "role")

In [35]:
assort_df = pd.Series({
    "deg": r,
    "role": role,
    "place": place,
    "genre": genre

deg     -0.133977
genre    0.455555
place    0.251043
role    -0.121747
dtype: float64

Remove author/patron

In [36]:
no_auth_patron = g.copy()
# g.nodes(data=True)

In [37]:
no_auth_patron = g.copy()
for node, attrs in g.nodes(data=True):
    if attrs.get("role", "") == "author" or attrs.get("role", "") == "patron" or not attrs.get("top_place"):

In [38]:
place = nx.attribute_assortativity_coefficient(no_auth_patron, "top_place")


In [39]:
comps = list(nx.connected_component_subgraphs(no_auth_patron))
for x in comps:


In [40]:
def group_by_top_place(g):
    places = {}
    current = 1
    for n, attrs in g.nodes(data=True):
        tp = attrs["top_place"]
        if tp not in places:
            places[tp] = int(current)
            group = int(current)
            current += 1
            group = places[tp]
        g.node[n]["group"] = group
    return g

In [41]:
no_auth_patron = group_by_top_place(no_auth_patron)

In [42]:
nx.write_gexf(no_auth_patron, "projections/no_auth_patron.gexf")


In [43]:
b = nx.betweenness_centrality(g)

In [44]:
d = {"author": [], "signatory": [], "patron": [], "printer/editor": []}
for k, v in b.items():
    role = g.node[k].get("role", "")
    if role:

In [45]:
import numpy as np
sns.boxplot([d["author"],  d["patron"], d["printer/editor"], d["signatory"]], names=["author", "patron", "printer", "signatory"])
plt.ylim(0.0, 0.08)

(0.0, 0.08)

In [46]:

(array([ 17.,   1.,   1.,   0.,   1.,   0.,   0.,   0.,   0.,   1.]),
 array([ 0.        ,  0.03039818,  0.06079637,  0.09119455,  0.12159274,
         0.15199092,  0.18238911,  0.21278729,  0.24318548,  0.27358366,
 <a list of 10 Patch objects>)

In [ ]:

In [ ]:


In [47]:


In [48]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f10fc14a590>

In [49]:



In [50]:


In [51]:



In [52]:
mod_df = pd.read_csv("data/communities/modularity.csv", encoding="utf-8")
mod_map = {3: 1, 2: 2, 5: 3, 9: 4, 4: 5, 8: 6, 0: 7, 1: 8, 7: 9, 6: 10}
mod_df["Modularity Class"] = mod_df["Modularity Class"].apply(lambda x: mod_map[x])

In [53]:
mod = mod_df.groupby("Modularity Class").size()

In [54]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f10fc0f56d0>

In [55]:
mod_places = mod_df.groupby(["Modularity Class", "top_place"]).size()

In [56]:
one = mod_places[1]
# Core group Castile -> Aragon

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6f1a590>

In [57]:
two = mod_places[2]
# Core madrid

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6e8c050>

In [58]:
three = mod_places[3]
# Core Zaragoza in between Madrid and Barcelona

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6dd0890>

In [59]:
four = mod_places[4]
# Aragon

AttributeError                            Traceback (most recent call last)
<ipython-input-59-6c45d5458cc4> in <module>()
      1 four = mod_places[4]
      2 four.sort()
----> 3 four.plot(kind="bar")
      4 # Aragon

AttributeError: 'numpy.int64' object has no attribute 'plot'

In [60]:
five = mod_places[5]
# Lisbon

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6d72810>

In [61]:
six = mod_places[6]

AttributeError                            Traceback (most recent call last)
<ipython-input-61-d650e606e2c3> in <module>()
      1 six = mod_places[6]
      2 six.sort()
----> 3 six.plot(kind="bar")

AttributeError: 'numpy.int64' object has no attribute 'plot'

In [62]:
seven = mod_places[7]

<matplotlib.axes._subplots.AxesSubplot at 0x7f10fcc69ed0>

In [63]:
eight = mod_places[8]

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6c2bd90>

In [64]:
nine = mod_places[9]

<matplotlib.axes._subplots.AxesSubplot at 0x7f10fccbc2d0>

In [65]:
ten = mod_places[10]

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6a4c490>

In [ ]:

In [ ]:

In [ ]:

In [66]:
mod_places = mod_df.groupby(["role", "Modularity Class"]).size()

role            Modularity Class
author          1                    7
                2                    1
                3                    2
                5                    1
                7                    3
                8                    1
                9                    4
                10                   2
patron          1                    5
                2                    1
                3                    4
                5                    2
                7                    8
                8                    2
                9                    4
                10                   4
printer/editor  1                    9
                2                    1
                3                    4
                5                    2
                7                   14
                8                    8
                9                    7
                10                  14
signatory       1                   26
                2                    2
                3                   23
                5                    3
                7                   24
                8                   12
                9                   17
                10                  16
dtype: int64

In [67]:
df = pd.DataFrame({
        "author": mod_places["author"],
        "patron": mod_places["patron"],
        "printer/editor": mod_places["printer/editor"],
        "signatory": mod_places["signatory"]})

In [68]:
df.plot(kind="bar", stacked=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f6a36490>

In [69]:

  File "<ipython-input-69-bbfed09d89c7>", line 1
SyntaxError: unexpected EOF while parsing

In [70]:
mod_places = mod_df.groupby(["Modularity Class", "role"]).size()

In [71]:
mod_places = mod_places.reset_index()

In [72]:
mod_places = mod_df.groupby(["Modularity Class", "role"]).size()

In [73]:
grouped = mod_places.groupby(level=0)

In [74]:

Modularity Class
1                   47
2                    5
3                   33
5                    8
7                   49
8                   23
9                   32
10                  36
dtype: int64

In [75]:
divided = mod_places.divide(grouped.sum())
# divided = divided.reset_index()
divided = divided.unstack(level=0).fillna(0)
trans = divided.transpose()
trans.plot(kind="bar", stacked=True)
# trans

<matplotlib.axes._subplots.AxesSubplot at 0x7f10f68cdf10>

In [76]:

role author patron printer/editor signatory
Modularity Class
1 0.148936 0.106383 0.191489 0.553191
2 0.200000 0.200000 0.200000 0.400000
3 0.060606 0.121212 0.121212 0.696970
5 0.125000 0.250000 0.250000 0.375000
7 0.061224 0.163265 0.285714 0.489796
8 0.043478 0.086957 0.347826 0.521739
9 0.125000 0.125000 0.218750 0.531250
10 0.055556 0.111111 0.388889 0.444444

In [77]:
regroup = divided.groupby(["role", "Modularity Class", ])

KeyError                                  Traceback (most recent call last)
<ipython-input-77-6e75f0e98a9a> in <module>()
----> 1 regroup = divided.groupby(["role", "Modularity Class", ])

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/generic.pyc in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze)
   2771         axis = self._get_axis_number(axis)
   2772         return groupby(self, by, axis=axis, level=level, as_index=as_index,
-> 2773                        sort=sort, group_keys=group_keys, squeeze=squeeze)
   2775     def asfreq(self, freq, method=None, how=None, normalize=False):

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/groupby.pyc in groupby(obj, by, **kwds)
   1140         raise TypeError('invalid type: %s' % type(obj))
-> 1142     return klass(obj, by, **kwds)

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/groupby.pyc in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze)
    386         if grouper is None:
    387             grouper, exclusions, obj = _get_grouper(obj, keys, axis=axis,
--> 388                                                     level=level, sort=sort)
    390         self.obj = obj

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/groupby.pyc in _get_grouper(obj, key, axis, level, sort)
   2039             exclusions.append(gpr)
   2040             name = gpr
-> 2041             gpr = obj[gpr]
   2043         if isinstance(gpr, Categorical) and len(gpr) != len(obj):

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1676             return self._getitem_multilevel(key)
   1677         else:
-> 1678             return self._getitem_column(key)
   1680     def _getitem_column(self, key):

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   1683         # get column
   1684         if self.columns.is_unique:
-> 1685             return self._get_item_cache(key)
   1687         # duplicate columns & possible reduce dimensionaility

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1050         res = cache.get(item)
   1051         if res is None:
-> 1052             values = self._data.get(item)
   1053             res = self._box_item_values(item, values)
   1054             cache[item] = res

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   2564             if not isnull(item):
-> 2565                 loc = self.items.get_loc(item)
   2566             else:
   2567                 indexer = np.arange(len(self.items))[isnull(self.items)]

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/pandas/core/index.pyc in get_loc(self, key)
   1179         loc : int if unique index, possibly slice or mask if not
   1180         """
-> 1181         return self._engine.get_loc(_values_from_object(key))
   1183     def get_value(self, series, key):

index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3354)()

index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3289)()

KeyError: 'role'

In [78]:



In [79]:
def edge_types(g):
    tps = {}
    for s, t, attrs in g.edges(data=True):
        srole = g.node[s].get("role", "")
        trole = g.node[t].get("role", "")
        if srole and trole:
            if srole == 'patron' and trole == 'printer/editor':
                srole = 'printer/editor'
                trole = 'patron'
            if srole == 'author' and trole == 'patron':
                srole = 'patron'
                trole = 'author'
            if srole == 'author' and trole == 'printer/editor':
                srole = 'printer/editor'
                trole = 'author'
            if srole == 'signatory' and trole == 'patron':
                srole = 'patron'
                trole = 'signatory'
            if srole == 'signatory' and trole == 'printer/editor':
                srole = 'printer/editor'
                trole = 'signatory'
            if srole == 'author' and trole == 'signatory':
                srole = 'signatory'
                trole = 'author'
            if (trole, srole) in tps:
                tps[(trole, srole)].append(attrs["weight"])
                tps.setdefault((srole, trole), [])
                tps[(srole, trole)].append(attrs.get("weight", 0.0))
    return tps

def edge_aggs(tps, e=1233.0):
    aggs = {}
    for k, v in tps.items():
        aggs[k] = (len(v) / e, sum(v) / len(v))
    return aggs

In [80]:
g = nx.read_gexf('js_viz/graphs/cutlines_gc_from_gephi.gephi')
etps = edge_types(g)
aggs = edge_aggs(etps)
whole = pd.DataFrame(aggs).ix[0]

In [81]:
g1 = nx.read_gexf('js_viz/graphs/mod0_from_gephi.gexf')
etps = edge_types(g1)
aggs = edge_aggs(etps, e=len(g1.edges()))
mod0 = pd.DataFrame(aggs).ix[0]
mod0s = pd.DataFrame(aggs).ix[1]

In [82]:
g2 = nx.read_gexf('js_viz/graphs/mod3_from_gephi.gexf')
etps = edge_types(g2)
aggs = edge_aggs(etps, e=len(g2.edges()))
mod3 = pd.DataFrame(aggs).ix[0]
mod3s = pd.DataFrame(aggs).ix[1]

In [83]:
g3 = nx.read_gexf('js_viz/graphs/mod1_from_gephi.gexf')
etps = edge_types(g3)
aggs = edge_aggs(etps, e=len(g3.edges()))
mod1 = pd.DataFrame(aggs).ix[0]
mod1s = pd.DataFrame(aggs).ix[1]

In [84]:
g4 = nx.read_gexf('js_viz/graphs/mod5_from_gephi.gexf')
etps = edge_types(g4)
aggs = edge_aggs(etps, e=len(g4.edges()))
mod5 = pd.DataFrame(aggs).ix[0]
mod5s = pd.DataFrame(aggs).ix[1]

In [85]:
g5 = nx.read_gexf('js_viz/graphs/mod6_from_gephi.gexf')
etps = edge_types(g5)
aggs = edge_aggs(etps, e=len(g5.edges()))
mod6 = pd.DataFrame(aggs).ix[0]
mod6s = pd.DataFrame(aggs).ix[1]

In [86]:
g6 = nx.read_gexf('js_viz/graphs/mod7_from_gephi.gexf')
etps = edge_types(g6)
aggs = edge_aggs(etps, e=len(g6.edges()))
mod7 = pd.DataFrame(aggs).ix[0]
mod7s = pd.DataFrame(aggs).ix[1]

In [87]:
g7 = nx.read_gexf('js_viz/graphs/mod4_from_gephi.gexf')
etps = edge_types(g7)
aggs = edge_aggs(etps, e=len(g7.edges()))
mod4 = pd.DataFrame(aggs).ix[0]
mod4s = pd.DataFrame(aggs).ix[1]

In [88]:
g8 = nx.read_gexf('js_viz/graphs/mod2_from_gephi.gexf')
etps = edge_types(g8)
aggs = edge_aggs(etps, e=len(g8.edges()))
mod2 = pd.DataFrame(aggs).ix[0]
mod2s = pd.DataFrame(aggs).ix[1]

In [89]:
df = pd.concat([mod0, mod3, mod6, mod5, mod7, mod1, mod4, mod2], axis=1).fillna(0)
df.columns = ['subgroup0', 'subgroup3', 'subgroup6', 'subgroup5', 'subgroup7', 'subgroup1', 'subgroup4', 'subgroup2']
t = df.T
filtered = t[[('patron', 'author'), ('patron', 'signatory'),('printer/editor', 'author'),
              ('printer/editor', 'patron'), ('printer/editor', 'signatory'), ('signatory', 'author'),
              ('printer/editor', 'printer/editor'), ('signatory', 'signatory')]]
df = filtered.T

subgroup0 subgroup3 subgroup6 subgroup5 subgroup7 subgroup1 subgroup4 subgroup2
patron author 0 0 0 0 0 0 0 0
signatory 0 0 0 0 0 0 0 0
printer/editor author 0 0 0 0 0 0 0 0
patron 0 0 0 0 0 0 0 0
signatory 0 0 0 0 0 0 0 0
signatory author 0 0 0 0 0 0 0 0
printer/editor printer/editor 0 0 0 0 0 0 0 0
signatory signatory 0 0 0 0 0 0 0 0

In [90]:
desc = {"patron/author": df.ix['patron'].ix['author'],
        "patron/signatory": df.ix['patron'].ix['signatory'],
        "printer/author": df.ix['printer/editor'].ix['author'],
        "printer/patron": df.ix['printer/editor'].ix['patron'],
        "printer/signatory": df.ix['printer/editor'].ix['signatory'],
        "signatory/author": df.ix['signatory'].ix['author'],
        "printer/printer": df.ix['printer/editor'].ix['printer/editor'],
        "signatory/signatory": df.ix['signatory'].ix['signatory']}
edges_df = pd.DataFrame(desc)

In [91]:
g = nx.read_gexf('js_viz/graphs/cutlines_gc_from_gephi.gephi')
etps = edge_types(g)
aggs = edge_aggs(etps)
whole = pd.DataFrame(aggs).ix[0]
t = whole.T
filtered = t[[('patron', 'author'), ('patron', 'signatory'),('printer/editor', 'author'),
              ('printer/editor', 'patron'), ('printer/editor', 'signatory'), ('signatory', 'author'),
              ('printer/editor', 'printer/editor'), ('signatory', 'signatory')]]

whole = filtered.T
# whole.colums=["patron/author", "patron/signatory","printer/author", "printer/patron", "printer/signatory",
#           "signatory/author","printer/printer", "signatory/signatory" ]

whole = whole.reset_index()

level_0 level_1 0
0 patron author 0.033252
1 patron signatory 0.133820
2 printer/editor author 0.085969
3 printer/editor patron 0.078670
4 printer/editor signatory 0.231955
5 signatory author 0.166261
6 printer/editor printer/editor 0.024331
7 signatory signatory 0.205191

In [92]:
sr = whole[0]
sr.index = ["patron/author", "patron/signatory","printer/author", "printer/patron", "printer/signatory",
          "signatory/author","printer/printer", "signatory/signatory" ]

In [93]:
# fig = plt.figure()
# ax1 = fig.add_subplot(111)
ax1 = sns.boxplot([
], names=["patron/author", "patron/signatory","printer/author", "printer/patron", "printer/signatory",
          "signatory/author","printer/printer", "signatory/signatory" ])
# ax2 = ax1.twinx()
# ax1.plot(ax1.get_xticks(), sr)
# plt.ylim(0, 0.4)

In [94]:
df2 = pd.concat([mod0s, mod3s, mod6s, mod5s, mod7s, mod1s, mod4s, mod2s], axis=1).fillna(0)
df2.columns = ['subgroup0', 'subgroup3', 'subgroup6', 'subgroup5', 'subgroup7', 'subgroup1', 'subgroup4', 'subgroup2']
t = df2.T
filtered = t[[('patron', 'author'), ('patron', 'signatory'),('printer/editor', 'author'),
              ('printer/editor', 'patron'), ('printer/editor', 'signatory'), ('signatory', 'author'),
              ('printer/editor', 'printer/editor'), ('signatory', 'signatory')]]

df2 = filtered.T

subgroup0 subgroup3 subgroup6 subgroup5 subgroup7 subgroup1 subgroup4 subgroup2
patron author 0.464841 0.463265 0.654365 0.196429 0.396429 2.167857 0.309524 0.2
signatory 0.332603 0.240787 0.354101 0.157143 0.186835 0.337897 0.309524 0.2
printer/editor author 0.444921 0.238716 0.179444 0.239286 0.227381 0.436756 0.154762 0.2
patron 0.201323 0.204762 0.208854 0.164286 0.181151 0.317641 0.154762 0.2
signatory 0.204777 0.309086 0.191008 0.318398 0.240179 0.235197 0.154762 0.2
signatory author 0.439229 0.325832 0.364921 0.231429 0.297059 0.337897 0.309524 0.2
printer/editor printer/editor 0.399206 0.210317 0.181633 0.142857 0.169841 0.189881 0.000000 0.0
signatory signatory 0.290189 0.374504 0.267857 0.249496 0.215675 0.245346 0.309524 0.2

In [95]:
desc2 = {"patron/author": df2.ix['patron'].ix['author'],
        "patron/signatory": df2.ix['patron'].ix['signatory'],
        "printer/author": df2.ix['printer/editor'].ix['author'],
        "printer/patron": df2.ix['printer/editor'].ix['patron'],
        "printer/signatory": df2.ix['printer/editor'].ix['signatory'],
        "signatory/author": df2.ix['signatory'].ix['author'],
        "printer/printer": df2.ix['printer/editor'].ix['printer/editor'],
        "signatory/signatory": df2.ix['signatory'].ix['signatory']}
strength_df = pd.DataFrame(desc2)

In [96]:
ax1 = sns.boxplot([
], names=["patron/author", "patron/signatory","printer/author", "printer/patron", "printer/signatory",
          "signatory/author","printer/printer", "signatory/signatory" ])
plt.ylim(0, 0.4)

In [97]:
ax1 = sns.boxplot([
], names=["patron/author", "patron/signatory","printer/author", "printer/patron", "printer/signatory",
          "signatory/author","printer/printer", "signatory/signatory" ])
plt.ylim(0, 0.75)

In [98]:

patron/author patron/signatory printer/author printer/patron printer/printer printer/signatory signatory/author signatory/signatory
subgroup0 0.464841 0.332603 0.444921 0.201323 0.399206 0.204777 0.439229 0.290189
subgroup3 0.463265 0.240787 0.238716 0.204762 0.210317 0.309086 0.325832 0.374504
subgroup6 0.654365 0.354101 0.179444 0.208854 0.181633 0.191008 0.364921 0.267857
subgroup5 0.196429 0.157143 0.239286 0.164286 0.142857 0.318398 0.231429 0.249496
subgroup7 0.396429 0.186835 0.227381 0.181151 0.169841 0.240179 0.297059 0.215675
subgroup1 2.167857 0.337897 0.436756 0.317641 0.189881 0.235197 0.337897 0.245346
subgroup4 0.309524 0.309524 0.154762 0.154762 0.000000 0.154762 0.309524 0.309524
subgroup2 0.200000 0.200000 0.200000 0.200000 0.000000 0.200000 0.200000 0.200000

In [99]:

patron/author          0.606589
patron/signatory       0.264861
printer/author         0.265158
printer/patron         0.204097
printer/printer        0.161717
printer/signatory      0.231676
signatory/author       0.313236
signatory/signatory    0.269074
dtype: float64

In [100]:

patron/author          0
patron/signatory       0
printer/author         0
printer/patron         0
printer/printer        0
printer/signatory      0
signatory/author       0
signatory/signatory    0
dtype: float64

In [101]:
edf = pd.concat([strength_df.mean(), edges_df.mean()], axis=1)

In [102]:
edf.columns = ["weight", "freq"]

In [103]:

weight freq
patron/author 0.606589 0
patron/signatory 0.264861 0
printer/author 0.265158 0
printer/patron 0.204097 0
printer/printer 0.161717 0
printer/signatory 0.231676 0
signatory/author 0.313236 0
signatory/signatory 0.269074 0

In [104]:
sns.lmplot(x="weight", y="freq", data=edf)
plt.xlim(0.0, 0.7)

(0.0, 0.7)

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [105]:
tips = sns.load_dataset("tips")

In [106]:

total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
5 25.29 4.71 Male No Sun Dinner 4
6 8.77 2.00 Male No Sun Dinner 2
7 26.88 3.12 Male No Sun Dinner 4
8 15.04 1.96 Male No Sun Dinner 2
9 14.78 3.23 Male No Sun Dinner 2
10 10.27 1.71 Male No Sun Dinner 2
11 35.26 5.00 Female No Sun Dinner 4
12 15.42 1.57 Male No Sun Dinner 2
13 18.43 3.00 Male No Sun Dinner 4
14 14.83 3.02 Female No Sun Dinner 2
15 21.58 3.92 Male No Sun Dinner 2
16 10.33 1.67 Female No Sun Dinner 3
17 16.29 3.71 Male No Sun Dinner 3
18 16.97 3.50 Female No Sun Dinner 3
19 20.65 3.35 Male No Sat Dinner 3
20 17.92 4.08 Male No Sat Dinner 2
21 20.29 2.75 Female No Sat Dinner 2
22 15.77 2.23 Female No Sat Dinner 2
23 39.42 7.58 Male No Sat Dinner 4
24 19.82 3.18 Male No Sat Dinner 2
25 17.81 2.34 Male No Sat Dinner 4
26 13.37 2.00 Male No Sat Dinner 2
27 12.69 2.00 Male No Sat Dinner 2
28 21.70 4.30 Male No Sat Dinner 2
29 19.65 3.00 Female No Sat Dinner 2
... ... ... ... ... ... ... ...
214 28.17 6.50 Female Yes Sat Dinner 3
215 12.90 1.10 Female Yes Sat Dinner 2
216 28.15 3.00 Male Yes Sat Dinner 5
217 11.59 1.50 Male Yes Sat Dinner 2
218 7.74 1.44 Male Yes Sat Dinner 2
219 30.14 3.09 Female Yes Sat Dinner 4
220 12.16 2.20 Male Yes Fri Lunch 2
221 13.42 3.48 Female Yes Fri Lunch 2
222 8.58 1.92 Male Yes Fri Lunch 1
223 15.98 3.00 Female No Fri Lunch 3
224 13.42 1.58 Male Yes Fri Lunch 2
225 16.27 2.50 Female Yes Fri Lunch 2
226 10.09 2.00 Female Yes Fri Lunch 2
227 20.45 3.00 Male No Sat Dinner 4
228 13.28 2.72 Male No Sat Dinner 2
229 22.12 2.88 Female Yes Sat Dinner 2
230 24.01 2.00 Male Yes Sat Dinner 4
231 15.69 3.00 Male Yes Sat Dinner 3
232 11.61 3.39 Male No Sat Dinner 2
233 10.77 1.47 Male No Sat Dinner 2
234 15.53 3.00 Male Yes Sat Dinner 2
235 10.07 1.25 Male No Sat Dinner 2
236 12.60 1.00 Male Yes Sat Dinner 2
237 32.83 1.17 Male Yes Sat Dinner 2
238 35.83 4.67 Female No Sat Dinner 3
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [ ]:

In [ ]:

In [107]:
chisquare(df.whole.values, df.mod0.values)

NameError                                 Traceback (most recent call last)
<ipython-input-107-74a3de4be89b> in <module>()
----> 1 chisquare(df.whole.values, df.mod0.values)

NameError: name 'chisquare' is not defined

In [108]:

level_0 level_1 0
0 patron author 0.033252
1 patron signatory 0.133820
2 printer/editor author 0.085969
3 printer/editor patron 0.078670
4 printer/editor signatory 0.231955
5 signatory author 0.166261
6 printer/editor printer/editor 0.024331
7 signatory signatory 0.205191

In [109]:

level_0 level_1 0
0 patron author 0.033252
1 patron signatory 0.133820
2 printer/editor author 0.085969
3 printer/editor patron 0.078670
4 printer/editor signatory 0.231955
5 signatory author 0.166261
6 printer/editor printer/editor 0.024331
7 signatory signatory 0.205191

In [110]:

author          author            0
patron          author            0
                signatory         0
printer/editor  author            0
                patron            0
                printer/editor    0
                signatory         0
signatory       author            0
                signatory         0
Name: 0, dtype: float64

In [111]:
mod0['printer/editor']['patron'] = 0.0

In [112]:

author          author            0
patron          author            0
                signatory         0
printer/editor  author            0
                patron            0
                printer/editor    0
                signatory         0
signatory       author            0
                signatory         0
Name: 0, dtype: float64

In [ ]:

In [ ]:

In [119]:
from matplotlib import rcParams
rcParams.update({'figure.autolayout': True})

In [113]:
def count_roles(g):
    d = {}
    for n, a, in g.nodes(data=True):
        role = a.get("role")
        if role:
            d.setdefault(role, 0)
            d[role] += 1
    return d

In [120]:
plt.ylabel('# individuals', size=18)
plt.xlabel('role', size=18)
plt.savefig("img/article/figure15.eps", format='eps', dpi=1000)
# plt.savefig("img/role_dist.png")

/home/davebshow/.virtualenvs/scientific/local/lib/python2.7/site-packages/matplotlib/ UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "

In [ ]:

In [ ]:

In [ ]:

In [ ]: