In [121]:
import pandas as pd
import networkx as nx
In [104]:
# Read 'summary.csv' and show it as the cell output (the frame is not kept in a variable).
# NOTE(review): parse_dates=True only asks pandas to try parsing the *index* as dates;
# no index_col is given here, so confirm whether a date column was meant to be parsed.
pd.read_csv('summary.csv', parse_dates=True)
Out[104]:
In [34]:
# Read 'author-churn.csv' (path is relative to the working directory) into `authors_churn`.
authors_churn = pd.read_csv('author-churn.csv')
In [42]:
# Display the frame read from 'author-churn.csv' as the cell output.
# NOTE(review): this renders the whole frame; prefer .head() if the file is large.
authors_churn
Out[42]:
In [ ]:
# Read 'authors.csv' (path is relative to the working directory) into `authors`.
# NOTE(review): this cell carries no execution count, yet the cells below use
# `authors`; it has to run first on a fresh kernel.
authors = pd.read_csv('authors.csv')
In [41]:
# Preview the first five rows of `authors` (five is also the pandas default).
authors.head(n=5)
Out[41]:
In [43]:
# Summary statistics for `authors`; by default pandas summarises the numeric columns.
authors.describe()
Out[43]:
In [24]:
%matplotlib inline
authors.plot(kind='scatter', x='n-authors', y='n-revs');
In [32]:
# Per-group summary statistics: one group for each distinct 'n-authors' value.
authors.groupby(by='n-authors').describe()
Out[32]:
In [119]:
# Read 'communication.csv' into `communication`; later cells read its
# 'author', 'peer' and 'strength' columns.
# NOTE(review): parse_dates=True only asks pandas to try parsing the *index* as dates;
# no index_col is given here, so confirm whether it has any effect on this file.
communication = pd.read_csv('communication.csv', parse_dates=True)
In [169]:
# Min-max scale the 'strength' column into [0, 1] and store it as 'normal_strength'.
# Written as a single vectorised Series expression so min()/max() are evaluated
# once for the whole column instead of once per row.
# If every strength value is identical the range is 0 and the result is NaN.
# Assumes 'strength' is numeric -- confirm against the CSV.
strength = communication['strength']
communication['normal_strength'] = (strength - strength.min()) / (strength.max() - strength.min())
In [160]:
# Preview the first five rows of `communication` (five is also the pandas default).
communication.head(n=5)
Out[160]:
In [170]:
# Build the communication graph: one edge per row, from the 'author' column to
# the 'peer' column, carrying the row's 'strength' value as an edge attribute.
# networkx 2.x renamed from_pandas_dataframe to from_pandas_edgelist (same
# leading arguments); pick whichever this installation provides so the cell
# runs on both old and current releases.
_edgelist_to_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
G = _edgelist_to_graph(communication, 'author', 'peer', ['strength'])
In [171]:
# Compute node positions once; every draw call below reuses them so that
# nodes, labels and edges line up.
# NOTE(review): no seed is passed, so the layout may differ between runs.
pos = nx.spring_layout(G)

# One 'strength' value per edge, in G.edges() order.
# Despite the name, this list is used as the edge *colour* values below.
edgewidth = [edge_attrs['strength'] for (_u, _v, edge_attrs) in G.edges(data=True)]

# Draw order is kept as-is: edge labels, nodes, node labels, then edges.
nx.draw_networkx_edge_labels(G, pos)
nx.draw_networkx_nodes(G, pos)
nx.draw_networkx_labels(G, pos)
# Last expression of the cell: its return value becomes the cell output.
nx.draw_networkx_edges(G, pos, edge_color=edgewidth)
Out[171]:
In [179]:
# Average degree connectivity of G, using 'strength' as the edge weight.
# nx.k_nearest_neighbors was only an alias for this function and was removed in
# networkx 3.0; calling average_degree_connectivity directly works on old and
# new releases alike and returns the same mapping.
nx.average_degree_connectivity(G, weight='strength')
Out[179]:
In [178]:
# PageRank score for every node, weighting edges by their 'strength' attribute.
# alpha=0.85 is the library's default damping factor, spelled out for the reader.
nx.pagerank(G, alpha=0.85, weight='strength')
Out[178]: