In [7]:
import pandas as pd
import os, sys
import csv

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')

Create full edgelist


In [17]:
# Build one combined edge list from the per-group edge files.
# Group names are the part of each mbox file name before the first '.'.
# Hidden entries (e.g. '.DS_Store') are skipped because they would yield an
# empty group name; names are de-duplicated and sorted so the combined file
# does not depend on the arbitrary ordering of os.listdir().
googlegroups = sorted({
    entry.split('.')[0]
    for entry in os.listdir('../data/mbox/')
    if not entry.startswith('.')
})

# One DataFrame per group, read from '../data/edges/<group>-edges.csv'.
gg_dfs = []

for group in googlegroups:
    gg_df = pd.read_csv('../data/edges/' + group + '-edges.csv')
    gg_dfs.append(gg_df)  # previously appended `gg_sf`, an undefined name

# ignore_index=True gives the combined frame a clean 0..n-1 index instead of
# repeating each per-group index.
gg_edgelist = pd.concat(gg_dfs, ignore_index=True)

# index=False: this file is read back with a plain pd.read_csv and later parsed
# with csv.DictReader; a written index would reappear as an 'Unnamed: 0' data
# column and end up as a spurious edge attribute.
gg_edgelist.to_csv('../data/network/plots_edgelist.csv', index=False)

Disambiguate multiple email addresses


In [ ]:
# Load the combined edge list written by the previous section.
edgelist_path = '../data/network/plots_edgelist.csv'
edges = pd.read_csv(edgelist_path)

In [ ]:
def disambiguate(user, names):
    """Map an alias onto its canonical identifier.

    Parameters
    ----------
    user : the identifier to look up.
    names : a sequence of identifiers treated as the same entity; its first
        element is the canonical one.

    Returns
    -------
    ``names[0]`` when ``user`` is a member of ``names``, otherwise ``user``
    unchanged.
    """
    return names[0] if user in names else user
    
# Alias groups: each inner list holds identifiers that should be merged, and
# every member is rewritten to the first entry of its list.
# (Presumably multiple email addresses of the same person — see section title.)
copies = [['ANONYMIZED']]

# Apply every alias group to both endpoint columns of the edge list.
for names in copies:
    for column in ('source', 'target'):
        edges[column] = edges[column].apply(disambiguate, args=(names,))

In [ ]:
# Persist the disambiguated edge list back to the file it was loaded from.
# index=False keeps the DataFrame's row index out of the CSV.
disambiguated_path = '../data/network/plots_edgelist.csv'
edges.to_csv(disambiguated_path, index=False)

Create graph from edgelist


In [ ]:
# NOTE(review): `Graph` is not imported in any visible cell — presumably
# python-igraph's Graph; confirm and add the import to the top import cell.

# csv.DictReader needs an open file (an iterable of lines), not a path string:
# handed a string it iterates over the characters of the path and never reads
# the file. newline='' is what the csv module expects for file objects, and
# utf-8 matches the default encoding pandas used when writing the file.
with open('../data/network/plots_edgelist.csv', newline='', encoding='utf-8') as edgelist_file:
    # Materialise the rows so the graph can be built after the file is closed.
    edge_records = list(csv.DictReader(edgelist_file))

# vertices=None: no separate vertex table is supplied; edge endpoints come
# from the 'source' and 'target' columns of each row.
g = Graph.DictList(vertices=None, edges=edge_records, directed=True, edge_foreign_keys=('source','target'))

# Give every edge unit weight, then merge parallel edges so that each remaining
# edge's weight is the number of original edges it replaces.
g.es['weight'] = 1
g.simplify(combine_edges={'weight':'sum'})

Calculate vertex attributes


In [ ]:
# Store a set of per-vertex metrics as vertex attributes on `g`.
# Weighted metrics use the 'weight' edge attribute set when the graph was built.

# Degree counts and k-core numbers (overall, incoming, outgoing).
g.vs['degree'] = g.degree()
g.vs['in'] = g.indegree()
g.vs['out'] = g.outdegree()
g.vs['core'] = g.coreness()
# String mode values are used instead of the bare IN / OUT constants, which
# are not imported in any visible cell.
g.vs['core-in'] = g.coreness(mode='in')
g.vs['core-out'] = g.coreness(mode='out')

# Path-based centralities.
# NOTE(review): igraph treats weights as path lengths for closeness and
# betweenness, while 'weight' here counts merged edges — confirm intended.
g.vs['closeness'] = g.closeness(weights='weight')
g.vs['betweenness'] = g.betweenness(weights='weight')

# Local structure.
g.vs['clustering_coef'] = g.transitivity_local_undirected(weights='weight')
g.vs['eigen_centrality'] = g.eigenvector_centrality(weights='weight')
# knn() returns a pair; only its first element is stored here.
g.vs['avg_neighborhood'] = g.knn(weights='weight')[0]
g.vs['neighborhood'] = g.neighborhood_size()

# Link-analysis scores.
g.vs['pagerank'] = g.pagerank(weights='weight')
g.vs['hub'] = g.hub_score(weights='weight')
# Was weights='weights' — no such edge attribute exists; the attribute is 'weight'.
g.vs['authority'] = g.authority_score(weights='weight')

Export vertex attributes


In [ ]:
# Write one CSV row per vertex, with one column per vertex attribute of `g`.
# newline='' lets the csv module control line endings (without it, blank rows
# appear between records on Windows); utf-8 keeps non-ASCII vertex names from
# failing on platforms with a narrower default encoding.
with open('vertex_attributes.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = g.vertex_attributes()
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # Each vertex's attributes() dict is written as one row.
    writer.writerows(v.attributes() for v in g.vs)