In [1]:
%pylab inline
%load_ext autoreload
%autoreload 2
import networkx as nx
from matplotlib import pyplot as plt

from temporal_coauthor_network import TemporalCoAuthorNetwork
import process_pubmed

import vincent
vincent.core.initialize_notebook()
import pandas as pd
import csv
import itertools
import re


Populating the interactive namespace from numpy and matplotlib

Temporal Co-author Networks

First create the Co-author network.

Add edges according to the following format:

Author1, Author2, Weight, Time


In [2]:
N = TemporalCoAuthorNetwork()

# Authors in order of first appearance. `seen_nodes` mirrors the list so the
# membership test does not have to rescan the whole list for every row.
nodes = []
seen_nodes = set()

# Each row is expected to hold: Author1, Author2, Weight, Time
# NOTE(review): a plain comma split cannot cope with author names that
# themselves contain a comma.
with open('sample.csv', 'r') as f:
    for line in f:
        # Skip blank rows (e.g. a trailing empty line), which would otherwise
        # fail with an IndexError when the second field is read.
        if not line.strip():
            continue
        fields = line.split(",")
        a1 = fields[0].strip()
        a2 = fields[1].strip()

        for author in (a1, a2):
            if author not in seen_nodes:
                seen_nodes.add(author)
                nodes.append(author)

        # Weight and time are stored as text in the file; addEdge gets ints.
        N.addEdge(a1, a2, int(fields[2]), int(fields[3]))

In [3]:
# Run the project's process_pubmed.byAuthor helper for this author.
# NOTE(review): presumably this produces 'processed_pubmed_Adamson,JW.csv',
# which the following cell reads - confirm against process_pubmed.
process_pubmed.byAuthor('Adamson, J W')

In [2]:
N = TemporalCoAuthorNetwork()

# Authors in order of first appearance. `seen_nodes` mirrors the list so the
# membership test does not have to rescan the whole list for every row.
nodes = []
seen_nodes = set()

# Rows are tab-separated: Author1, Author2, Weight, Time
with open('processed_pubmed_Adamson,JW.csv') as f:
    for line in f:
        row = line.strip()
        # Skip blank rows (e.g. a trailing empty line), which would otherwise
        # fail with an IndexError when the second field is read.
        if not row:
            continue
        fields = row.split("\t")
        a1 = fields[0].strip()
        a2 = fields[1].strip()

        for author in (a1, a2):
            if author not in seen_nodes:
                seen_nodes.add(author)
                nodes.append(author)

        # Weight and time are stored as text in the file; addEdge gets ints.
        N.addEdge(a1, a2, int(fields[2]), int(fields[3]))

To draw the network we need a layout function. The example below uses a spring layout and asks it to pin the focal author ('Adamson, J W') at (0.5, 0.5):


In [3]:
def fixed_node_layout(network, fixed_node='Adamson, J W', center=(0.5, 0.5)):
    """Compute a spring layout that pins one node at a chosen position.

    Parameters
    ----------
    network : graph
        Graph to lay out; passed straight to ``nx.spring_layout``.
    fixed_node : node, optional
        Node to pin. Defaults to 'Adamson, J W'.
    center : tuple, optional
        (x, y) position requested for ``fixed_node``. Defaults to (0.5, 0.5).

    Returns
    -------
    dict
        Whatever ``nx.spring_layout`` returns (node -> position).
    """
    if fixed_node not in network:
        # Pinning a node that is absent from the graph fails on some networkx
        # versions, so fall back to an unconstrained spring layout.
        return nx.spring_layout(network)
    return nx.spring_layout(network, pos={fixed_node: center}, fixed=[fixed_node])

Now draw the network at every time point and record the focal author's eigenvector centrality and clustering coefficient:


In [4]:
# Every time stamp at which any edge occurs; these bound the window to scan.
all_edge_times = [time for edge in N.edges for time in edge.occurences.keys()]
max_time = max(all_edge_times)
min_time = min(all_edge_times)
# range() excludes its stop value, so add 1 to include the last time point
# (and to still analyse one snapshot when min_time == max_time).
timeSeries = range(min_time, max_time + 1, 1)

focus_author = 'Adamson, J W'
eigenvector_centrality = dict()
clustering_coefficient = dict()
# NOTE: time points without edges are skipped and append nothing, so the
# position of a value in these lists is not an offset from min_time.
for timePoint in timeSeries:
    network = N.networkAtTime(timePoint)
    if not network.edges():
        # Nothing to draw or measure for an edgeless snapshot.
        continue
    # Compute the layout once and reuse it, so the printed position is the
    # one that is actually drawn (spring layouts differ between calls).
    pos = fixed_node_layout(network)
    print(pos.get(focus_author))
    nx.draw(network, pos=pos)
    plt.show()
    centrality = nx.eigenvector_centrality_numpy(network)
    clustering = nx.clustering(network)
    for author in [focus_author]:
        eigen_series = eigenvector_centrality.setdefault(author, [])
        cluster_series = clustering_coefficient.setdefault(author, [])
        # Authors missing from this snapshot are recorded as 0 for both metrics.
        present = author in centrality
        eigen_series.append(centrality[author] if present else 0)
        cluster_series.append(clustering[author] if present else 0)


[ 0.          0.31690563]
[ 0.  0.]
[ 0.29073457  0.33287362]
[ 0.32953277  0.4860886 ]
[ 0.49622734  0.52304879]
[ 0.53840522  0.47056951]
[ 0.40732758  0.48756051]
[ 0.48864787  0.49749542]
[ 0.46833063  0.54356893]
[ 0.49017217  0.48307854]
[ 0.48974528  0.49947452]
[ 0.50305538  0.50222691]
[ 0.47725929  0.47527161]
[ 0.50033748  0.46878519]
[ 0.47499107  0.49931181]
[ 0.52190987  0.50614311]
[ 0.50994256  0.50347789]
[ 0.48726342  0.49684753]
[ 0.50055833  0.45876219]
[ 0.49634614  0.49142201]
[ 0.49204892  0.48585372]
[ 0.48457332  0.4952869 ]
[ 0.49129592  0.50013951]
<matplotlib.figure.Figure at 0x107986450>
[ 0.49752826  0.48256262]
<matplotlib.figure.Figure at 0x107b17410>
[ 0.49806677  0.49641379]
<matplotlib.figure.Figure at 0x103bd3990>
[ 0.5051768   0.47497122]
<matplotlib.figure.Figure at 0x105e6bf90>
[ 0.47974071  0.49736827]
<matplotlib.figure.Figure at 0x107b90210>
[ 0.45811938  0.53865823]
<matplotlib.figure.Figure at 0x10748aa10>
[ 0.52524128  0.50975158]
<matplotlib.figure.Figure at 0x105e6bf90>
[ 0.50682637  0.42997113]
<matplotlib.figure.Figure at 0x105e6bfd0>
[ 0.46089174  0.50079383]
<matplotlib.figure.Figure at 0x105e6bf90>
[ 0.49018893  0.49992092]
<matplotlib.figure.Figure at 0x103bbff90>
[ 0.50451863  0.49900192]
<matplotlib.figure.Figure at 0x107f31650>
[ 0.50423055  0.49922984]
<matplotlib.figure.Figure at 0x107bbd350>
[ 0.50279     0.49395696]
[ 0.46563911  0.49914985]
[ 0.50643557  0.50821054]
[ 0.49256717  0.51017705]
[ 0.50452056  0.45059324]

In [5]:
# One column per tracked author, one row per analysed snapshot.
eigen_data = pd.DataFrame(eigenvector_centrality)
cluster_data = pd.DataFrame(clustering_coefficient)

# Render both metrics as line charts: centrality first, then clustering.
for frame in (eigen_data, cluster_data):
    vincent.Line(frame).display()



In [ ]: