In [1]:

    
%pylab inline
%load_ext autoreload
%autoreload 2
import networkx as nx
from matplotlib import pyplot as plt

from temporal_coauthor_network import TemporalCoAuthorNetwork
import process_pubmed

import vincent
vincent.core.initialize_notebook()
import pandas as pd
import csv
import itertools
import re









    



Populating the interactive namespace from numpy and matplotlib

Temporal Co-author Networks

First, create the co-author network.

Add edges from a file with one edge per line, using the following comma-separated format:

Author1, Author2, Weight, Time



In [2]:

    
# Build the temporal co-author network from a comma-separated edge list.
# Each non-blank row is expected to be: Author1, Author2, Weight, Time.
# NOTE(review): a plain comma split breaks on author names that themselves
# contain a comma (e.g. 'Adamson, J W') -- confirm sample.csv has none.
N = TemporalCoAuthorNetwork()

with open('sample.csv', 'r') as f:
    nodes = []  # distinct author names, in order of first appearance
    for line in f:
        # Skip blank lines (e.g. a trailing empty line) instead of failing
        # with an IndexError on the missing columns.
        if not line.strip():
            continue
        fields = [field.strip() for field in line.split(",")]
        a1, a2 = fields[0], fields[1]

        for author in (a1, a2):
            if author not in nodes:
                nodes.append(author)

        # Weight and time are parsed as integers before being added.
        N.addEdge(a1, a2, int(fields[2]), int(fields[3]))



In [3]:

    
# Run the project's PubMed processing step for this author.
# NOTE(review): presumably this produces 'processed_pubmed_Adamson,JW.csv',
# which a later cell reads -- confirm in process_pubmed.
process_pubmed.byAuthor('Adamson, J W')



In [2]:

    
# Build the temporal co-author network from the tab-separated PubMed export.
# Each non-blank row is expected to be: Author1 <TAB> Author2 <TAB> Weight <TAB> Time.
N = TemporalCoAuthorNetwork()
with open('processed_pubmed_Adamson,JW.csv') as f:
    nodes = []  # distinct author names, in order of first appearance
    for line in f:
        line = line.strip()
        # Skip blank lines (e.g. a trailing empty line) instead of failing
        # with an IndexError on the missing columns.
        if not line:
            continue
        fields = [field.strip() for field in line.split("\t")]
        a1, a2 = fields[0], fields[1]

        for author in (a1, a2):
            if author not in nodes:
                nodes.append(author)

        # Weight and time are parsed as integers before being added.
        N.addEdge(a1, a2, int(fields[2]), int(fields[3]))

To draw the network we need a layout function. The example below pins one author at a fixed position:



In [3]:

    
def fixed_node_layout(network, center='Adamson, J W', center_pos=(0.5, 0.5)):
    """Compute a spring layout with one node pinned to a given position.

    Parameters
    ----------
    network : graph accepted by ``nx.spring_layout``
        The network to lay out.
    center : node, optional
        Node to pin. Defaults to 'Adamson, J W', the previously hard-coded
        value, so existing one-argument calls behave as before.
    center_pos : tuple, optional
        Initial position handed to ``nx.spring_layout`` for ``center``.

    Returns
    -------
    The result of ``nx.spring_layout`` (a mapping from node to position).

    Notes
    -----
    If ``center`` is not a node of ``network`` an unconstrained spring layout
    is returned instead of asking networkx to fix a node that does not exist.

    NOTE(review): the positions recorded in this notebook's output for the
    pinned node are not (0.5, 0.5), so the installed networkx version may
    rescale the layout after pinning -- confirm before relying on the
    exact coordinates.
    """
    if center not in network:
        # Nothing to pin at this snapshot; fall back to a plain layout.
        return nx.spring_layout(network)
    return nx.spring_layout(network, pos={center: center_pos}, fixed=[center])

Now draw the network at each time point and record the author's centrality and clustering coefficient:



In [4]:

    
# Step through the network one time point at a time: draw each non-empty
# snapshot and record the focal author's eigenvector centrality and
# clustering coefficient.
focal_author = 'Adamson, J W'

all_edge_times = [time for edge in N.edges for time in edge.occurences.keys()]
max_time = max(all_edge_times)
min_time = min(all_edge_times)
# range() excludes its stop value, so add 1 to keep the last time point at
# which an edge occurs in the series.
timeSeries = range(min_time, max_time + 1, 1)
eigenvector_centrality = dict()
clustering_coefficient = dict()
for timePoint in timeSeries:
    network = N.networkAtTime(timePoint)
    if not network.edges():
        # Nothing to draw or measure for an empty snapshot.
        # NOTE(review): skipped time points add no entry to the metric lists,
        # so list positions do not map one-to-one onto timeSeries.
        continue
    pos = fixed_node_layout(network)
    if focal_author in pos:
        print(pos[focal_author])
    # Draw with the layout computed above so the printed position matches the
    # figure; spring layouts differ from call to call.
    nx.draw(network, pos=pos)
    plt.show()
    centrality = nx.eigenvector_centrality_numpy(network)
    clustering = nx.clustering(network)
    for author in [focal_author]:
        centrality_series = eigenvector_centrality.setdefault(author, [])
        clustering_series = clustering_coefficient.setdefault(author, [])
        if author not in centrality:
            # Author absent from this snapshot: record zeros for both metrics.
            centrality_series.append(0)
            clustering_series.append(0)
        else:
            centrality_series.append(centrality[author])
            clustering_series.append(clustering[author])









    



[ 0.          0.31690563]






    












    



[ 0.  0.]






    












    



[ 0.29073457  0.33287362]






    












    



[ 0.32953277  0.4860886 ]






    












    



[ 0.49622734  0.52304879]






    












    



[ 0.53840522  0.47056951]






    












    



[ 0.40732758  0.48756051]






    












    



[ 0.48864787  0.49749542]






    












    



[ 0.46833063  0.54356893]






    












    



[ 0.49017217  0.48307854]






    












    



[ 0.48974528  0.49947452]






    












    



[ 0.50305538  0.50222691]






    












    



[ 0.47725929  0.47527161]






    












    



[ 0.50033748  0.46878519]






    












    



[ 0.47499107  0.49931181]






    












    



[ 0.52190987  0.50614311]






    












    



[ 0.50994256  0.50347789]






    












    



[ 0.48726342  0.49684753]






    












    



[ 0.50055833  0.45876219]






    












    



[ 0.49634614  0.49142201]






    












    



[ 0.49204892  0.48585372]






    












    



[ 0.48457332  0.4952869 ]






    












    



[ 0.49129592  0.50013951]






    





<matplotlib.figure.Figure at 0x107986450>






    



[ 0.49752826  0.48256262]






    





<matplotlib.figure.Figure at 0x107b17410>






    



[ 0.49806677  0.49641379]






    





<matplotlib.figure.Figure at 0x103bd3990>






    



[ 0.5051768   0.47497122]






    





<matplotlib.figure.Figure at 0x105e6bf90>






    



[ 0.47974071  0.49736827]






    





<matplotlib.figure.Figure at 0x107b90210>






    



[ 0.45811938  0.53865823]






    





<matplotlib.figure.Figure at 0x10748aa10>






    



[ 0.52524128  0.50975158]






    





<matplotlib.figure.Figure at 0x105e6bf90>






    



[ 0.50682637  0.42997113]






    





<matplotlib.figure.Figure at 0x105e6bfd0>






    



[ 0.46089174  0.50079383]






    





<matplotlib.figure.Figure at 0x105e6bf90>






    



[ 0.49018893  0.49992092]






    





<matplotlib.figure.Figure at 0x103bbff90>






    



[ 0.50451863  0.49900192]






    





<matplotlib.figure.Figure at 0x107f31650>






    



[ 0.50423055  0.49922984]






    





<matplotlib.figure.Figure at 0x107bbd350>






    



[ 0.50279     0.49395696]






    












    



[ 0.46563911  0.49914985]






    












    



[ 0.50643557  0.50821054]






    












    



[ 0.49256717  0.51017705]






    












    



[ 0.50452056  0.45059324]



In [5]:

    
# Turn each per-author metric dictionary into a DataFrame (one column per
# author), then render both as vincent line charts, centrality first.
eigen_data, cluster_data = (
    pd.DataFrame.from_dict(metric_by_author)
    for metric_by_author in (eigenvector_centrality, clustering_coefficient)
)
for metric_frame in (eigen_data, cluster_data):
    vincent.Line(metric_frame).display()



In [ ]: