In [1]:
import pandas as pd
import networkx as nx
%matplotlib inline

In [28]:
def _ownership_share(csv_path, column_name, total):
    """Load a main-dev CSV and return each developer's share of ownership.

    Sums the `ownership` column per `main-dev`, divides by `total`, then
    normalises so the resulting column sums to 1.

    Returns a single-column DataFrame named `column_name`, indexed by
    `author`.
    """
    frame = pd.read_csv(csv_path)
    share = frame[['main-dev', 'ownership']].groupby('main-dev').sum() / total
    share = share / share.sum()
    # Rename index/column so the frame aligns with the other per-author frames.
    share.index.name = 'author'
    share.columns = [column_name]
    return share


def authors_full(path, communication_path='data/communication.csv'):
    """Build one per-author table of contribution metrics for a repository.

    Parameters
    ----------
    path : str
        Directory containing `author-churn.csv`, `summary.csv`,
        `main-dev.csv` and `refactoring-main-dev.csv`.
    communication_path : str, optional
        CSV with `author`, `peer` and `strength` columns used to build the
        communication graph. Defaults to the location the original code
        hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by author, with columns `ownership`,
        `refactoring_ownership`, `page_rank` and the normalised columns of
        `author-churn.csv`. Authors missing from one of the sources get 0
        for that source's columns.
    """
    # Author names form the index, so no date parsing is requested here.
    authors = pd.read_csv(path + '/author-churn.csv', index_col='author')
    # Express every churn column as a fraction of its column total.
    # Assumes all remaining columns are numeric -- TODO confirm.
    authors = authors / authors.sum()

    summary = pd.read_csv(path + '/summary.csv')
    # NOTE(review): row 2 of summary.csv's `value` column is the divisor; which
    # statistic that row holds is not visible here. The later division by the
    # column sum cancels this constant mathematically; it is kept so results
    # stay numerically identical to the previous implementation.
    total = summary['value'][2]

    dev = _ownership_share(path + '/main-dev.csv', 'ownership', total)
    refactoring_dev = _ownership_share(
        path + '/refactoring-main-dev.csv', 'refactoring_ownership', total)

    communication = pd.read_csv(communication_path)
    # networkx 2.x renamed from_pandas_dataframe to from_pandas_edgelist;
    # prefer the new name and fall back so older installs keep working.
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
    G = build_graph(communication, 'author', 'peer', ['strength'])
    page_rank = pd.DataFrame.from_dict(nx.pagerank(G, weight='strength'), orient='index')
    page_rank.columns = ['page_rank']

    # Join side by side on the author index. The previous axis='author' is not
    # a valid axis name and raises ValueError on current pandas.
    return pd.concat([dev, refactoring_dev, page_rank, authors], axis=1).fillna(0)

In [29]:
# Common prefix for the per-repository data directories used below.
data_dir = 'data/repos/'
# Correlation matrix of the per-author metrics for golang/go.
authors_full('{}{}'.format(data_dir, 'golang/go')).corr()


/home/janisz/anaconda2/lib/python2.7/site-packages/networkx/generators/stochastic.py:56: UserWarning: zero out-degree for node Sagar Mehta
  warnings.warn('zero out-degree for node %s' % u)
/home/janisz/anaconda2/lib/python2.7/site-packages/networkx/generators/stochastic.py:56: UserWarning: zero out-degree for node Sebastian Geller
  warnings.warn('zero out-degree for node %s' % u)
/home/janisz/anaconda2/lib/python2.7/site-packages/networkx/generators/stochastic.py:56: UserWarning: zero out-degree for node gaopeng
  warnings.warn('zero out-degree for node %s' % u)
Out[29]:
ownership refactoring_ownership page_rank added deleted commits
ownership 1.000000 0.991027 -0.023155 0.982815 0.974970 0.995055
refactoring_ownership 0.991027 1.000000 -0.020705 0.976302 0.974796 0.993721
page_rank -0.023155 -0.020705 1.000000 -0.023367 -0.019477 -0.022695
added 0.982815 0.976302 -0.023367 1.000000 0.984127 0.981833
deleted 0.974970 0.974796 -0.019477 0.984127 1.000000 0.983187
commits 0.995055 0.993721 -0.022695 0.981833 0.983187 1.000000

In [30]:
# Correlation matrix of the per-author metrics for apache/mesos.
authors_full('{}{}'.format(data_dir, 'apache/mesos')).corr()


Out[30]:
ownership refactoring_ownership page_rank added deleted commits
ownership 1.000000 0.999671 -0.037384 0.999555 0.999526 0.996993
refactoring_ownership 0.999671 1.000000 -0.037875 0.999130 0.999171 0.996994
page_rank -0.037384 -0.037875 1.000000 -0.038207 -0.036503 -0.048193
added 0.999555 0.999130 -0.038207 1.000000 0.999932 0.996552
deleted 0.999526 0.999171 -0.036503 0.999932 1.000000 0.995808
commits 0.996993 0.996994 -0.048193 0.996552 0.995808 1.000000

In [ ]: