In [1]:
import pandas as pd
import networkx as nx
%matplotlib inline
In [28]:
def _ownership_share(csv_path, total, column):
    """Return each main developer's normalised share of summed ownership.

    Reads ``csv_path`` (which must have ``main-dev`` and ``ownership``
    columns), sums ``ownership`` per ``main-dev``, scales by ``total`` and
    then normalises so the column sums to 1.

    The result is a one-column DataFrame named ``column`` whose index is
    named ``author``.
    """
    ownership = (
        pd.read_csv(csv_path)[['main-dev', 'ownership']]
        .groupby('main-dev')
        .sum()
    )
    # Dividing by the scalar `total` cancels out in the normalisation on the
    # next line; it is kept so the arithmetic matches the original step by step.
    ownership = ownership / total
    ownership = ownership / ownership.sum()
    ownership.index.name = 'author'
    ownership.columns = [column]
    return ownership


def _page_rank(communication_path):
    """Return a one-column ``page_rank`` DataFrame for the communication graph.

    The CSV must have ``author``, ``peer`` and ``strength`` columns; each row
    becomes an edge weighted by ``strength``.
    """
    communication = pd.read_csv(communication_path)
    # `from_pandas_dataframe` was renamed to `from_pandas_edgelist` in
    # networkx 2.x; prefer the new name and fall back for old installs.
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
    graph = build_graph(communication, 'author', 'peer', ['strength'])
    ranks = nx.pagerank(graph, weight='strength')
    # Building from a named Series also works when `ranks` is empty, where
    # assigning `.columns` on an empty frame would raise a length mismatch.
    return pd.Series(ranks, name='page_rank', dtype=float).to_frame()


def authors_full(path, communication_path='data/communication.csv'):
    """Build a per-author feature table for the repository data in ``path``.

    Parameters
    ----------
    path : str
        Directory containing ``author-churn.csv``, ``summary.csv``,
        ``main-dev.csv`` and ``refactoring-main-dev.csv``.
    communication_path : str, optional
        CSV with ``author``, ``peer`` and ``strength`` columns used to build
        the communication graph. Defaults to the location the notebook
        originally hard-coded.
        NOTE(review): the default is not repository-specific, so every
        repository gets the same page-rank column unless this is overridden.
        Confirm whether a per-repository file was intended.

    Returns
    -------
    pandas.DataFrame
        Indexed by author, with columns ``ownership``,
        ``refactoring_ownership``, ``page_rank`` followed by the
        column-normalised columns of ``author-churn.csv``. Authors missing
        from any of the inputs get 0 in the corresponding columns.
    """
    # The index holds author names, so no date parsing is requested.
    authors = pd.read_csv(path + '/author-churn.csv', index_col='author')
    authors = authors / authors.sum()

    summary = pd.read_csv(path + '/summary.csv')
    # Third row of the `value` column.
    # NOTE(review): presumably a repository-wide total -- confirm against
    # the layout of summary.csv.
    total = summary['value'].iloc[2]

    dev = _ownership_share(path + '/main-dev.csv', total, 'ownership')
    refactoring_dev = _ownership_share(
        path + '/refactoring-main-dev.csv', total, 'refactoring_ownership')
    page_rank = _page_rank(communication_path)

    # axis=1 aligns the frames on the author index and places their columns
    # side by side; the original axis='author' is not a valid axis.
    return pd.concat([dev, refactoring_dev, page_rank, authors], axis=1).fillna(0)
In [29]:
# Root folder holding one sub-directory of CSV exports per repository.
data_dir = 'data/repos/'

# Pairwise correlation between the per-author metrics for golang/go.
golang_authors = authors_full(data_dir + 'golang/go')
golang_authors.corr()
Out[29]:
In [30]:
# Pairwise correlation between the per-author metrics for apache/mesos.
mesos_authors = authors_full(data_dir + 'apache/mesos')
mesos_authors.corr()
Out[30]:
In [ ]: