In [1]:
import pandas as pd
# Location of the gzip-compressed CSV (a Linux blame log, judging by the file name).
blame_csv = "../../../software-data/projects/linux/linux_blame_log.csv.gz"
# Read the file and turn the 'timestamp' column into pandas datetimes in one chain;
# assign() overwrites the existing column, so the column order is unchanged.
log = pd.read_csv(blame_csv).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)
log.head()
Out[1]:
In [2]:
# For every (path, author) pair keep the earliest timestamp and the number of
# non-null 'line' entries attributed to that author in that file.
aggregations = {'timestamp': 'min', 'line': 'count'}
knowledge = log.groupby(['path', 'author']).agg(aggregations)
knowledge.head()
Out[2]:
In [3]:
# Total line count per file, broadcast back onto every (path, author) row.
lines_per_file = knowledge.groupby(level='path')['line'].transform('sum')
knowledge['all'] = lines_per_file
# Share of the file's lines attributed to this author (a fraction of the per-file total).
knowledge['knowing'] = knowledge['line'].div(lines_per_file)
knowledge.head()
Out[3]:
In [4]:
# Highest 'knowing' share within each file, broadcast to every (path, author) row.
# The aggregation is named as a string: passing the builtin `max` to transform()
# raises a FutureWarning in pandas >= 2.1 and is slated to stop mapping to the
# group-wise maximum.
max_knowledge_per_file = knowledge.groupby(['path'])['knowing'].transform('max')
# Keep only the rows whose share equals the per-file maximum. Both sides come from
# the same column, so the equality is exact; a tie keeps several authors for a file.
knowledge_carriers = knowledge[knowledge['knowing'] == max_knowledge_per_file]
# Move 'author' (the second index level) into a regular column; 'path' remains the index.
knowledge_carriers = knowledge_carriers.reset_index(level='author')
knowledge_carriers.head()
Out[4]:
In [5]:
from ausi import d3
# Turn the remaining index into an ordinary column so the helper receives a flat frame.
carriers_flat = knowledge_carriers.reset_index()
# Arguments are positional; their meaning is defined by ausi's d3 helper and is not
# visible in this notebook. NOTE(review): the last one looks like an output name
# prefix, confirm against the ausi source.
d3.create_json_for_zoomable_circle_packing(
    carriers_flat, 'author', 'author', 'path', '/', 'all', 'knowing',
    'linux_circle_packing')