git log --oneline --pretty="" --numstat > git_oneline_numstat.log
In [29]:
import pandas as pd
# Load the numstat export written by the `git log ... --numstat` command.
# The file has no header row and is tab-separated, so the column names are
# supplied here explicitly.
commits = pd.read_csv(
    "../../../intellij-community/git_oneline_numstat.log",
    sep='\t',
    header=None,
    names=["additions", "deletions", "filename"],
)
# Preview the first rows to confirm the columns were parsed as intended.
commits.head()
Out[29]:
In [30]:
# Inspect the column dtypes and non-null counts of the freshly loaded frame.
commits.info()
In [32]:
# Convert both line-count columns to numeric dtype. With errors='coerce',
# any entry that cannot be parsed as a number becomes NaN instead of raising
# (presumably the '-' placeholders git numstat emits for binary files --
# verify against the raw log).
for count_column in ('additions', 'deletions'):
    commits[count_column] = pd.to_numeric(commits[count_column], errors='coerce')
commits.head()
Out[32]:
In [37]:
# Number of recorded changes per file, most frequently changed first.
# NOTE: count() tallies only the non-null values of each column, so rows whose
# 'additions' value is NaN do not contribute to the 'additions' count that is
# used as the sort key here.
changes_per_file = (
    commits
    .groupby('filename')
    .count()
    .sort_values(by="additions", ascending=False)
)
# The tail of the descending ordering shows the least frequently changed files.
changes_per_file.tail(10)
Out[37]:
In [26]:
# Keep only the files whose change count lies strictly above the 99.99th
# percentile of all per-file change counts.
changes_per_file.loc[
    lambda counts: counts['additions'] > counts['additions'].quantile(0.9999)
]
Out[26]:
In [4]:
# Plot the per-file change counts against their positional rank (the filename
# index is replaced by 0..n-1) on a logarithmic y axis.
changes_per_file['additions'].reset_index(drop=True).plot(logy=True)
Out[4]: