In [1]:
import csv
from io import StringIO

import git
import pandas as pd
# Location of the Git repository to analyse, relative to this notebook.
GIT_REPO_PATH = r'../../spring-petclinic/'
repo = git.Repo(GIT_REPO_PATH)
git_bin = repo.git

# One line per commit, tab-separated:
#   %h  abbreviated commit hash
#   %at author date as a Unix timestamp
#   %aN author name
#   %s  subject (first line of the commit message)
# The format is handed to git as a single argument through GitPython's command
# wrapper. A whole command line passed as one string is not split into
# arguments when no shell is involved, which makes it fail on POSIX systems.
git_log = git_bin.log('--pretty=format:%h\t%at\t%aN\t%s')

commits = pd.read_csv(
    StringIO(git_log),
    sep="\t",
    header=None,
    names=['sha', 'timestamp', 'author', 'message'],
    # Keep hashes and free text as strings: an abbreviated hash such as
    # "1234567" or "12e4567" would otherwise be inferred as a number.
    dtype={'sha': str, 'author': str, 'message': str},
    # Commit subjects may contain double quotes; they are plain text here,
    # not CSV field delimiters.
    quoting=csv.QUOTE_NONE,
)

# Count whitespace-separated words. Splitting (rather than counting spaces)
# is not thrown off by repeated, leading or trailing blanks, and an empty
# subject yields 0 instead of NaN.
commits['word_count'] = commits['message'].fillna('').str.split().str.len()
commits.head()
Out[1]:
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
commits['word_count'].hist()
plt.suptitle("Word count distribution of commit messages")
Out[2]: