In [1]:
%%bash
# Clone the SonarQube repository and export its commit log as a CSV-like file
# (header: sha,timestamp,author,email) under /tmp/data/input for later cells.
#
# Stop at the first failing command: without this, a failed clone or `cd`
# would let `git log` run in whatever directory the notebook was started from
# and silently export the wrong history.
set -euo pipefail

GIT_REPO_DIR=/tmp/demo/sonarcube_git_repo
# Start from a clean checkout directory on every run.
rm -rf "$GIT_REPO_DIR"
mkdir -p "$GIT_REPO_DIR"
# warning, this could take a while (~5 minutes)...
git clone https://github.com/SonarSource/sonarqube.git "$GIT_REPO_DIR"
cd "$GIT_REPO_DIR"

# No trailing slash here, so the file path below contains a single separator.
DATA_DIR="/tmp/data/input"
rm -rf "$DATA_DIR"
mkdir -p "$DATA_DIR"
GIT_LOG_FILE="$DATA_DIR/git.log"

# Write the header row first, then append one line per commit.
echo 'sha,timestamp,author,email' > "$GIT_LOG_FILE"
# NOTE(review): fields are comma-separated, so an author name that itself
# contains a comma would shift the columns of that row — confirm the data.
git log --pretty=format:'%h,%ad,%aN,%ae' >> "$GIT_LOG_FILE"
head "$GIT_LOG_FILE"
In [2]:
import pandas as pd
# Load the exported git log into a DataFrame and preview the first rows.
git_log_path = "/tmp/data/input/git.log"
commits = pd.read_csv(git_log_path)
commits.head()
Out[2]:
In [3]:
# Count commits per raw author name and show the ten most frequent ones.
author_counts = commits['author'].value_counts()
author_counts.head(10)
Out[3]:
In [4]:
# Read the author-to-person lookup table; the first spreadsheet column
# becomes the index of the resulting DataFrame.
mapping_file = "PersonMapping.xlsx"
personmapping = pd.read_excel(mapping_file, index_col=0)
personmapping
Out[4]:
In [5]:
# Seed a 'person' column with the raw author name as its initial value.
commits = commits.assign(person=commits['author'])
commits.head()
Out[5]:
In [6]:
# Overwrite 'person' with the mapped name for every commit whose current
# 'person' value appears in the index of the mapping table; all other rows
# keep the raw author name.
# `.loc` is used because the `.ix` indexer was deprecated and later removed
# from pandas, so the previous form fails on current releases.
has_mapping = commits['person'].isin(personmapping.index)
mapped_person = commits['author'].map(personmapping['Person'])
# The right-hand Series is aligned on the row index, so only the masked rows
# receive their mapped value.
commits.loc[has_mapping, 'person'] = mapped_person
commits['person'].value_counts().head(10)
Out[6]:
In [7]:
%matplotlib inline
commits['person'].value_counts().head(10).plot(kind='pie', figsize=(5,5))
Out[7]:
In [8]:
# Convert the textual commit dates into real timestamps.
# The log is exported with git's `%ad` placeholder, whose default rendering
# includes each commit's own UTC offset, so the column mixes time zones.
# `utc=True` normalizes everything to UTC and guarantees a proper datetime
# dtype; without it, mixed offsets can come back as plain objects, which
# time-based operations such as `resample` reject.
commits['timestamp'] = pd.to_datetime(commits['timestamp'], utc=True)
commits.head()
Out[8]:
In [9]:
# Use the commit timestamp as the row index while keeping it as a column too.
commits = commits.set_index('timestamp', drop=False)
commits.head()
Out[9]:
In [10]:
# Group the rows into one-month bins on the index and count the non-null
# entries of every column per bin.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' — confirm against the installed version before changing it.
monthly_bins = commits.resample('1M')
commits_per_months = monthly_bins.count()
commits_per_months.head()
Out[10]:
In [11]:
# Line chart of the monthly commit count (taken from the 'sha' column).
monthly_commit_counts = commits_per_months['sha']
monthly_commit_counts.plot.line()
Out[11]: