In [1]:
# %load https://gist.githubusercontent.com/kidpixo/2ec078d09834b5aa7869/raw/c8812811211dc7cd62f5b530b51b0104f39263ff/ipython%20inizialization
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
commits_list = ! git --no-pager log --reverse --oneline

commits = []
for i in commits_list:
    sha1 = i.split(' ')[0]
    print 'Commit SHA-1 value:',sha1
    commits.append(sha1)


Commit SHA-1 value: 7545477
Commit SHA-1 value: b904ffd
Commit SHA-1 value: 050a563
Commit SHA-1 value: e92be1e
Commit SHA-1 value: 976869c
Commit SHA-1 value: 39ab315
Commit SHA-1 value: 8ea5de7
Commit SHA-1 value: afa0783
Commit SHA-1 value: a4cb9a1
Commit SHA-1 value: 112e832
Commit SHA-1 value: 4714408
Commit SHA-1 value: 1ac1c35
Commit SHA-1 value: cf40d67
Commit SHA-1 value: 9407ce9
Commit SHA-1 value: 86727f0

In [41]:
import subprocess

# Same listing as the `!git log` magic, but obtained through subprocess
# so that it also works outside IPython.
path = '/Users/damo_ma/Downloads/github_rep/git_history_visualizer'

# The command is an argument list and no shell is involved: `path` reaches
# git verbatim, so spaces or shell metacharacters in it can neither break
# the command line nor inject extra commands.
# universal_newlines=True makes the pipe yield text, so the '' sentinel
# below matches the end-of-file value returned by readline().
p = subprocess.Popen(
    ['git', '-C', path, '--no-pager', 'log', '--reverse', '--oneline'],
    stdout=subprocess.PIPE,
    universal_newlines=True)
try:
    for line in iter(p.stdout.readline, ''):
        print(line.rstrip())
finally:
    # Release the pipe and reap the child so no zombie process or open
    # file descriptor is left behind in the kernel.
    p.stdout.close()
    p.wait()


7545477 initialize repo and add first file
b904ffd added a and b
050a563 modified a
e92be1e deleted b
976869c added d and e
39ab315 modified d and e
8ea5de7 added f , modified a
afa0783 modified creation.sh
a4cb9a1 deleted d
112e832 modified creation.sh, f and e
4714408 added python script
1ac1c35 added wrong script: deleted and substituted

In [3]:
# Sorted, de-duplicated list of every path that appears in the log:
# `--name-only` with an empty `--pretty` format leaves only the paths,
# `sed` drops the empty lines, and `sort | uniq` removes the repeats.
all_files = ! git --no-pager log --reverse --name-only --oneline --pretty='format:' |  sed '/^$/d' | sort | uniq

Legend

git status

  • A : file Added
  • D : file Deleted
  • M : file Modified
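  • S : file is Static (it exists, but nothing happens to it in this commit) — custom code, not a git status
  • N : file is Non-existent (not yet added, or already deleted) — custom code, not a git status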

See the official Git - git-log Documentation :

--diff-filter=[(A|C|D|M|R|T|U|X|B)…[*]]

Select only files that are Added (A), Copied (C), Deleted (D), Modified (M), Renamed (R), have their type (i.e. regular file, symlink, submodule, …) changed (T), are Unmerged (U), are Unknown (X), or have had their pairing Broken (B). Any combination of the filter characters (including none) can be used. When * (All-or-none) is added to the combination, all paths are selected if there is any file that matches other criteria in the comparison; if there is no file that matches other criteria, nothing is selected.

In [4]:
all_filenames = pd.DataFrame(pd.DataFrame(list(all_files)),columns=commits, index=all_files)
all_commits = ! git --no-pager log --reverse --name-status --oneline --pretty='format:COMMIT %h %s' | tr '\t' ' ' | sed -e '/^$/d'

def_states = {
    'A' : 0,
    'M' : 32,
    'S' : 64, # custom value, Static
    'D' : 128,
    'N' : 128, # custom value, Non existent
}

def_states_explain = {
    'A' : 'Added',
    'D' : 'Deleted',
    'M' : 'Modified',
    'S' : 'Static',
    'N' : 'Non existent'
}

# fill NaN
all_filenames.fillna('N', inplace=True)

actual_commit = 0
# previous_commit = 0
for i in all_commits:
    # set the commit number
    if i[0] == 'C':
        value = i.split(' ')[1]
        # starting at the second commit see which file exist in the previous commit
        if actual_commit != int(all_filenames.columns[0]):
             previous_commit = actual_commit
        actual_commit = value
        # assig 1 to file not null un the previous commit
        if previous_commit != 0:
            all_filenames[actual_commit][
                (all_filenames[previous_commit] != 'N') & (all_filenames[previous_commit] != 'D')] = 'S'
#             all_filenames[previous_commit][all_filenames[actual_commit] == 'D'] = 'D'
#             all_filenames[actual_commit][all_filenames[actual_commit] == 'D']   = 'N'
#         print previous_commit,'>',actual_commit
    else:
        state,value = i.split(' ')
#         print ' '*4,'-',state,value
        all_filenames.ix[value,actual_commit] = state

In [5]:
# Show the raw 'COMMIT ...' / '<status> <path>' lines parsed in the previous cell.
all_commits


Out[5]:
['COMMIT 7545477 initialize repo and add first file',
 'A creation.sh',
 'COMMIT b904ffd added a and b',
 'A a.txt',
 'A b.txt',
 'COMMIT 050a563 modified a',
 'M a.txt',
 'COMMIT e92be1e deleted b',
 'D b.txt',
 'COMMIT 976869c added d and e',
 'A c.txt',
 'A d.txt',
 'A e.txt',
 'COMMIT 39ab315 modified d and e',
 'M d.txt',
 'M e.txt',
 'COMMIT 8ea5de7 added f , modified a',
 'M a.txt',
 'A f.txt',
 'COMMIT afa0783 modified creation.sh',
 'A .creation.sh.swp',
 'M creation.sh',
 'COMMIT a4cb9a1 deleted d',
 'D d.txt',
 'COMMIT 112e832 modified creation.sh, f and e',
 'D .creation.sh.swp',
 'D c.txt',
 'M creation.sh',
 'M e.txt',
 'M f.txt',
 'COMMIT 4714408 added python script',
 'A git_history.ipynb',
 'A git_history.py',
 'COMMIT 1ac1c35 added wrong script: deleted and substituted',
 'D git_history.ipynb',
 'D git_history.py',
 'A git_history_test_git.ipynb',
 'A git_history_test_git.py',
 'COMMIT cf40d67 start to look how to get shell output instead of ipython magic',
 'A .ipynb_checkpoints/git_history_test_git-checkpoint.ipynb',
 'M git_history_test_git.ipynb',
 'M git_history_test_git.py',
 'COMMIT 9407ce9 Initial commit',
 'A .gitignore',
 'A LICENSE',
 'A README.md',
 "COMMIT 86727f0 Merge remote-tracking branch 'remotes/origin/master'"]

In [6]:
# Show the resulting state table: rows are file names, columns are commits.
all_filenames


Out[6]:
7545477 b904ffd 050a563 e92be1e 976869c 39ab315 8ea5de7 afa0783 a4cb9a1 112e832 4714408 1ac1c35 cf40d67 9407ce9 86727f0
.creation.sh.swp N N N N N N N A S D N N N N N
.gitignore N N N N N N N N N N N N N A S
.ipynb_checkpoints/git_history_test_git-checkpoint.ipynb N N N N N N N N N N N N A S S
LICENSE N N N N N N N N N N N N N A S
README.md N N N N N N N N N N N N N A S
a.txt N A M S S S M S S S S S S S S
b.txt N A S D N N N N N N N N N N N
c.txt N N N N A S S S S D N N N N N
creation.sh A S S S S S S M S M S S S S S
d.txt N N N N A M S S D N N N N N N
e.txt N N N N A M S S S M S S S S S
f.txt N N N N N N A S S M S S S S S
git_history.ipynb N N N N N N N N N N A D N N N
git_history.py N N N N N N N N N N A D N N N
git_history_test_git.ipynb N N N N N N N N N N N A M S S
git_history_test_git.py N N N N N N N N N N N A M S S

In [7]:
# Value on the 0-255 colour scale used to draw each state in the plots.
def_states = {
    'A': 120,
    'M': 180,
    'S': 255,  # custom value, Static
    'D': 240,
    'N': 128,  # custom value, Non existent
}

# Translate every state letter of the table into its numeric value and keep
# the result as a NumPy array detached from the DataFrame.
history = all_filenames.applymap(def_states.__getitem__).values.copy()

In [8]:
# Float version of `history` in which every 128 entry is replaced by NaN.
h = np.where(history == 128, np.nan, history.astype('float'))

In [14]:
def _strip_decorations(axes, hide_y_labels=False):
    """Remove the frame and the tick marks of ``axes``.

    The tick labels are kept, except the y labels when ``hide_y_labels``
    is true.
    """
    # set 0 to bounding box width
    for spine in axes.spines.values():
        spine.set_linewidth(0.0)
    # see http://stackoverflow.com/a/20416681/1435167
    for axis in (axes.xaxis, axes.yaxis):
        for tic in axis.get_major_ticks():
            tic.tick1On = tic.tick2On = False
    if hide_y_labels:
        for tic in axes.yaxis.get_major_ticks():
            tic.label1On = tic.label2On = False


n_files, n_commits = h.shape

fig = plt.figure(figsize=[10,12])

# Main panel: one horizontal line per file with one coloured dot per commit.
ax = plt.subplot(111)
x = range(n_commits)
for i in range(n_files):
    y = [i] * n_commits
    ax.scatter(x, y, s = 500, c=h[i,:], alpha=1, marker='o',linewidths = 3 , cmap = plt.cm.spectral,vmin = 0, vmax = 255)
    ax.plot(x, y, lw = 3, c='k', zorder=0)

ax.set_xticks(range(n_commits))
ax.set_xticklabels(all_filenames.columns,rotation=90)

ax.set_xlabel('commits sha-1 (time arrow to the right ->)')
ax.set_xlim([-.5,n_commits-0.5])
ax.set_ylabel('file names')
# Nothing is assigned to `ax.set_yticks` here: doing so would replace the
# Axes method with a plain value and break any later call to it.
ax.set_yticks(range(n_files))
ax.set_yticklabels(all_filenames.index.tolist())
_strip_decorations(ax)

# Legend panel: one dot per state, drawn with the same colour scale.
ax2 = fig.add_axes([0.25, .9, 0.5, 0.075])

# Colours and labels are both looked up through this one key list, so each
# dot is guaranteed to sit above its own label. Taking `.values()` of the
# two dicts separately only lines up if both happen to iterate in the same
# order.
legend_states = ['A', 'M', 'S', 'D', 'N']
colors = np.array([def_states[state] for state in legend_states]).astype('float')
colors[colors == 128] = np.nan

x = range(len(colors))
y = [1 for kk in x]
ax2.scatter(x, y, s = 500, c=colors, alpha=1, marker='o',linewidths = 3, cmap = plt.cm.spectral,vmin = 0, vmax = 255)
ax2.plot(x, y, lw = 3, c='k', zorder=0)

ax2.set_xticks(x)
ax2.set_xticklabels([def_states_explain[state] for state in legend_states])
ax2.set_xlabel('Legend')
ax2.set_xlim([-.5,len(x)-0.5])
ax2.set_ylim([0.99,1.01])
_strip_decorations(ax2, hide_y_labels=True)

# NOTE(review): absolute, user-specific output path - adjust before running elsewhere.
fig.savefig('/Users/damo_ma/Desktop/test.png')



In [23]:
# fake legend: a two-row image with one column per state, both rows identical
state_codes = list(def_states.values())
a = np.array([state_codes, state_codes], dtype=float)
plt.imshow(a, interpolation='nearest', cmap=plt.cm.spectral, vmin=0, vmax=255)
# keys() and values() of the same dict iterate in the same order, so each
# column is labelled with its own state letter
plt.xticks(range(len(def_states)), list(def_states.keys()));
plt.yticks([1], '');



In [24]:
# Heat-map view of the state matrix: one row per file, one column per commit.
fig = plt.figure(figsize=[10,10])
row_count, column_count = history.shape

plt.imshow(history, interpolation='nearest', cmap=plt.cm.spectral, vmin=0, vmax=255)
plt.xlabel('commits sha-1 (time arrow to the right ->)')
plt.ylabel('file names')
plt.xticks(range(column_count), all_filenames.columns, rotation='vertical')
plt.yticks(range(row_count), all_filenames.index.tolist());



In [ ]: