In [2]:

    
import githistoryvis as ghv

Gather the data

Githistoryvis exposes the class git_history.

Initialization:

foo = git_history(PATH)

sets the attribute foo.path, which points to the git repository at PATH.

def_states (and def_states_explain) are also defined at initialization. They are used to convert the states in the DataFrame to numbers for visualization and to define the legend.

You can overwrite them at your own risk.

# Numeric value for each status letter; it is used as the color code
# when the datamatrix is visualized.
def_states = {
    u'A': 120,
    u'C': 25,
    u'B': 51,
    u'D': 240,
    u'M': 180,
    u'R': 102,
    u'U': 204,
    u'T':  76,
    u'X': 153,
    u'S': 255,   # custom value, Static
    u'N': None,  # custom value, Non existent
}

# Human-readable description of each status letter (used for the legend).
def_states_explain = {
    u'A': u'added',
    u'C': u'copied',
    u'D': u'deleted',
    u'M': u'modified',
    u'R': u'renamed',
    u'T': u'type changed',
    u'U': u'unmerged',
    u'X': u'unknown',
    u'B': u'pairing broken',
    u'S': u'Static',
    u'N': u'Non existent'
}

methods

foo.get_history()

extracts the git log and defines:

foo.all_commits = the whole git log
foo.commits = the commits SHA-1
foo.all_files = all the unique files that ever existed

arguments:

prettyformat, default %h

Optional; accepts any of the git pretty-format placeholders, see http://git-scm.com/docs/pretty-formats. For example, get the commit subject with '%s' and write your own parser for self.decodelog().

The default is '%h', the short SHA-1 of the commit.



In [3]:

    
import os

path =  os.getcwd() # put here the desired git repo path (here: the current working directory)

# Two-step form: create the git_history object for the repository ...
gt = ghv.git_history(path)

# ... then extract the git log (fills all_commits, commits and all_files).
gt.get_history()

# new compact version: request the history directly from the constructor.
# Note that this rebinds gt, replacing the object built above.
gt = ghv.git_history(path, get_history=True)

Argument gitcommitlist

default False

Optional; if present, it should be a string with the result of:

git -C PATH --no-pager log --reverse --name-status --oneline --pretty="format:COMMIT%x09%h"

For example, execute this command on a remote machine, store the result in a file, then read the content:

with open('gitoutput', 'r') as file:
    data = file.read()

and pass the result to get_history method:

gt.get_history(gitcommitlist=data)



In [5]:

    
# Read a previously stored git log dump from the file 'gitoutput' ...
with open('gitoutput', 'r') as log_file:
    data = log_file.read()

# ... and pass its text to get_history() through the gitcommitlist argument.
gt.get_history(gitcommitlist=data)

Visualize the data

We define a pandas DataFrame with one row per file and one column per commit; each cell holds the status of that file at that commit.

This grid represents the status of each file at each step (commit).

The initial state of every file is N (Non existent); the states are updated while sequentially reading the git_history.all_commits object.

Deserialize and structure the data

The data gathered in the githistoryvis.git_history() object are deserialized and collected in a pandas DataFrame by the git_history.definedatamatrix() method.



In [4]:

    
# Deserialize the gathered history into a pandas DataFrame (files as rows,
# commits as columns), made available as gt.datamatrix.
gt.definedatamatrix()
# Last expression of the cell: display the resulting DataFrame.
gt.datamatrix









    Out[4]:






  
    
      
      7545477
      b904ffd
      050a563
      e92be1e
      976869c
      39ab315
      8ea5de7
      afa0783
      a4cb9a1
      112e832
      ...
      1f858fb
      5aee0a8
      74ed9b6
      0ab984b
      b477dbd
      eddc268
      3abacf9
      b366ec6
      428b104
      f452ee7
    
  
  
    
      .creation.sh.swp
      N
      N
      N
      N
      N
      N
      N
      A
      S
      D
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      .gitignore
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      S
      S
      S
      S
      S
      M
      S
    
    
      .ipynb_checkpoints/git_history_test_git-checkpoint.ipynb
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      M
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      LICENSE
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      S
      S
      S
      S
      S
      S
      S
    
    
      README.md
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      S
      S
      S
      M
      S
      S
      M
    
    
      __init__.py
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      A
      S
      S
      S
      S
      S
      S
      S
      S
      S
    
    
      a.txt
      N
      A
      M
      S
      S
      S
      M
      S
      S
      S
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      b.txt
      N
      A
      S
      D
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      c.txt
      N
      N
      N
      N
      A
      S
      S
      S
      S
      D
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      creation.sh
      A
      S
      S
      S
      S
      S
      S
      M
      S
      M
      ...
      S
      S
      S
      S
      S
      S
      S
      S
      S
      S
    
    
      d.txt
      N
      N
      N
      N
      A
      M
      S
      S
      D
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      e.txt
      N
      N
      N
      N
      A
      M
      S
      S
      S
      M
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      example_githistoryvis.py
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      A
      S
      S
      S
      S
    
    
      f.txt
      N
      N
      N
      N
      N
      N
      A
      S
      S
      M
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      git_history.ipynb
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      git_history.py
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      git_history_test_git.ipynb
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      M
      S
      S
      S
      S
      S
    
    
      git_history_test_git.py
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      githistoryvis.py
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      M
      S
      M
      S
      S
      M
      S
      M
    
    
      images/commit_and_files_range_visual_history.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      images/commit_file_range.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      images/commit_range.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      images/commit_range_visual_history.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      images/complete_visual_history.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      images/file_range.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      images/files_range_visual_history.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
    
      images/state_filter.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      S
      S
      S
      M
      S
      S
      S
      S
      S
      S
    
    
      visual_history.png
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
      ...
      N
      N
      N
      N
      N
      N
      N
      N
      N
      N
    
  

28 rows × 40 columns

Visualize the data

The data in the pandas DataFrame can be visualized with this simple example routine.

The arguments are:

plt : the imported name of matplotlib.pyplot.
size (default 200) : the marker size of the pyplot scatter plot.
figsize (default [9,7]) : size of the pyplot.figure.
linewidths (default 3) : width of the outer lines of the pyplot scatter plot markers.
outpath : if defined, the figure will be saved without visualization.
legend : if defined to any value, will show a rudimentary legend.



In [21]:

    
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline



In [22]:

    
# Complete history, shown inline.
gt.plot_history_df(plt, gt.datamatrix, size=300, figsize=[12, 10.5])

# Same plot again, this time with outpath set so the figure is saved
# to a file instead of being displayed.
complete_png = path + os.sep + 'images/complete_visual_history.png'
gt.plot_history_df(plt, gt.datamatrix, size=300, figsize=[12, 10.5], outpath=complete_png)



In [24]:

    
# Filtering the history on a commit range.
# The columns are selected by label, both endpoints included. `.loc` is used
# because the `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0.
# NOTE(review): '1222c5e' is not among the column labels visible in the
# (truncated) displayed DataFrame; confirm it exists in the repository history.
plot_df_commit_range = gt.datamatrix.loc[:, 'a4cb9a1':'1222c5e']

# First call displays the figure; second call saves it to outpath.
gt.plot_history_df(plt, plot_df_commit_range, size=300, figsize=[3, 10])
gt.plot_history_df(plt, plot_df_commit_range, size=300, figsize=[3, 10], outpath=path+os.sep+'images/commit_range.png')



In [25]:

    
# Filtering the history on a file range: keep every file whose name
# does not end with "txt".
ends_with_txt = gt.datamatrix.index.str.contains('txt$')
plot_df_file_range = gt.datamatrix[~ends_with_txt]

# Show the figure inline, then save the same figure to disk.
gt.plot_history_df(plt, plot_df_file_range, size=300, figsize=[11.5, 8.5])
file_range_png = path + os.sep + 'images/file_range.png'
gt.plot_history_df(plt, plot_df_file_range, size=300, figsize=[11.5, 8.5], outpath=file_range_png)



In [26]:

    
# Filtering the history on a commit range AND a file range
# (all files not ending with txt).
# Rows and columns are selected in a single `.loc[rows, columns]` call:
# the `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0, and a
# single indexing step avoids chaining a second `[...]` on an intermediate frame.
# NOTE(review): '1222c5e' is not among the column labels visible in the
# (truncated) displayed DataFrame; confirm it exists in the repository history.
plot_df_commit_file_range = gt.datamatrix.loc[
    ~gt.datamatrix.index.str.contains('txt$'),
    'a4cb9a1':'1222c5e',
]

# First call displays the figure; second call saves it to outpath.
gt.plot_history_df(plt, plot_df_commit_file_range, size=300, figsize=[3.5, 8.5])
gt.plot_history_df(plt, plot_df_commit_file_range, size=300, figsize=[3.5, 8.5], outpath=path+os.sep+'images/commit_file_range.png')



In [27]:

    
# Filtering the history on a state: keep only the files whose state in the
# last commit (last column of the datamatrix) is not 'N' (Non existent).
last_commit = gt.datamatrix.columns[-1]
not_missing_at_last_commit = gt.datamatrix[last_commit] != 'N'
plot_df_state_filter = gt.datamatrix[not_missing_at_last_commit]

# Show the figure inline, then save the same figure to disk.
gt.plot_history_df(plt, plot_df_state_filter, size=300, figsize=[11, 6])
state_filter_png = path + os.sep + 'images/state_filter.png'
gt.plot_history_df(plt, plot_df_state_filter, size=300, figsize=[11, 6], outpath=state_filter_png)

	7545477	b904ffd	050a563	e92be1e	976869c	39ab315	8ea5de7	afa0783	a4cb9a1	112e832	...	1f858fb	5aee0a8	74ed9b6	0ab984b	b477dbd	eddc268	3abacf9	b366ec6	428b104	f452ee7
.creation.sh.swp	N	N	N	N	N	N	N	A	S	D	...	N	N	N	N	N	N	N	N	N	N
.gitignore	N	N	N	N	N	N	N	N	N	N	...	S	S	S	S	S	S	S	S	M	S
.ipynb_checkpoints/git_history_test_git-checkpoint.ipynb	N	N	N	N	N	N	N	N	N	N	...	S	M	S	M	S	S	S	S	S	S
LICENSE	N	N	N	N	N	N	N	N	N	N	...	S	S	S	S	S	S	S	S	S	S
README.md	N	N	N	N	N	N	N	N	N	N	...	S	S	S	S	S	S	M	S	S	M
__init__.py	N	N	N	N	N	N	N	N	N	N	...	A	S	S	S	S	S	S	S	S	S
a.txt	N	A	M	S	S	S	M	S	S	S	...	N	N	N	N	N	N	N	N	N	N
b.txt	N	A	S	D	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N
c.txt	N	N	N	N	A	S	S	S	S	D	...	N	N	N	N	N	N	N	N	N	N
creation.sh	A	S	S	S	S	S	S	M	S	M	...	S	S	S	S	S	S	S	S	S	S
d.txt	N	N	N	N	A	M	S	S	D	N	...	N	N	N	N	N	N	N	N	N	N
e.txt	N	N	N	N	A	M	S	S	S	M	...	N	N	N	N	N	N	N	N	N	N
example_githistoryvis.py	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	A	S	S	S	S
f.txt	N	N	N	N	N	N	A	S	S	M	...	N	N	N	N	N	N	N	N	N	N
git_history.ipynb	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N
git_history.py	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N
git_history_test_git.ipynb	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	M	S	S	S	S	S
git_history_test_git.py	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	S	S	S	S	S	S
githistoryvis.py	N	N	N	N	N	N	N	N	N	N	...	S	S	M	S	M	S	S	M	S	M
images/commit_and_files_range_visual_history.png	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N
images/commit_file_range.png	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	S	S	S	S	S	S
images/commit_range.png	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	S	S	S	S	S	S
images/commit_range_visual_history.png	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N
images/complete_visual_history.png	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	S	S	S	S	S	S
images/file_range.png	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	S	S	S	S	S	S
images/files_range_visual_history.png	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N
images/state_filter.png	N	N	N	N	N	N	N	N	N	N	...	S	S	S	M	S	S	S	S	S	S
visual_history.png	N	N	N	N	N	N	N	N	N	N	...	N	N	N	N	N	N	N	N	N	N