In [89]:
import pandas as pd
commits = pd.read_csv("../../intellij-community/git_oneline.log")
commits.head()
Out[89]:
In [92]:
commits.info()
In [93]:
commits['author'].value_counts().head(10).plot(kind='pie', figsize=(5,5))
Out[93]:
In [94]:
commits.head()
Out[94]:
In [95]:
commits = commits.set_index(pd.to_datetime(commits['timestamp'], unit="s"))
commits.head()
Out[95]:
In [96]:
# commits_per_month isn't created above; a plausible reconstruction is to
# count the commits per month on the datetime index:
commits_per_month = commits[['author']].resample('M').count()
commits_per_month['commits_cum'] = commits_per_month['author'].cumsum()
commits_per_month
Out[96]:
In [38]:
commits_per_month['commits_cum'].plot()
Out[38]:
In [77]:
%matplotlib inline
commits_raw['time'].plot()
In [73]:
commits_raw['unix'].dtypes
Out[73]:
In [38]:
commits_raw['time'] = pd.to_datetime(commits_raw['timestamp'])
commits_raw.dtypes
Out[38]:
In [ ]:
commits_raw.head()
In [ ]:
commits_raw
In [1]:
import git
GIT_REPO_DIR = r'C:/dev/repos/intellij-community/'
repo = git.Repo(GIT_REPO_DIR, odbt=git.GitCmdObjectDB)
g = repo.git
log = g.log('--all', '--numstat', '--no-renames', '--pretty=format:#%h#%ad#%aN')
log[0:100]
Out[1]:
After this, we have to tell Git which information we want. We can do this via the pretty-format option.
For each commit, we choose to create a header line with the following commit info (by using --pretty=format:'#%h#%ad#%aN'), which gives us output like this:
#fa1ca6f#Thu Dec 22 08:04:18 2016 +0100#feststelltaste
It contains the SHA key, the timestamp as well as the author's name of the commit, separated by a character that certainly doesn't occur within this information itself (here: #).
We also want to have some details about the modifications of each file per commit. This is why we use the --numstat flag.
Together with the --all flag to get all commits and the --no-renames flag to avoid commits that only rename files, we retrieve all the needed information directly via Git.
Each of the other rows contains statistics about a modified file:
2 0 src/main/asciidoc/appendices/bibliography.adoc
It contains the number of lines inserted, the number of lines deleted and the relative path of the file. With a little trick and a little bit of data wrangling, we can read that information into a nicely structured DataFrame.
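The whole wrangling can be sketched compactly on a tiny, made-up log (the SHAs, dates, and file paths below are invented for illustration; the real notebook reads the output of git log instead):

```python
import pandas as pd
from io import StringIO

# Hypothetical two-commit log: each commit has a '#'-separated header line
# followed by tab-separated numstat lines.
log = (
    "#fa1ca6f#Thu Dec 22 08:04:18 2016 +0100#feststelltaste\n"
    "2\t0\tsrc/main/asciidoc/appendices/bibliography.adoc\n"
    "5\t1\tREADME.md\n"
    "#ab12cd3#Fri Dec 23 10:00:00 2016 +0100#feststelltaste\n"
    "1\t1\tpom.xml\n"
)

# header lines split into sha/date/author; numstat lines land in 'file_stats'
raw = pd.read_csv(StringIO(log), sep="#", header=None,
                  names=["file_stats", "sha", "date", "author"])

# forward-fill each commit's metadata down onto its numstat rows
meta = raw[["sha", "date", "author"]].ffill()

# split the tab-separated numstat rows into proper columns
stats = raw["file_stats"].dropna().str.split("\t", expand=True)
stats.columns = ["additions", "deletions", "filename"]
stats["additions"] = pd.to_numeric(stats["additions"], errors="coerce")
stats["deletions"] = pd.to_numeric(stats["deletions"], errors="coerce")

# keep only the file rows, each carrying its commit's metadata
commits = meta.join(stats, how="right")
```

This mirrors the steps below: read with an "impossible" separator, forward-fill the commit metadata, split the numstat column, and join.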
Let's get started!
First, I'll show you my approach for reading nearly everything into a DataFrame. The key is to use Pandas' read_csv for reading "non-character separated values". How does that work? We simply choose a separator that doesn't occur in the file that we want to read. My favorite character for this is the "DEVICE CONTROL TWO" character U+0012: I haven't yet encountered a data set that contains it.
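To see the separator trick in isolation, here is a tiny, hypothetical example (the input text is made up; the point is that sep="\u0012" never matches, so each line becomes one value, commas, tabs and all):

```python
import pandas as pd
from io import StringIO

# messy lines containing commas and tabs are read as single values,
# because the separator "\u0012" never occurs in the text
text = "a,b\tc\nsecond, messy line\n"
df = pd.read_csv(StringIO(text), sep="\u0012", header=None, names=["value"])
```

Each row of `df["value"]` now holds one complete, unsplit line, ready for custom string wrangling.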
We just read our git.log file without any headers (because there are none) and give the only column a nice name.
In [3]:
import pandas as pd
from io import StringIO
commits_raw = pd.read_csv(StringIO(log),
                          sep="#",
                          header=None,
                          names=['file_stats', 'sha', 'date', 'author'])
commits_raw.head()
Out[3]:
OK, but now we face a data wrangling challenge. We have the commit info as well as the statistics for the modified files in one column, but they don't belong together. What we want is the commit info alongside the file statistics in separate columns to get some serious analysis started.
In [5]:
commit_metadata = commits_raw[['sha', 'date', 'author']].ffill()
commit_metadata.head(5)
Out[5]:
With this, we can focus on extracting the information from a commit info row. The next command may look a little frightening, but don't worry: we'll go through it step by step.
In [6]:
file_info = commits_raw['file_stats'].dropna().str.split("\t", expand=True)
file_info.columns = ['additions', "deletions", "filename"]
file_info.head()
Out[6]:
In [7]:
file_info['additions'] = pd.to_numeric(file_info['additions'], errors='coerce')
file_info['deletions'] = pd.to_numeric(file_info['deletions'], errors='coerce')
file_info.dtypes
Out[7]:
In [8]:
commits = commit_metadata.join(file_info, how='right')
commits = commits.dropna()
commits.head()
Out[8]:
In [87]:
commits.groupby('author')[['additions']].sum().sort_values(by='additions', ascending=False)
Out[87]:
OK, this part is ready, let's have a look at the file statistics!
We're done!
Just some milliseconds to run through, not bad!
In this notebook, I showed you how to read imperfectly structured data via the non-character separator trick. I also showed you how to transform rows that contain multiple kinds of data into one nicely structured DataFrame.
Now that we have the Git repository DataFrame, we can do some nice things with it, e.g. visualizing the code churn of a project. But that's a story for another notebook! To give you a short preview:
In [88]:
%matplotlib inline
timed_commits = commits.set_index(pd.DatetimeIndex(commits['date']))[['additions', 'deletions']].resample('1D').sum()
(timed_commits['additions'] - timed_commits['deletions']).cumsum().ffill().plot()
In [ ]:
%matplotlib inline
commits['author'].value_counts().plot(kind='pie', figsize=(10,10))
Stay tuned!