# Export the per-file change statistics of the whole history into a log file.
# The empty --pretty format suppresses the commit header lines, so only the
# tab-separated --numstat rows (added, deleted, path) are written.
git log --pretty="" --numstat > git_oneline_numstat.log


In [29]:
import pandas as pd

# Load the tab-separated numstat export. The file has no header row, so the
# three columns are named explicitly.
numstat_log = "../../../intellij-community/git_oneline_numstat.log"
numstat_columns = ["additions", "deletions", "filename"]
commits = pd.read_csv(numstat_log, sep='\t', header=None, names=numstat_columns)
commits.head()


Out[29]:
additions deletions filename
0 5 1 platform/platform-api/src/com/intellij/openapi...
1 12 6 platform/lang-impl/src/com/intellij/ide/projec...
2 13 43 java/java-analysis-impl/src/com/intellij/codeI...
3 1 1 java/java-analysis-impl/src/com/intellij/codeI...
4 0 1 java/java-psi-api/src/com/intellij/psi/PsiUses...

In [30]:
# Inspect the row count, per-column dtypes and memory footprint of the loaded
# frame before doing any conversion.
commits.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 826173 entries, 0 to 826172
Data columns (total 3 columns):
additions    826173 non-null object
deletions    826173 non-null object
filename     826173 non-null object
dtypes: object(3)
memory usage: 18.9+ MB

In [32]:
# Convert both count columns to numbers. errors='coerce' turns every value
# that cannot be parsed into NaN instead of raising.
for count_column in ('additions', 'deletions'):
    commits[count_column] = pd.to_numeric(commits[count_column], errors='coerce')
commits.head()


Out[32]:
additions deletions filename
0 5.0 1.0 platform/platform-api/src/com/intellij/openapi...
1 12.0 6.0 platform/lang-impl/src/com/intellij/ide/projec...
2 13.0 43.0 java/java-analysis-impl/src/com/intellij/codeI...
3 1.0 1.0 java/java-analysis-impl/src/com/intellij/codeI...
4 0.0 1.0 java/java-psi-api/src/com/intellij/psi/PsiUses...

In [37]:
# Count the non-null entries per filename and order the files from the most
# to the fewest counted rows. count() skips NaN, so a file whose numeric
# columns are all NaN ends up with 0 at the tail.
changes_per_file = (
    commits
    .groupby('filename')
    .count()
    .sort_values(by="additions", ascending=False)
)
changes_per_file.tail(10)


Out[37]:
additions deletions
filename
plugins/junit5_rt/lib/junit-vintage-engine-4.12.0-M2.jar 0 0
plugins/junit5_rt/lib/junit-vintage-engine-4.12.0-M5.jar 0 0
plugins/junit5_rt/lib/opentest4j-1.0.0-M1.jar 0 0
plugins/junit5_rt/lib/junit-vintage-engine-4.12.0-M6.jar 0 0
plugins/junit5_rt/lib/junit-vintage-engine-4.12.0-RC2.jar 0 0
plugins/junit5_rt/lib/junit-vintage-engine-4.12.0.jar 0 0
plugins/junit5_rt/lib/junit4-engine-5.0.0-ALPHA.jar 0 0
plugins/junit5_rt/lib/junit4-runner-5.0.0-ALPHA.jar 0 0
plugins/junit5_rt/lib/junit5-engine-5.0.0-ALPHA.jar 0 0
platform/icons/src/general/comboUpPassive@2x.png 0 0

In [26]:
# Keep only the files whose count lies above the 99.99th percentile.
additions_count = changes_per_file['additions']
cutoff = additions_count.quantile(0.9999)
changes_per_file[additions_count > cutoff]


Out[26]:
additions deletions
filename
resources/src/META-INF/IdeaPlugin.xml 920 920
platform/util/resources/misc/registry.properties 889 889
platform/platform-impl/src/com/intellij/openapi/editor/impl/EditorImpl.java 849 849
plugins/groovy/src/META-INF/plugin.xml 791 791
python/src/META-INF/python-plugin-common.xml 601 601
platform/platform-resources/src/META-INF/LangExtensions.xml 545 545
plugins/groovy/src/org/jetbrains/plugins/groovy/annotator/GroovyAnnotator.java 541 541
platform/util/src/com/intellij/util/ui/UIUtil.java 535 535
plugins/InspectionGadgets/src/com/siyeh/InspectionGadgetsBundle.properties 523 523
resources/src/idea/ActionManager.xml 516 516
platform/platform-resources-en/src/misc/registry.properties 435 435
platform/platform-resources-en/src/messages/ActionsBundle.properties 414 414
python/src/com/jetbrains/python/PyBundle.properties 385 385
platform/lang-impl/src/com/intellij/codeInsight/lookup/impl/LookupImpl.java 385 385
platform/testFramework/src/com/intellij/testFramework/fixtures/impl/CodeInsightTestFixtureImpl.java 384 384
build/scripts/layouts.gant 373 373
platform/platform-impl/src/com/intellij/openapi/application/impl/ApplicationImpl.java 360 360
plugins/InspectionGadgets/InspectionGadgetsAnalysis/src/com/siyeh/InspectionGadgetsBundle.properties 356 356
java/java-psi-impl/src/com/intellij/psi/impl/source/resolve/graphInference/InferenceSession.java 354 354
platform/lang-impl/src/com/intellij/util/indexing/FileBasedIndexImpl.java 352 352
platform/platform-resources-en/src/messages/IdeBundle.properties 344 344
platform/platform-resources/src/idea/PlatformActions.xml 344 344

In [4]:
# Plot the per-file counts against their row position in changes_per_file.
# reset_index() replaces the filename index with 0..n-1 so the x axis is a
# plain position; the y axis is logarithmic.
ax = changes_per_file.reset_index()['additions'].plot(logy=True)
# Label the figure so it can be read without the surrounding cells.
ax.set_title("Recorded changes per file")
ax.set_xlabel("file (row position in changes_per_file)")
ax.set_ylabel("number of changes (log scale)")
ax


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x21c4fac5518>