In [7]:
import pandas as pd
# reading
git_log = pd.read_csv(
    "../../../buschmais-spring-petclinic/git_log_numstat.log",
    sep="\t", 
    header=None,
    names=[
        'additions', 
        'deletions', 
        'filename', 
        'author'])

# converting in "one line"
log = git_log[['additions', 'deletions', 'filename']]\
    .join(git_log[['author']]\
    .fillna(method='ffill'))\
    .dropna()
    
log['additions'] = pd.to_numeric(log['additions'], errors='coerce')
log['deletions'] = pd.to_numeric(log['deletions'], errors='coerce')

log['filename'] = log.filename.str.replace("org/springframework/samples/", "")
log[log['filename'].str.endswith(".java")].to_csv("git_log_prepared.log", columns=['author','filename','additions', 'deletions'], index=None)
log


Out[7]:
additions deletions filename author
1 4.0 5.0 src/test/java/petclinic/web/CrashControllerTes... Antoine Rey
2 25.0 7.0 src/test/java/petclinic/web/OwnerControllerTes... Antoine Rey
3 21.0 9.0 src/test/java/petclinic/web/PetControllerTests... Antoine Rey
4 23.0 3.0 src/test/java/petclinic/web/VetControllerTests... Antoine Rey
5 10.0 6.0 src/test/java/petclinic/web/VisitControllerTes... Antoine Rey
7 111.0 0.0 src/main/java/petclinic/PetclinicInitializer.java Antoine Rey
9 4.0 14.0 src/main/java/petclinic/repository/jdbc/JdbcPe... Antoine Rey
11 2.0 2.0 src/main/java/petclinic/web/VetController.java Antoine Rey
12 14.0 14.0 src/test/java/petclinic/web/VetControllerTests... Antoine Rey
14 22.0 5.0 src/main/java/petclinic/repository/jdbc/JdbcVi... Attilio
15 1.0 1.0 src/main/java/petclinic/repository/jpa/JpaVisi... Attilio
16 2.0 0.0 src/main/java/petclinic/service/ClinicService.... Attilio
17 5.0 0.0 src/main/java/petclinic/service/ClinicServiceI... Attilio
18 10.0 0.0 src/test/java/petclinic/service/AbstractClinic... Attilio
20 1.0 1.0 src/main/java/petclinic/model/BaseEntity.java Faisal Hameed
21 5.0 4.0 src/main/java/petclinic/web/OwnerController.java Faisal Hameed
22 7.0 6.0 src/main/java/petclinic/web/PetController.java Faisal Hameed
23 5.0 3.0 src/main/java/petclinic/web/PetValidator.java Faisal Hameed
24 1.0 1.0 src/test/java/petclinic/service/AbstractClinic... Faisal Hameed
26 53.0 0.0 src/test/java/petclinic/web/CrashControllerTes... Colin But
27 167.0 0.0 src/test/java/petclinic/web/OwnerControllerTes... Colin But
28 113.0 0.0 src/test/java/petclinic/web/PetControllerTests... Colin But
29 76.0 0.0 src/test/java/petclinic/web/PetTypeFormatterTe... Colin But
30 22.0 6.0 src/test/java/petclinic/web/VetControllerTests... Colin But
31 79.0 0.0 src/test/java/petclinic/web/VisitControllerTes... Colin But
33 6.0 6.0 src/main/java/petclinic/model/Visit.java Tomas Repel
34 2.0 2.0 src/main/java/petclinic/repository/jdbc/JdbcVi... Tomas Repel
36 5.0 4.0 src/main/java/petclinic/model/Pet.java Antoine Rey
37 2.0 1.0 src/main/java/petclinic/repository/jdbc/JdbcPe... Antoine Rey
38 2.0 1.0 src/test/java/petclinic/service/AbstractClinic... Antoine Rey
... ... ... ... ...
1052 44.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1053 21.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1054 5.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1055 25.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1056 41.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1057 9.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1058 15.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1059 0.0 5.0 org.springframework.samples.petclinic/src/main... Keith Donald
1060 0.0 25.0 org.springframework.samples.petclinic/src/main... Keith Donald
1061 0.0 41.0 org.springframework.samples.petclinic/src/main... Keith Donald
1062 0.0 9.0 org.springframework.samples.petclinic/src/main... Keith Donald
1063 0.0 15.0 org.springframework.samples.petclinic/src/main... Keith Donald
1065 1.0 1.0 org.springframework.samples.petclinic/src/main... Keith Donald
1066 0.0 1.0 org.springframework.samples.petclinic/src/main... Keith Donald
1067 6.0 1.0 org.springframework.samples.petclinic/src/main... Keith Donald
1069 2.0 1.0 org.springframework.samples.petclinic/src/main... Keith Donald
1070 21.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1071 15.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1073 14.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1074 26.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1075 36.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1076 13.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1077 39.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1078 5.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1079 25.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1080 41.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1081 9.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1082 10.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1083 9.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald
1084 5.0 0.0 org.springframework.samples.petclinic/src/main... Keith Donald

929 rows × 4 columns


In [104]:
def aggregate_additions(group):
    df = group.groupby(['author'])[['additions']].sum()
    df['all_additions'] = group['additions'].sum()
    df['ownership'] = df['additions'] / df['all_additions']
    return df

log.groupby('filename').apply(aggregate_additions)


Out[104]:
additions all_additions ownership
filename author
.bowerrc Antoine Rey 1.0 4.0 0.250000
Dapeng 0.0 4.0 0.000000
Faycal Ihabritane 3.0 4.0 0.750000
.classpath Costin Leau 43.0 81.0 0.530864
Keith Donald 8.0 81.0 0.098765
Michael Isvy 28.0 81.0 0.345679
Rossen Stoyanchev 2.0 81.0 0.024691
.editorconfig Antoine Rey 14.0 14.0 1.000000
.gitignore Antoine Rey 3.0 12.0 0.250000
Cyrille Le Clerc 2.0 12.0 0.166667
Dave Syer 1.0 12.0 0.083333
Michael Isvy 6.0 12.0 0.500000
Patrick Crocker 0.0 12.0 0.000000
.mvn/wrapper/maven-wrapper.jar Patrick Crocker NaN 0.0 NaN
.mvn/wrapper/maven-wrapper.properties Patrick Crocker 1.0 1.0 1.000000
.project Costin Leau 36.0 59.0 0.610169
Keith Donald 16.0 59.0 0.271186
Michael Isvy 5.0 59.0 0.084746
Rossen Stoyanchev 2.0 59.0 0.033898
.settings/.jsdtscope Keith Donald 11.0 13.0 0.846154
Michael Isvy 2.0 13.0 0.153846
.settings/com.springsource.server.ide.jdt.core.xml Keith Donald 2.0 2.0 1.000000
Michael Isvy 0.0 2.0 0.000000
.settings/org.eclipse.jdt.core.prefs Keith Donald 12.0 16.0 0.750000
Michael Isvy 4.0 16.0 0.250000
.settings/org.eclipse.jst.common.project.facet.core.prefs Keith Donald 3.0 3.0 1.000000
Michael Isvy 0.0 3.0 0.000000
.settings/org.eclipse.m2e.core.prefs Michael Isvy 4.0 4.0 1.000000
.settings/org.eclipse.m2e.wtp.prefs Michael Isvy 2.0 2.0 1.000000
.settings/org.eclipse.wst.common.component Costin Leau 10.0 38.0 0.263158
... ... ... ... ...
src/test/resources/logback-test.xml Gordon Dickens 24.0 24.0 1.000000
Michael Isvy 0.0 24.0 0.000000
src/test/resources/org/springframework/samples/petclinic/AbstractClinicTests-context.xml Chris Beams 6.0 52.0 0.115385
Costin Leau 22.0 52.0 0.423077
Keith Donald 24.0 52.0 0.461538
Michael Isvy 0.0 52.0 0.000000
src/test/resources/org/springframework/samples/petclinic/hibernate/HibernateClinicTests-context.xml Costin Leau 32.0 35.0 0.914286
Michael Isvy 3.0 35.0 0.085714
src/test/resources/org/springframework/samples/petclinic/jdbc/JdbcClinicTests-context.xml Michael Isvy 30.0 30.0 1.000000
src/test/resources/org/springframework/samples/petclinic/jdbc/SimpleJdbcClinicTests-context.xml Costin Leau 11.0 11.0 1.000000
Michael Isvy 0.0 11.0 0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-entityManager.xml Costin Leau 16.0 17.0 0.941176
Michael Isvy 1.0 17.0 0.058824
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-hibernateAdapter.xml Costin Leau 9.0 9.0 1.000000
Michael Isvy 0.0 9.0 0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-jpaCommon.xml Chris Beams 8.0 62.0 0.129032
Costin Leau 31.0 62.0 0.500000
Keith Donald 23.0 62.0 0.370968
Michael Isvy 0.0 62.0 0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-openJpaAdapter.xml Costin Leau 9.0 9.0 1.000000
Michael Isvy 0.0 9.0 0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-toplinkAdapter.xml Costin Leau 9.0 9.0 1.000000
Michael Isvy 0.0 9.0 0.000000
src/test/resources/spring/mvc-test-config.xml Antoine Rey 12.0 12.0 1.000000
src/{main => test}/java/org/springframework/samples/petclinic/util/EntityUtils.java Dave Syer 0.0 0.0 NaN
travis.yml Michael Isvy 3.0 3.0 1.000000
travis.yml => .travis.yml Michael Isvy 0.0 0.0 NaN
{org.springframework.samples.petclinic/src/main/webapp => src/main/webapp/resources}/images/banner-graphic.png Michael Isvy NaN 0.0 NaN
{org.springframework.samples.petclinic/src/main/webapp => src/main/webapp/resources}/images/pets.png Michael Isvy NaN 0.0 NaN
{org.springframework.samples.petclinic/src/main/webapp => src/main/webapp/resources}/images/springsource-logo.png Michael Isvy NaN 0.0 NaN

2647 rows × 3 columns


In [ ]:
log.groupby(['filename', 'author'])[['additions']].agg

In [22]:
log.groupby(['filename', 'author'], level=0)[['additions']].sum()


Out[22]:
additions
1 2
3 1
5 1
8 1
9 11
11 10
12 20
13 0
15 1
16 2
17 17
19 5
20 1
21 1
22 43
24 2
26 2
28 1
30 0
32 1
34 4
36 3
37 5
38 1
39 10
40 23
42 1
44 0
45 2
47 9
... ...
5048 12
5049 3
5050 10
5051 7
5052 6
5053 9
5054 3
5055 67
5056 13
5057 106
5058 14
5059 26
5060 36
5061 13
5062 39
5063 5
5064 25
5065 41
5066 9
5067 10
5068 9
5069 5
5070 42
5071 12
5072 14
5073 36
5074 12
5075 36
5076 42
5078 4

4520 rows × 1 columns