In [7]:
import pandas as pd
# reading
git_log = pd.read_csv(
"../../../buschmais-spring-petclinic/git_log_numstat.log",
sep="\t",
header=None,
names=[
'additions',
'deletions',
'filename',
'author'])
# converting in "one line"
log = git_log[['additions', 'deletions', 'filename']]\
.join(git_log[['author']]\
.fillna(method='ffill'))\
.dropna()
log['additions'] = pd.to_numeric(log['additions'], errors='coerce')
log['deletions'] = pd.to_numeric(log['deletions'], errors='coerce')
log['filename'] = log.filename.str.replace("org/springframework/samples/", "")
log[log['filename'].str.endswith(".java")].to_csv("git_log_prepared.log", columns=['author','filename','additions', 'deletions'], index=None)
log
Out[7]:
additions
deletions
filename
author
1
4.0
5.0
src/test/java/petclinic/web/CrashControllerTes...
Antoine Rey
2
25.0
7.0
src/test/java/petclinic/web/OwnerControllerTes...
Antoine Rey
3
21.0
9.0
src/test/java/petclinic/web/PetControllerTests...
Antoine Rey
4
23.0
3.0
src/test/java/petclinic/web/VetControllerTests...
Antoine Rey
5
10.0
6.0
src/test/java/petclinic/web/VisitControllerTes...
Antoine Rey
7
111.0
0.0
src/main/java/petclinic/PetclinicInitializer.java
Antoine Rey
9
4.0
14.0
src/main/java/petclinic/repository/jdbc/JdbcPe...
Antoine Rey
11
2.0
2.0
src/main/java/petclinic/web/VetController.java
Antoine Rey
12
14.0
14.0
src/test/java/petclinic/web/VetControllerTests...
Antoine Rey
14
22.0
5.0
src/main/java/petclinic/repository/jdbc/JdbcVi...
Attilio
15
1.0
1.0
src/main/java/petclinic/repository/jpa/JpaVisi...
Attilio
16
2.0
0.0
src/main/java/petclinic/service/ClinicService....
Attilio
17
5.0
0.0
src/main/java/petclinic/service/ClinicServiceI...
Attilio
18
10.0
0.0
src/test/java/petclinic/service/AbstractClinic...
Attilio
20
1.0
1.0
src/main/java/petclinic/model/BaseEntity.java
Faisal Hameed
21
5.0
4.0
src/main/java/petclinic/web/OwnerController.java
Faisal Hameed
22
7.0
6.0
src/main/java/petclinic/web/PetController.java
Faisal Hameed
23
5.0
3.0
src/main/java/petclinic/web/PetValidator.java
Faisal Hameed
24
1.0
1.0
src/test/java/petclinic/service/AbstractClinic...
Faisal Hameed
26
53.0
0.0
src/test/java/petclinic/web/CrashControllerTes...
Colin But
27
167.0
0.0
src/test/java/petclinic/web/OwnerControllerTes...
Colin But
28
113.0
0.0
src/test/java/petclinic/web/PetControllerTests...
Colin But
29
76.0
0.0
src/test/java/petclinic/web/PetTypeFormatterTe...
Colin But
30
22.0
6.0
src/test/java/petclinic/web/VetControllerTests...
Colin But
31
79.0
0.0
src/test/java/petclinic/web/VisitControllerTes...
Colin But
33
6.0
6.0
src/main/java/petclinic/model/Visit.java
Tomas Repel
34
2.0
2.0
src/main/java/petclinic/repository/jdbc/JdbcVi...
Tomas Repel
36
5.0
4.0
src/main/java/petclinic/model/Pet.java
Antoine Rey
37
2.0
1.0
src/main/java/petclinic/repository/jdbc/JdbcPe...
Antoine Rey
38
2.0
1.0
src/test/java/petclinic/service/AbstractClinic...
Antoine Rey
...
...
...
...
...
1052
44.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1053
21.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1054
5.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1055
25.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1056
41.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1057
9.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1058
15.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1059
0.0
5.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1060
0.0
25.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1061
0.0
41.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1062
0.0
9.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1063
0.0
15.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1065
1.0
1.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1066
0.0
1.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1067
6.0
1.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1069
2.0
1.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1070
21.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1071
15.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1073
14.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1074
26.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1075
36.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1076
13.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1077
39.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1078
5.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1079
25.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1080
41.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1081
9.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1082
10.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1083
9.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
1084
5.0
0.0
org.springframework.samples.petclinic/src/main...
Keith Donald
929 rows × 4 columns
In [104]:
def aggregate_additions(group):
df = group.groupby(['author'])[['additions']].sum()
df['all_additions'] = group['additions'].sum()
df['ownership'] = df['additions'] / df['all_additions']
return df
log.groupby('filename').apply(aggregate_additions)
Out[104]:
additions
all_additions
ownership
filename
author
.bowerrc
Antoine Rey
1.0
4.0
0.250000
Dapeng
0.0
4.0
0.000000
Faycal Ihabritane
3.0
4.0
0.750000
.classpath
Costin Leau
43.0
81.0
0.530864
Keith Donald
8.0
81.0
0.098765
Michael Isvy
28.0
81.0
0.345679
Rossen Stoyanchev
2.0
81.0
0.024691
.editorconfig
Antoine Rey
14.0
14.0
1.000000
.gitignore
Antoine Rey
3.0
12.0
0.250000
Cyrille Le Clerc
2.0
12.0
0.166667
Dave Syer
1.0
12.0
0.083333
Michael Isvy
6.0
12.0
0.500000
Patrick Crocker
0.0
12.0
0.000000
.mvn/wrapper/maven-wrapper.jar
Patrick Crocker
NaN
0.0
NaN
.mvn/wrapper/maven-wrapper.properties
Patrick Crocker
1.0
1.0
1.000000
.project
Costin Leau
36.0
59.0
0.610169
Keith Donald
16.0
59.0
0.271186
Michael Isvy
5.0
59.0
0.084746
Rossen Stoyanchev
2.0
59.0
0.033898
.settings/.jsdtscope
Keith Donald
11.0
13.0
0.846154
Michael Isvy
2.0
13.0
0.153846
.settings/com.springsource.server.ide.jdt.core.xml
Keith Donald
2.0
2.0
1.000000
Michael Isvy
0.0
2.0
0.000000
.settings/org.eclipse.jdt.core.prefs
Keith Donald
12.0
16.0
0.750000
Michael Isvy
4.0
16.0
0.250000
.settings/org.eclipse.jst.common.project.facet.core.prefs
Keith Donald
3.0
3.0
1.000000
Michael Isvy
0.0
3.0
0.000000
.settings/org.eclipse.m2e.core.prefs
Michael Isvy
4.0
4.0
1.000000
.settings/org.eclipse.m2e.wtp.prefs
Michael Isvy
2.0
2.0
1.000000
.settings/org.eclipse.wst.common.component
Costin Leau
10.0
38.0
0.263158
...
...
...
...
...
src/test/resources/logback-test.xml
Gordon Dickens
24.0
24.0
1.000000
Michael Isvy
0.0
24.0
0.000000
src/test/resources/org/springframework/samples/petclinic/AbstractClinicTests-context.xml
Chris Beams
6.0
52.0
0.115385
Costin Leau
22.0
52.0
0.423077
Keith Donald
24.0
52.0
0.461538
Michael Isvy
0.0
52.0
0.000000
src/test/resources/org/springframework/samples/petclinic/hibernate/HibernateClinicTests-context.xml
Costin Leau
32.0
35.0
0.914286
Michael Isvy
3.0
35.0
0.085714
src/test/resources/org/springframework/samples/petclinic/jdbc/JdbcClinicTests-context.xml
Michael Isvy
30.0
30.0
1.000000
src/test/resources/org/springframework/samples/petclinic/jdbc/SimpleJdbcClinicTests-context.xml
Costin Leau
11.0
11.0
1.000000
Michael Isvy
0.0
11.0
0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-entityManager.xml
Costin Leau
16.0
17.0
0.941176
Michael Isvy
1.0
17.0
0.058824
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-hibernateAdapter.xml
Costin Leau
9.0
9.0
1.000000
Michael Isvy
0.0
9.0
0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-jpaCommon.xml
Chris Beams
8.0
62.0
0.129032
Costin Leau
31.0
62.0
0.500000
Keith Donald
23.0
62.0
0.370968
Michael Isvy
0.0
62.0
0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-openJpaAdapter.xml
Costin Leau
9.0
9.0
1.000000
Michael Isvy
0.0
9.0
0.000000
src/test/resources/org/springframework/samples/petclinic/jpa/applicationContext-toplinkAdapter.xml
Costin Leau
9.0
9.0
1.000000
Michael Isvy
0.0
9.0
0.000000
src/test/resources/spring/mvc-test-config.xml
Antoine Rey
12.0
12.0
1.000000
src/{main => test}/java/org/springframework/samples/petclinic/util/EntityUtils.java
Dave Syer
0.0
0.0
NaN
travis.yml
Michael Isvy
3.0
3.0
1.000000
travis.yml => .travis.yml
Michael Isvy
0.0
0.0
NaN
{org.springframework.samples.petclinic/src/main/webapp => src/main/webapp/resources}/images/banner-graphic.png
Michael Isvy
NaN
0.0
NaN
{org.springframework.samples.petclinic/src/main/webapp => src/main/webapp/resources}/images/pets.png
Michael Isvy
NaN
0.0
NaN
{org.springframework.samples.petclinic/src/main/webapp => src/main/webapp/resources}/images/springsource-logo.png
Michael Isvy
NaN
0.0
NaN
2647 rows × 3 columns
In [ ]:
log.groupby(['filename', 'author'])[['additions']].agg
In [22]:
log.groupby(['filename', 'author'], level=0)[['additions']].sum()
Out[22]:
additions
1
2
3
1
5
1
8
1
9
11
11
10
12
20
13
0
15
1
16
2
17
17
19
5
20
1
21
1
22
43
24
2
26
2
28
1
30
0
32
1
34
4
36
3
37
5
38
1
39
10
40
23
42
1
44
0
45
2
47
9
...
...
5048
12
5049
3
5050
10
5051
7
5052
6
5053
9
5054
3
5055
67
5056
13
5057
106
5058
14
5059
26
5060
36
5061
13
5062
39
5063
5
5064
25
5065
41
5066
9
5067
10
5068
9
5069
5
5070
42
5071
12
5072
14
5073
36
5074
12
5075
36
5076
42
5078
4
4520 rows × 1 columns
Content source: feststelltaste/software-analytics
Similar notebooks: