In [1]:
import pandas as pd
changes = pd.read_csv("git_log_prepared.log")
changes.head()
Out[1]:
author
filename
additions
deletions
0
Antoine Rey
src/test/java/petclinic/web/CrashControllerTes...
4.0
5.0
1
Antoine Rey
src/test/java/petclinic/web/OwnerControllerTes...
25.0
7.0
2
Antoine Rey
src/test/java/petclinic/web/PetControllerTests...
21.0
9.0
3
Antoine Rey
src/test/java/petclinic/web/VetControllerTests...
23.0
3.0
4
Antoine Rey
src/test/java/petclinic/web/VisitControllerTes...
10.0
6.0
In [3]:
top5_changes = pd.DataFrame(changes.filename.value_counts()).head()
top5_changes
Out[3]:
filename
src/main/java/petclinic/repository/jdbc/JdbcOwnerRepositoryImpl.java
23
src/main/java/petclinic/web/OwnerController.java
19
src/main/java/petclinic/repository/jdbc/JdbcVetRepositoryImpl.java
18
src/main/java/petclinic/web/PetController.java
18
src/main/java/petclinic/repository/jdbc/JdbcVisitRepositoryImpl.java
16
In [4]:
import glob
file_list = glob.glob("../../../buschmais-spring-petclinic/src/**/*.java", recursive=True)
file_list
lines_of_code = []
for filename in file_list:
lines = 0
indent = 0
with open(filename) as f:
for line in f.readlines():
line = line.replace("\t", " ")
# BUG: Just count first whitespaces, not all in line
indent = indent + line.count(" ")
lines = lines + 1
lines_of_code.append([filename, lines, indent])
lines_df = pd.DataFrame(lines_of_code, columns=["file", "lines", "indent"])
lines_df['file'] = lines_df.file.str.replace("\\", "/").str.replace("../../../buschmais-spring-petclinic/", "")
lines_df = lines_df.sort_values(by="lines", ascending=False)
lines_df['file'] = lines_df.file.str.replace("org/springframework/samples/", "")
lines_df.head()
Out[4]:
file
lines
indent
47
src/test/java/petclinic/service/AbstractClinic...
204
1283
52
src/test/java/petclinic/web/OwnerControllerTes...
185
1301
16
src/main/java/petclinic/repository/jdbc/JdbcOw...
158
1177
3
src/main/java/petclinic/model/Owner.java
153
938
39
src/main/java/petclinic/web/OwnerController.java
138
925
In [5]:
%matplotlib inline
lines_df.plot.scatter('lines', 'indent')
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x16a2ca91630>
In [6]:
changes.head()
Out[6]:
author
filename
additions
deletions
0
Antoine Rey
src/test/java/petclinic/web/CrashControllerTes...
4.0
5.0
1
Antoine Rey
src/test/java/petclinic/web/OwnerControllerTes...
25.0
7.0
2
Antoine Rey
src/test/java/petclinic/web/PetControllerTests...
21.0
9.0
3
Antoine Rey
src/test/java/petclinic/web/VetControllerTests...
23.0
3.0
4
Antoine Rey
src/test/java/petclinic/web/VisitControllerTes...
10.0
6.0
In [7]:
changes['lines'] = changes.additions - changes.deletions
changes.groupby("filename").sum().sort_values(by='lines', ascending=False).head()
Out[7]:
additions
deletions
lines
filename
src/test/java/petclinic/service/AbstractClinicServiceTests.java
388.0
184.0
204.0
src/test/java/petclinic/web/OwnerControllerTests.java
192.0
7.0
185.0
src/main/java/petclinic/repository/jdbc/JdbcOwnerRepositoryImpl.java
388.0
230.0
158.0
src/main/java/petclinic/model/Owner.java
290.0
137.0
153.0
src/main/java/petclinic/web/OwnerController.java
311.0
173.0
138.0
In [8]:
result = pd.merge(lines_df, changes, left_on='file', right_on="filename")
result.head()
Out[8]:
file
lines_x
indent
author
filename
additions
deletions
lines_y
0
src/test/java/petclinic/service/AbstractClinic...
204
1283
Attilio
src/test/java/petclinic/service/AbstractClinic...
10.0
0.0
10.0
1
src/test/java/petclinic/service/AbstractClinic...
204
1283
Faisal Hameed
src/test/java/petclinic/service/AbstractClinic...
1.0
1.0
0.0
2
src/test/java/petclinic/service/AbstractClinic...
204
1283
Antoine Rey
src/test/java/petclinic/service/AbstractClinic...
2.0
1.0
1.0
3
src/test/java/petclinic/service/AbstractClinic...
204
1283
Antoine Rey
src/test/java/petclinic/service/AbstractClinic...
86.0
86.0
0.0
4
src/test/java/petclinic/service/AbstractClinic...
204
1283
Antoine Rey
src/test/java/petclinic/service/AbstractClinic...
2.0
0.0
2.0
In [13]:
def aggregate_additions(group):
df = group.groupby(['author'])[['additions']].count()
df['all_additions'] = group['additions'].count()
df['ownership'] = df['additions'] / df['all_additions']
return df
ownerships = changes.groupby('filename').apply(aggregate_additions)
ownerships.groupby('filename')[['ownership']].max()
Out[13]:
ownership
filename
org.springframework.samples.petclinic/src/main/java/petclinic/HomeController.java
0.750000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointment.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentBook.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentForm.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointments.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentsController.java
0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/StubAppointmentBook.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/Owner.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerController.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerRepository.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnersController.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/StubOwnerRepository.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/Owner.java
0.750000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerController.java
0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerRepository.java
0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerSearchForm.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnersController.java
0.750000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/StubOwnerRepository.java
0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/Gender.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/Pet.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/PetController.java
0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/PetRepository.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/StubPetRepository.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/Gender.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/Pet.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/PetController.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/PetRepository.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/StubPetRepository.java
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/util/ExternalContext.java
0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/util/Measurement.java
0.500000
...
...
src/test/java/petclinic/repository/jdbc/JdbcVisitRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/jpa/JpaOwnerRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/jpa/JpaPetRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/jpa/JpaVetRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/jpa/JpaVisitRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/springdatajpa/JpaOwnerRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/springdatajpa/JpaPetRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/springdatajpa/JpaVetRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/springdatajpa/JpaVisitRepositoryImplTests.java
0.750000
src/test/java/petclinic/repository/springdatajpa/SpringDataOwnerRepositoryTests.java
0.750000
src/test/java/petclinic/service/AbstractClinicServiceTests.java
0.562500
src/test/java/petclinic/service/ClinicServiceJdbcTests.java
0.666667
src/test/java/petclinic/service/ClinicServiceJpaTests.java
0.800000
src/test/java/petclinic/service/ClinicServiceSpringDataJpaTests.java
0.666667
src/test/java/petclinic/springdatajpa/JpaOwnerRepositoryImplTests.java
1.000000
src/test/java/petclinic/springdatajpa/JpaPetRepositoryImplTests.java
1.000000
src/test/java/petclinic/springdatajpa/JpaVetRepositoryImplTests.java
1.000000
src/test/java/petclinic/springdatajpa/JpaVisitRepositoryImplTests.java
1.000000
src/test/java/petclinic/springdatajpa/SpringDataOwnerRepositoryTests.java
1.000000
src/test/java/petclinic/web/CrashControllerTests.java
0.500000
src/test/java/petclinic/web/OwnerControllerTests.java
0.500000
src/test/java/petclinic/web/PetControllerTests.java
0.500000
src/test/java/petclinic/web/PetTypeFormatterTests.java
1.000000
src/test/java/petclinic/web/VetControllerTest.java
1.000000
src/test/java/petclinic/web/VetControllerTests.java
0.714286
src/test/java/petclinic/web/VisitControllerTests.java
0.500000
src/test/java/petclinic/web/VisitsAtomViewTest.java
0.800000
src/test/java/petclinic/web/VisitsAtomViewWithContainerTest.java
0.666667
src/test/java/petclinic/web/VisitsViewTest.java
1.000000
src/test/java/petclinic/web/VisitsViewTests.java
1.000000
205 rows × 1 columns
In [19]:
ownerships.loc["org.springframework.samples.petclinic/src/main/java/petclinic/pet/PetRepository.java"]
Out[19]:
additions
all_additions
ownership
author
Keith Donald
2
2
1.0
In [16]:
ownerships.loc["org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerController.java"]
Out[16]:
additions
all_additions
ownership
author
Keith Donald
2
2
1.0
In [9]:
def aggregate_additions(group):
df = group.groupby(['author'])[['additions']].sum()
df['all_additions'] = group['additions'].sum()
df['ownership'] = df['additions'] / df['all_additions']
return df
changes.groupby('filename').apply(aggregate_additions)
Out[9]:
additions
all_additions
ownership
filename
author
org.springframework.samples.petclinic/src/main/java/petclinic/HomeController.java
Keith Donald
17.0
17.0
1.000000
Michael Isvy
0.0
17.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointment.java
Keith Donald
37.0
37.0
1.000000
Michael Isvy
0.0
37.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentBook.java
Keith Donald
13.0
13.0
1.000000
Michael Isvy
0.0
13.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentForm.java
Keith Donald
67.0
67.0
1.000000
Michael Isvy
0.0
67.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointments.java
Keith Donald
15.0
15.0
1.000000
Michael Isvy
0.0
15.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentsController.java
Keith Donald
44.0
44.0
1.000000
Michael Isvy
0.0
44.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/StubAppointmentBook.java
Keith Donald
23.0
23.0
1.000000
Michael Isvy
0.0
23.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/Owner.java
Keith Donald
26.0
26.0
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerController.java
Keith Donald
36.0
36.0
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerRepository.java
Keith Donald
13.0
13.0
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnersController.java
Keith Donald
45.0
45.0
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/StubOwnerRepository.java
Keith Donald
21.0
21.0
1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/Owner.java
Keith Donald
78.0
78.0
1.000000
Michael Isvy
0.0
78.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerController.java
Keith Donald
45.0
45.0
1.000000
Michael Isvy
0.0
45.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerRepository.java
Keith Donald
14.0
14.0
1.000000
Michael Isvy
0.0
14.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerSearchForm.java
Keith Donald
15.0
15.0
1.000000
Michael Isvy
0.0
15.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnersController.java
Keith Donald
50.0
50.0
1.000000
Michael Isvy
0.0
50.0
0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/StubOwnerRepository.java
Keith Donald
24.0
24.0
1.000000
...
...
...
...
...
src/test/java/petclinic/service/ClinicServiceJdbcTests.java
Michael Isvy
37.0
39.0
0.948718
src/test/java/petclinic/service/ClinicServiceJpaTests.java
Antoine Rey
3.0
33.0
0.090909
Michael Isvy
30.0
33.0
0.909091
src/test/java/petclinic/service/ClinicServiceSpringDataJpaTests.java
Antoine Rey
4.0
24.0
0.166667
Michael Isvy
20.0
24.0
0.833333
src/test/java/petclinic/springdatajpa/JpaOwnerRepositoryImplTests.java
Michael Isvy
36.0
36.0
1.000000
src/test/java/petclinic/springdatajpa/JpaPetRepositoryImplTests.java
Michael Isvy
30.0
30.0
1.000000
src/test/java/petclinic/springdatajpa/JpaVetRepositoryImplTests.java
Michael Isvy
30.0
30.0
1.000000
src/test/java/petclinic/springdatajpa/JpaVisitRepositoryImplTests.java
Michael Isvy
30.0
30.0
1.000000
src/test/java/petclinic/springdatajpa/SpringDataOwnerRepositoryTests.java
Michael Isvy
21.0
21.0
1.000000
src/test/java/petclinic/web/CrashControllerTests.java
Antoine Rey
4.0
57.0
0.070175
Colin But
53.0
57.0
0.929825
src/test/java/petclinic/web/OwnerControllerTests.java
Antoine Rey
25.0
192.0
0.130208
Colin But
167.0
192.0
0.869792
src/test/java/petclinic/web/PetControllerTests.java
Antoine Rey
21.0
134.0
0.156716
Colin But
113.0
134.0
0.843284
src/test/java/petclinic/web/PetTypeFormatterTests.java
Colin But
76.0
76.0
1.000000
src/test/java/petclinic/web/VetControllerTest.java
Michael Isvy
53.0
53.0
1.000000
src/test/java/petclinic/web/VetControllerTests.java
Antoine Rey
41.0
116.0
0.353448
Colin But
22.0
116.0
0.189655
Michael Isvy
53.0
116.0
0.456897
src/test/java/petclinic/web/VisitControllerTests.java
Antoine Rey
10.0
89.0
0.112360
Colin But
79.0
89.0
0.887640
src/test/java/petclinic/web/VisitsAtomViewTest.java
Costin Leau
90.0
214.0
0.420561
Gordon Dickens
60.0
214.0
0.280374
Michael Isvy
64.0
214.0
0.299065
src/test/java/petclinic/web/VisitsAtomViewWithContainerTest.java
Gordon Dickens
23.0
93.0
0.247312
Michael Isvy
70.0
93.0
0.752688
src/test/java/petclinic/web/VisitsViewTest.java
Michael Isvy
77.0
77.0
1.000000
src/test/java/petclinic/web/VisitsViewTests.java
Michael Isvy
77.0
77.0
1.000000
410 rows × 3 columns
Wegen Datenschutz reicht es ja, nur die "Stärke" der Wissenskonzentration zu visualisieren
In [ ]:
Content source: feststelltaste/software-analytics
Similar notebooks: