Content

  1. Read in two one files per read_csv() separately
  2. Merge two datasources /for lines of code(?) -> would be optional
  3. Calculate ownership

In [1]:
import pandas as pd

changes = pd.read_csv("git_log_prepared.log")
changes.head()


Out[1]:
author filename additions deletions
0 Antoine Rey src/test/java/petclinic/web/CrashControllerTes... 4.0 5.0
1 Antoine Rey src/test/java/petclinic/web/OwnerControllerTes... 25.0 7.0
2 Antoine Rey src/test/java/petclinic/web/PetControllerTests... 21.0 9.0
3 Antoine Rey src/test/java/petclinic/web/VetControllerTests... 23.0 3.0
4 Antoine Rey src/test/java/petclinic/web/VisitControllerTes... 10.0 6.0

In [3]:
top5_changes = pd.DataFrame(changes.filename.value_counts()).head()
top5_changes


Out[3]:
filename
src/main/java/petclinic/repository/jdbc/JdbcOwnerRepositoryImpl.java 23
src/main/java/petclinic/web/OwnerController.java 19
src/main/java/petclinic/repository/jdbc/JdbcVetRepositoryImpl.java 18
src/main/java/petclinic/web/PetController.java 18
src/main/java/petclinic/repository/jdbc/JdbcVisitRepositoryImpl.java 16

In [4]:
import glob

file_list = glob.glob("../../../buschmais-spring-petclinic/src/**/*.java", recursive=True)
file_list

lines_of_code = []


for filename in file_list:
    lines = 0
    indent = 0
    with open(filename) as f:
        for line in f.readlines():
            line = line.replace("\t", " ")
            # BUG: Just count first whitespaces, not all in line
            indent = indent + line.count(" ")
            lines = lines + 1
    lines_of_code.append([filename, lines, indent])
    
lines_df = pd.DataFrame(lines_of_code, columns=["file", "lines", "indent"])
lines_df['file'] = lines_df.file.str.replace("\\", "/").str.replace("../../../buschmais-spring-petclinic/", "")
lines_df = lines_df.sort_values(by="lines", ascending=False)
lines_df['file'] = lines_df.file.str.replace("org/springframework/samples/", "")

lines_df.head()


Out[4]:
file lines indent
47 src/test/java/petclinic/service/AbstractClinic... 204 1283
52 src/test/java/petclinic/web/OwnerControllerTes... 185 1301
16 src/main/java/petclinic/repository/jdbc/JdbcOw... 158 1177
3 src/main/java/petclinic/model/Owner.java 153 938
39 src/main/java/petclinic/web/OwnerController.java 138 925

In [5]:
%matplotlib inline
lines_df.plot.scatter('lines', 'indent')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x16a2ca91630>

In [6]:
changes.head()


Out[6]:
author filename additions deletions
0 Antoine Rey src/test/java/petclinic/web/CrashControllerTes... 4.0 5.0
1 Antoine Rey src/test/java/petclinic/web/OwnerControllerTes... 25.0 7.0
2 Antoine Rey src/test/java/petclinic/web/PetControllerTests... 21.0 9.0
3 Antoine Rey src/test/java/petclinic/web/VetControllerTests... 23.0 3.0
4 Antoine Rey src/test/java/petclinic/web/VisitControllerTes... 10.0 6.0

In [7]:
changes['lines'] = changes.additions - changes.deletions
changes.groupby("filename").sum().sort_values(by='lines', ascending=False).head()


Out[7]:
additions deletions lines
filename
src/test/java/petclinic/service/AbstractClinicServiceTests.java 388.0 184.0 204.0
src/test/java/petclinic/web/OwnerControllerTests.java 192.0 7.0 185.0
src/main/java/petclinic/repository/jdbc/JdbcOwnerRepositoryImpl.java 388.0 230.0 158.0
src/main/java/petclinic/model/Owner.java 290.0 137.0 153.0
src/main/java/petclinic/web/OwnerController.java 311.0 173.0 138.0

In [8]:
result = pd.merge(lines_df, changes, left_on='file', right_on="filename")
result.head()


Out[8]:
file lines_x indent author filename additions deletions lines_y
0 src/test/java/petclinic/service/AbstractClinic... 204 1283 Attilio src/test/java/petclinic/service/AbstractClinic... 10.0 0.0 10.0
1 src/test/java/petclinic/service/AbstractClinic... 204 1283 Faisal Hameed src/test/java/petclinic/service/AbstractClinic... 1.0 1.0 0.0
2 src/test/java/petclinic/service/AbstractClinic... 204 1283 Antoine Rey src/test/java/petclinic/service/AbstractClinic... 2.0 1.0 1.0
3 src/test/java/petclinic/service/AbstractClinic... 204 1283 Antoine Rey src/test/java/petclinic/service/AbstractClinic... 86.0 86.0 0.0
4 src/test/java/petclinic/service/AbstractClinic... 204 1283 Antoine Rey src/test/java/petclinic/service/AbstractClinic... 2.0 0.0 2.0

In [13]:
def aggregate_additions(group):
    df = group.groupby(['author'])[['additions']].count()
    df['all_additions'] = group['additions'].count()
    df['ownership'] = df['additions'] / df['all_additions']
    return df

ownerships = changes.groupby('filename').apply(aggregate_additions)
ownerships.groupby('filename')[['ownership']].max()


Out[13]:
ownership
filename
org.springframework.samples.petclinic/src/main/java/petclinic/HomeController.java 0.750000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointment.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentBook.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentForm.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointments.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentsController.java 0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/StubAppointmentBook.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/Owner.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerController.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerRepository.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnersController.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/StubOwnerRepository.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/Owner.java 0.750000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerController.java 0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerRepository.java 0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerSearchForm.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnersController.java 0.750000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/StubOwnerRepository.java 0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/Gender.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/Pet.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/PetController.java 0.666667
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/PetRepository.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/pets/StubPetRepository.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/Gender.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/Pet.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/PetController.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/PetRepository.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/pet/StubPetRepository.java 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/util/ExternalContext.java 0.500000
org.springframework.samples.petclinic/src/main/java/petclinic/util/Measurement.java 0.500000
... ...
src/test/java/petclinic/repository/jdbc/JdbcVisitRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/jpa/JpaOwnerRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/jpa/JpaPetRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/jpa/JpaVetRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/jpa/JpaVisitRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/springdatajpa/JpaOwnerRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/springdatajpa/JpaPetRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/springdatajpa/JpaVetRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/springdatajpa/JpaVisitRepositoryImplTests.java 0.750000
src/test/java/petclinic/repository/springdatajpa/SpringDataOwnerRepositoryTests.java 0.750000
src/test/java/petclinic/service/AbstractClinicServiceTests.java 0.562500
src/test/java/petclinic/service/ClinicServiceJdbcTests.java 0.666667
src/test/java/petclinic/service/ClinicServiceJpaTests.java 0.800000
src/test/java/petclinic/service/ClinicServiceSpringDataJpaTests.java 0.666667
src/test/java/petclinic/springdatajpa/JpaOwnerRepositoryImplTests.java 1.000000
src/test/java/petclinic/springdatajpa/JpaPetRepositoryImplTests.java 1.000000
src/test/java/petclinic/springdatajpa/JpaVetRepositoryImplTests.java 1.000000
src/test/java/petclinic/springdatajpa/JpaVisitRepositoryImplTests.java 1.000000
src/test/java/petclinic/springdatajpa/SpringDataOwnerRepositoryTests.java 1.000000
src/test/java/petclinic/web/CrashControllerTests.java 0.500000
src/test/java/petclinic/web/OwnerControllerTests.java 0.500000
src/test/java/petclinic/web/PetControllerTests.java 0.500000
src/test/java/petclinic/web/PetTypeFormatterTests.java 1.000000
src/test/java/petclinic/web/VetControllerTest.java 1.000000
src/test/java/petclinic/web/VetControllerTests.java 0.714286
src/test/java/petclinic/web/VisitControllerTests.java 0.500000
src/test/java/petclinic/web/VisitsAtomViewTest.java 0.800000
src/test/java/petclinic/web/VisitsAtomViewWithContainerTest.java 0.666667
src/test/java/petclinic/web/VisitsViewTest.java 1.000000
src/test/java/petclinic/web/VisitsViewTests.java 1.000000

205 rows × 1 columns


In [19]:
ownerships.loc["org.springframework.samples.petclinic/src/main/java/petclinic/pet/PetRepository.java"]


Out[19]:
additions all_additions ownership
author
Keith Donald 2 2 1.0

In [16]:
ownerships.loc["org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerController.java"]


Out[16]:
additions all_additions ownership
author
Keith Donald 2 2 1.0

In [9]:
def aggregate_additions(group):
    df = group.groupby(['author'])[['additions']].sum()
    df['all_additions'] = group['additions'].sum()
    df['ownership'] = df['additions'] / df['all_additions']
    return df

changes.groupby('filename').apply(aggregate_additions)


Out[9]:
additions all_additions ownership
filename author
org.springframework.samples.petclinic/src/main/java/petclinic/HomeController.java Keith Donald 17.0 17.0 1.000000
Michael Isvy 0.0 17.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointment.java Keith Donald 37.0 37.0 1.000000
Michael Isvy 0.0 37.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentBook.java Keith Donald 13.0 13.0 1.000000
Michael Isvy 0.0 13.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentForm.java Keith Donald 67.0 67.0 1.000000
Michael Isvy 0.0 67.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/Appointments.java Keith Donald 15.0 15.0 1.000000
Michael Isvy 0.0 15.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/AppointmentsController.java Keith Donald 44.0 44.0 1.000000
Michael Isvy 0.0 44.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/appointments/StubAppointmentBook.java Keith Donald 23.0 23.0 1.000000
Michael Isvy 0.0 23.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/Owner.java Keith Donald 26.0 26.0 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerController.java Keith Donald 36.0 36.0 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnerRepository.java Keith Donald 13.0 13.0 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/OwnersController.java Keith Donald 45.0 45.0 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owner/StubOwnerRepository.java Keith Donald 21.0 21.0 1.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/Owner.java Keith Donald 78.0 78.0 1.000000
Michael Isvy 0.0 78.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerController.java Keith Donald 45.0 45.0 1.000000
Michael Isvy 0.0 45.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerRepository.java Keith Donald 14.0 14.0 1.000000
Michael Isvy 0.0 14.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnerSearchForm.java Keith Donald 15.0 15.0 1.000000
Michael Isvy 0.0 15.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/OwnersController.java Keith Donald 50.0 50.0 1.000000
Michael Isvy 0.0 50.0 0.000000
org.springframework.samples.petclinic/src/main/java/petclinic/owners/StubOwnerRepository.java Keith Donald 24.0 24.0 1.000000
... ... ... ... ...
src/test/java/petclinic/service/ClinicServiceJdbcTests.java Michael Isvy 37.0 39.0 0.948718
src/test/java/petclinic/service/ClinicServiceJpaTests.java Antoine Rey 3.0 33.0 0.090909
Michael Isvy 30.0 33.0 0.909091
src/test/java/petclinic/service/ClinicServiceSpringDataJpaTests.java Antoine Rey 4.0 24.0 0.166667
Michael Isvy 20.0 24.0 0.833333
src/test/java/petclinic/springdatajpa/JpaOwnerRepositoryImplTests.java Michael Isvy 36.0 36.0 1.000000
src/test/java/petclinic/springdatajpa/JpaPetRepositoryImplTests.java Michael Isvy 30.0 30.0 1.000000
src/test/java/petclinic/springdatajpa/JpaVetRepositoryImplTests.java Michael Isvy 30.0 30.0 1.000000
src/test/java/petclinic/springdatajpa/JpaVisitRepositoryImplTests.java Michael Isvy 30.0 30.0 1.000000
src/test/java/petclinic/springdatajpa/SpringDataOwnerRepositoryTests.java Michael Isvy 21.0 21.0 1.000000
src/test/java/petclinic/web/CrashControllerTests.java Antoine Rey 4.0 57.0 0.070175
Colin But 53.0 57.0 0.929825
src/test/java/petclinic/web/OwnerControllerTests.java Antoine Rey 25.0 192.0 0.130208
Colin But 167.0 192.0 0.869792
src/test/java/petclinic/web/PetControllerTests.java Antoine Rey 21.0 134.0 0.156716
Colin But 113.0 134.0 0.843284
src/test/java/petclinic/web/PetTypeFormatterTests.java Colin But 76.0 76.0 1.000000
src/test/java/petclinic/web/VetControllerTest.java Michael Isvy 53.0 53.0 1.000000
src/test/java/petclinic/web/VetControllerTests.java Antoine Rey 41.0 116.0 0.353448
Colin But 22.0 116.0 0.189655
Michael Isvy 53.0 116.0 0.456897
src/test/java/petclinic/web/VisitControllerTests.java Antoine Rey 10.0 89.0 0.112360
Colin But 79.0 89.0 0.887640
src/test/java/petclinic/web/VisitsAtomViewTest.java Costin Leau 90.0 214.0 0.420561
Gordon Dickens 60.0 214.0 0.280374
Michael Isvy 64.0 214.0 0.299065
src/test/java/petclinic/web/VisitsAtomViewWithContainerTest.java Gordon Dickens 23.0 93.0 0.247312
Michael Isvy 70.0 93.0 0.752688
src/test/java/petclinic/web/VisitsViewTest.java Michael Isvy 77.0 77.0 1.000000
src/test/java/petclinic/web/VisitsViewTests.java Michael Isvy 77.0 77.0 1.000000

410 rows × 3 columns

Wegen Datenschutz reicht es ja, nur die "Stärke" der Wissenskonzentration zu visualisieren


In [ ]: