In [1]:
import pandas as pd
# Read the coverage CSV (a JaCoCo export, judging by the file name) into a frame.
coverage_csv = "../dataset/jacoco_production_coverage_spring_petclinic.csv"
coverage = pd.read_csv(coverage_csv)
# Preview the first rows to check the columns.
coverage.head()
Out[1]:
In [2]:
# Total number of lines per row: missed plus covered.
coverage['lines'] = coverage['LINE_MISSED'].add(coverage['LINE_COVERED'])
# Fraction of lines that were covered; a row with zero total lines
# divides 0 by 0 and therefore ends up as NaN rather than raising.
coverage['covered'] = coverage['LINE_COVERED'].div(coverage['lines'])
coverage.head()
Out[2]:
In [3]:
# Build the fully qualified name as "<PACKAGE>.<CLASS>".
coverage['fqn'] = coverage['PACKAGE'] + "." + coverage['CLASS']
# Index by that name and keep only the two metrics used further down.
coverage_by_fqn = coverage.set_index('fqn')
coverage_per_class = coverage_by_fqn.loc[:, ['lines', 'covered']]
coverage_per_class.head()
Out[3]:
In [4]:
# The log file has no header row; it is read as tab-separated values
# with these three column names assigned.
numstat_columns = ['additions', 'deletions', 'path']
git_log = pd.read_csv(
    "../dataset/git_log_numstat_spring_petclinic.log",
    sep="\t",
    names=numstat_columns)
git_log.head()
Out[4]:
In [5]:
# Derive a fully qualified name from each changed file path:
# take everything between "/java/" and the trailing ".java".
# The pattern is a raw string so that "\." reaches the regex engine as an
# escaped dot instead of being an invalid Python string escape.
# Paths that do not match the pattern yield NaN.
git_log['fqn'] = git_log['path'].str.extract(
    r"/java/(.*)\.java",
    expand=False)
# Turn the directory separators into package dots; regex=False makes it
# explicit that "/" is replaced literally.
git_log['fqn'] = git_log['fqn'].str.replace("/", ".", regex=False)
# Show the first converted entry as a sanity check.
git_log.loc[0, 'fqn']
Out[5]:
In [6]:
# Count the log rows per fully qualified name; each row is one recorded
# change to that file. Rows without an fqn are dropped by the groupby.
changes_per_file = git_log.groupby('fqn')['path'].count()
changes_per_file = changes_per_file.rename("changes")
changes_per_file.head()
Out[6]:
In [7]:
# Attach the change counts to the coverage metrics via the shared fqn index.
# This is a left join: every class from the coverage data is kept, and
# classes that never appear in the git log get NaN in "changes".
analysis = coverage_per_class.join(changes_per_file, how="left")
analysis.head()
Out[7]:
In [8]:
# Split each fully qualified name on dots and take the second-to-last
# segment as the "tech" label (for a plain package.Class name this is the
# innermost package).
fqn_segments = analysis.index.str.split(".")
analysis['tech'] = fqn_segments.str[-2]
analysis.head()
Out[8]:
In [9]:
# Aggregation per technology group: total lines, total changes, and the
# plain (unweighted) mean of the per-class coverage ratios.
tech_aggregations = {
    "lines": "sum",
    "covered": "mean",
    "changes": "sum",
}
tech_insights = analysis.groupby('tech').agg(tech_aggregations)
tech_insights
Out[9]:
In [10]:
%matplotlib inline
from ausi import portfolio
# Draw the portfolio diagram for the per-technology aggregates.
# NOTE(review): how plot_diagram maps the three column names (presumably
# x axis, y axis and bubble size) is defined by the ausi package - confirm there.
# The trailing semicolon suppresses the cell's return-value output.
portfolio.plot_diagram(tech_insights, "changes", "covered", "lines");
In [11]:
# Keywords searched for in the fully qualified class names. Order matters:
# a name matching several keywords ends up with the LAST one that matches.
domains = ["Owner", "Pet", "Visit", "Vet", "Specialty", "Clinic"]

# Everything that matches no keyword stays in the "Other" bucket.
analysis['domain'] = "Other"
for domain in domains:
    # Case-sensitive substring match against the index (the fqn).
    is_domain_class = analysis.index.str.contains(domain)
    analysis.loc[is_domain_class, 'domain'] = domain
analysis.head()
Out[11]:
In [12]:
# German display labels for the aggregated columns
# (size, degree of usage, investment).
german_labels = {
    "lines": "Größe",
    "covered": "Nutzungsgrad",
    "changes": "Investition",
}
# Aggregate per domain (total lines, unweighted mean coverage ratio,
# total changes) and relabel the columns for the diagram.
domain_insights = (
    analysis
    .groupby('domain')
    .agg({"lines": "sum", "covered": "mean", "changes": "sum"})
    .rename(columns=german_labels)
)
domain_insights
Out[12]:
In [13]:
# Draw the portfolio diagram for the per-domain aggregates, using the
# German column labels: "Investition" (summed changes), "Nutzungsgrad"
# (mean coverage ratio) and "Größe" (summed lines).
# NOTE(review): the role of each column argument is defined by ausi's
# plot_diagram - confirm against that package.
portfolio.plot_diagram(domain_insights, "Investition", "Nutzungsgrad", "Größe");
Erkenntnisse
Maßnahme: Für die Komponente "Other" müssen dringend qualitätsverbessernde Maßnahmen ergriffen werden.