In [1]:
import pandas as pd
# Load the per-class coverage export into a DataFrame and preview the first rows.
coverage = pd.read_csv(
    filepath_or_buffer="../dataset/jacoco_production_coverage_spring_petclinic.csv"
)
coverage.head()
Out[1]:
In [2]:
# Total line count per row = missed lines + covered lines.
coverage = coverage.assign(
    lines=coverage['LINE_MISSED'] + coverage['LINE_COVERED']
)
coverage.head()
Out[2]:
In [3]:
# Ratio of covered lines to all lines (a value between 0 and 1; NaN where
# a row has zero lines).
coverage = coverage.assign(
    covered=coverage['LINE_COVERED'] / coverage['lines']
)
coverage.head()
Out[3]:
In [4]:
%matplotlib inline
coverage.covered.hist()
Out[4]:
In [5]:
# Build the fully qualified name as "<PACKAGE>.<CLASS>".
coverage = coverage.assign(
    fqn=coverage['PACKAGE'].str.cat(coverage['CLASS'], sep=".")
)
coverage.head()
Out[5]:
In [6]:
# Keep only the size and coverage ratio, keyed by fully qualified name.
coverage_per_class = coverage[['fqn', 'lines', 'covered']].set_index('fqn')
coverage_per_class.head()
Out[6]:
In [7]:
import requests
# `json_normalize` lives at the pandas top level since pandas 1.0; the
# `pandas.io.json` location is deprecated, so it is used only as a fallback
# for older pandas versions.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

URL = "https://sonarcloud.io/api/issues/search?languages=java&componentKeys=org.springframework.samples:spring-petclinic:boundedcontexts"
# Offline alternative: serve a saved response over HTTP and use the URL below.
# in C:\dev\repos\software-analytics\demos\dataset
# python -m "http.server" 28080
#URL = "http://localhost:28080/sonarqube_search.json"

# A timeout keeps the cell from hanging forever on an unreachable server, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# KeyError on 'issues' in a later cell.
# NOTE(review): only a single response is read; if the endpoint paginates,
# later pages are not fetched -- confirm this is intended.
response = requests.get(URL, timeout=30)
response.raise_for_status()
issues_json = response.json()
issues_json;
In [8]:
# Flatten the list under the 'issues' key of the response into a table.
# Uses the public top-level `pd.json_normalize` (available since pandas 1.0)
# rather than the deprecated `pandas.io.json.json_normalize` import.
issues = pd.json_normalize(issues_json['issues'])
issues.head()
Out[8]:
In [9]:
# Keep only the component key and the debt value. The explicit copy makes the
# result an independent frame, so the column assignments in later cells do not
# raise SettingWithCopyWarning.
issues = issues[['component', 'debt']].copy()
issues.head()
Out[9]:
In [10]:
# Convert the debt values to timedeltas in one vectorized call instead of
# constructing a pd.Timedelta per row. `assign` returns a new frame, so this
# works without a chained-assignment warning even if `issues` is a slice.
issues = issues.assign(debt=pd.to_timedelta(issues['debt']))
issues.head()
Out[10]:
In [11]:
# Derive a dotted name from the component key: take everything between
# "/java/" and the trailing ".java", then turn path separators into dots.
# - The dot before "java" is escaped and the pattern is anchored at the end,
#   so only a literal ".java" suffix is stripped.
# - expand=False yields a Series (one capture group) rather than a DataFrame.
# - regex=False makes the "/" replacement literal regardless of the pandas
#   version's default.
issues = issues.assign(
    fqn=issues['component']
    .str.extract(r"/java/(.*)\.java$", expand=False)
    .str.replace("/", ".", regex=False)
)
issues.fqn[0]
Out[11]:
In [12]:
# Preview the issues table after the name column was added.
issues.head(n=5)
Out[12]:
In [13]:
# Sum the debt of all issues that share the same name.
debt_per_class = issues[['fqn', 'debt']].groupby('fqn').sum()
debt_per_class.head()
Out[13]:
In [14]:
# Attach the summed debt to every coverage row; rows without a matching
# entry keep a missing debt value (left join on the index).
analysis = coverage_per_class.join(debt_per_class, how='left')
analysis.head()
Out[14]:
In [15]:
# Replace missing values with zero.
# The debt column holds timedeltas; filling it with the integer 0 is
# deprecated in pandas and, depending on the version, raises or turns the
# column into object dtype (which breaks the later `.dt` access). Fill it with
# a zero Timedelta first, then fill whatever is still missing with 0.
analysis = analysis.fillna({'debt': pd.Timedelta(0)}).fillna(0)
analysis.head()
Out[15]:
In [16]:
# Keywords used to assign each entry to a domain (comma-separated source list).
domain_keywords = "Owner,Pet,Visit,Vet,Specialty,Clinic"
domains = domain_keywords.split(",")
domains
Out[16]:
In [17]:
# Tag every row with a domain: default to "Other", then overwrite with each
# keyword that occurs in the row's index value. Keywords are applied in list
# order, so when several match, the last matching keyword wins.
analysis['domain'] = "Other"
for domain in domains:
    # regex=False: the keywords are plain substrings, not patterns.
    analysis.loc[analysis.index.str.contains(domain, regex=False), 'domain'] = domain
analysis.head()
Out[17]:
In [18]:
# Use the second-to-last dot-separated segment of the index value as the
# "tech" label.
name_parts = analysis.index.str.split(".")
analysis = analysis.assign(tech=name_parts.str.get(-2))
analysis.head()
Out[18]:
In [19]:
# Average coverage ratio per "tech" label.
analysis.groupby('tech')['covered'].mean()
Out[19]:
In [20]:
# Preview the combined analysis table.
analysis.head(n=5)
Out[20]:
In [21]:
# Aggregate per domain: mean coverage ratio, total debt, total line count.
# The debt is converted to minutes with total_seconds(); `.dt.seconds` only
# returns the seconds component within a day and silently drops whole days
# from sums that exceed 24 hours.
# Columns are renamed by name so the labels do not depend on column order.
management_compatible_data = (
    analysis
    .groupby('domain')
    .agg({"covered": "mean", "debt": "sum", "lines": "sum"})
    .assign(debt=lambda frame: frame['debt'].dt.total_seconds() / 60)
    .rename(columns={
        "covered": 'Utilization (%)',
        "debt": 'Technical Debt (min)',
        "lines": 'Size',
    })
)
management_compatible_data.head()
Out[21]:
In [22]:
%matplotlib inline
from ausi import portfolio
portfolio.plot_diagram(management_compatible_data, "Technical Debt (min)", "Utilization (%)", "Size", "Domain")
Out[22]:
In [23]:
# Aggregate per "tech" label: mean coverage ratio, total debt, total line count.
# The debt is converted to minutes with total_seconds(); `.dt.seconds` only
# returns the seconds component within a day and silently drops whole days
# from sums that exceed 24 hours.
# Columns are renamed by name so the labels do not depend on column order.
management_compatible_data = (
    analysis
    .groupby('tech')
    .agg({"covered": "mean", "debt": "sum", "lines": "sum"})
    .assign(debt=lambda frame: frame['debt'].dt.total_seconds() / 60)
    .rename(columns={
        "covered": 'Utilization (%)',
        "debt": 'Technical Debt (min)',
        "lines": 'Size',
    })
)
portfolio.plot_diagram(management_compatible_data, "Technical Debt (min)", "Utilization (%)", "Size", "Architecture")
Out[23]: