The main goal of this notebook is to collect all the evaluations about method coherence on which the evaluators reached a (perfect) agreement, and to store those data in the database as
coherence_dataset.models.Example instances.
These instances are only meant to ease the operations of getting the data from the database,
either to compute statistics or to train Machine Learning models.
In fact, querying Example instances directly avoids having to fetch (each time) the Judges'
evaluations, compute the intersection of their agreement, and finally inject the data
to create the training set.
Note: this notebook assumes the use of Python 3
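For reference, the rest of the notebook only relies on a couple of fields of the Example model. The following is a minimal sketch of what coherence_dataset.models.Example is assumed to look like, based solely on how it is used below; the related model name (source_code_analysis.CodeMethod), the concrete field types and max_length are assumptions, and the actual definition in coherence_dataset.models may differ.

from django.db import models
from coherence_dataset.settings import NOT_COHERENT, COHERENT

class Example(models.Model):
    # One Example per analysed method; this enables the method.example
    # reverse accessor (and the Example.DoesNotExist check) used below.
    method = models.OneToOneField('source_code_analysis.CodeMethod',  # assumed model name
                                  related_name='example',
                                  on_delete=models.CASCADE)
    # Agreed coherence label; assumed to be stored using the COHERENT /
    # NOT_COHERENT codes imported from coherence_dataset.settings.
    target = models.CharField(max_length=2,
                              choices=((COHERENT, 'Coherent'),
                                       (NOT_COHERENT, 'Not Coherent')))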
In [ ]:
%load preamble_directives.py
In [11]:
from source_code_analysis.models import SoftwareProject
projects = list()
projects.append(SoftwareProject.objects.get(name__iexact='CoffeeMaker', version__exact='1.0'))
projects.append(SoftwareProject.objects.get(name__iexact='Jfreechart', version__exact='0.6.0'))
projects.append(SoftwareProject.objects.get(name__iexact='Jfreechart', version__exact='0.7.1'))
projects.append(SoftwareProject.objects.get(name__iexact='JHotDraw', version__exact='7.4.1'))
print(projects)
In [ ]:
judges_combinations = (('leonardo.nole', 'rossella.linsalata'),
                       ('leonardo.nole', 'antonio.petrone'),
                       ('leonardo.nole', 'antonio.petrone'),
                       ('leonardo.nole', 'rossella.linsalata'),)
from coherence_dataset.settings import NOT_COHERENT, COHERENT
CODES_Labels = (NOT_COHERENT, COHERENT)
from collections import defaultdict
stats_results = defaultdict(list)
In [ ]:
from evaluations import Judge
from coherence_dataset.models import Example
for pno, project in enumerate(projects):
    print('Processing Evaluation for ', str(project), ' Project')
    # Get Methods and map their ids to the instances for quick lookup
    code_methods = project.code_methods.all()
    method_ids_map = dict()
    for method in code_methods:
        method_ids_map[method.id] = method
    print('Gathered ', len(method_ids_map.keys()), ' Methods')
    j1_usrname, j2_usrname = judges_combinations[pno]
    j1 = Judge(j1_usrname, project.name, project.version)
    j2 = Judge(j2_usrname, project.name, project.version)
    # getting just NC and CO evaluations
    j1_evals = j1.two_codes_evaluations
    j2_evals = j2.two_codes_evaluations
    project_stats = list()
    for i, label in enumerate(CODES_Labels):
        j1_evals_code = j1_evals[i]
        j2_evals_code = j2_evals[i]
        # Keep only the methods on which both judges agree on this label
        method_ids = j1_evals_code.intersection(j2_evals_code)
        print('Gathered ', len(method_ids),
              ' for {0} examples'.format('Positive' if label == COHERENT else 'Negative'))
        saved_instances_counter = 0
        for mid in method_ids:
            method = method_ids_map[mid]
            try:
                # Skip methods that already have an associated Example
                _ = method.example
            except Example.DoesNotExist:
                example = Example()
                example.method = method
                example.target = label
                example.save()
                saved_instances_counter += 1
        print('Saved ', saved_instances_counter,
              ' for {0} examples'.format('Positive' if label == COHERENT else 'Negative'))
Now verify that the instances have actually been saved into the DB and that querying the dataset works as expected.
In [ ]:
from coherence_dataset.models import Example
In [5]:
examples = Example.objects.all()
print("Total examples in Dataset: ", examples.count())
In [7]:
from coherence_dataset.settings import COHERENT, NOT_COHERENT
In [9]:
print("Positive examples: ", examples.filter(target=COHERENT).count())
In [10]:
print("Positive examples: ", examples.filter(target=NOT_COHERENT).count())
In [15]:
print('\t\t\t Positive \t Negative')
for project in projects:
    data = examples.filter(method__project__id=project.id)
    print('{0} \t {1} \t\t {2}'.format(str(project),
                                       data.filter(target=COHERENT).count(),
                                       data.filter(target=NOT_COHERENT).count()))
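Once the Example instances are in place, assembling a training set no longer requires touching the Judges' evaluations at all. The cell below is a minimal sketch of that idea: it only uses the method and target fields shown above, and the choice of mapping COHERENT to 1 is arbitrary; any actual feature extraction from the methods themselves is left out.
In [ ]:
# Build (method_id, label) pairs directly from the stored Examples,
# without re-computing the judges' agreement.
dataset = [(ex.method.id, ex.target)
           for ex in Example.objects.select_related('method')]
labels = [1 if target == COHERENT else 0 for _, target in dataset]
print('Dataset size: ', len(dataset),
      ' (', sum(labels), ' positive / ', len(labels) - sum(labels), ' negative)')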
In [ ]: