Note: this notebook assumes the use of Python 3
In [2]:
%load preamble_directives.py
In [3]:
from django.contrib.auth.models import User
from source_code_analysis.models import SoftwareProject
In [4]:
from evaluations import Judge
In [5]:
from evaluations import calculate_agreement_scores
In [6]:
from evaluations import cohens_kappa
In [8]:
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('CoffeeMaker & %.3f & %.3f' % (unweighted_k, weighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.6.0) & %.3f & %.3f' % (unweighted_k, weighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.7.1) & %.3f & %.3f' % (unweighted_k, weighted_k))
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('JHotDraw (7.4.1) & %.3f & %.3f' % (unweighted_k, weighted_k))
In [56]:
print('-'*80)
print('\t\t CoffeeMaker')
print('-'*80)
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
print('-'*80)
print('\t\t JfreeChart 0.6.0')
print('-'*80)
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
print('-'*80)
print('\t\t JfreeChart 0.7.1')
print('-'*80)
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
print('-'*80)
print('\t\t JHotDraw')
print('-'*80)
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
J = calculate_agreement_scores(j1, j2)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
In [57]:
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('CoffeeMaker & %.3f & %.3f' % (unweighted_k, weighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.6.0) & %.3f & %.3f' % (unweighted_k, weighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.7.1) & %.3f & %.3f' % (unweighted_k, weighted_k))
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J)
weighted_k = cohens_kappa(J, weighted=True)
print('JHotDraw (7.4.1) & %.3f & %.3f' % (unweighted_k, weighted_k))
In [58]:
print('-'*80)
print('\t\t CoffeeMaker')
print('-'*80)
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
print('-'*80)
print('\t\t JFreechart 0.6.0')
print('-'*80)
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
print('-'*80)
print('\t\t JFreechart 0.7.1')
print('-'*80)
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
print('-'*80)
print('\t\t JHotDraw')
print('-'*80)
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
J = calculate_agreement_scores(j1, j2, k=5)
unweighted_k = cohens_kappa(J, log=True)
weighted_k = cohens_kappa(J, weighted=True, log=True)
print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))
In [59]:
# ------
# NOTE: In this case Weighted and Unweighted are exactly the same
# ------
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J)
# weighted_k = cohens_kappa(J, weighted=True)
print('CoffeeMaker & %.3f' % (unweighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J)
# weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.6.0) & %.3f' % (unweighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J)
# weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.7.1) & %.3f' % (unweighted_k,))
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J)
# weighted_k = cohens_kappa(J, weighted=True)
print('JHotDraw (7.4.1) & %.3f' % (unweighted_k))
In [60]:
# ------
# NOTE: In this case Weighted and Unweighted are exactly the same
# ------
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J, log=True)
# weighted_k = cohens_kappa(J, weighted=True)
print('CoffeeMaker & %.3f' % (unweighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J, log=True)
# weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.6.0) & %.3f' % (unweighted_k))
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J, log=True)
# weighted_k = cohens_kappa(J, weighted=True)
print('JFreechart (0.7.1) & %.3f' % (unweighted_k,))
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
J = calculate_agreement_scores(j1, j2, k=2)
unweighted_k = cohens_kappa(J, log=True)
# weighted_k = cohens_kappa(J, weighted=True)
print('JHotDraw (7.4.1) & %.3f' % (unweighted_k))
In [8]:
from evaluations import mean_precision
In [61]:
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
j1_eval = j1.three_codes_evaluations[2]
j2_eval = j2.three_codes_evaluations[2]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('CoffeeMaker & %.3f & %.3f & %.3f' % (pj1, pj2, f))
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
j1_eval = j1.three_codes_evaluations[2]
j2_eval = j2.three_codes_evaluations[2]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('JFreeChart (0.6.0) & %.3f & %.3f & %.3f' % (pj1, pj2, f))
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
j1_eval = j1.three_codes_evaluations[2]
j2_eval = j2.three_codes_evaluations[2]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('JFreeChart (0.7.1) & %.3f & %.3f & %.3f' % (pj1, pj2, f))
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
j1_eval = j1.three_codes_evaluations[2]
j2_eval = j2.three_codes_evaluations[2]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('JHotDraw (7.4.1) & %.3f & %.3f & %.3f' % (pj1, pj2, f))
In [62]:
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
j1_eval = j1.three_codes_evaluations[0]
j2_eval = j2.three_codes_evaluations[0]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('CoffeeMaker & %.3f & %.3f & %.3f' % (pj1, pj2, f))
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
j1_eval = j1.three_codes_evaluations[0]
j2_eval = j2.three_codes_evaluations[0]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('JFreeChart (0.6.0) & %.3f & %.3f & %.3f' % (pj1, pj2, f))
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
j1_eval = j1.three_codes_evaluations[0]
j2_eval = j2.three_codes_evaluations[0]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('JFreeChart (0.7.1) & %.3f & %.3f & %.3f' % (pj1, pj2, f))
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
j1_eval = j1.three_codes_evaluations[0]
j2_eval = j2.three_codes_evaluations[0]
pj1, pj2, f = mean_precision(j1_eval, j2_eval)
print('JHotDraw (7.4.1) & %.3f & %.3f & %.3f' % (pj1, pj2, f))
In [5]:
from source_code_analysis.models import AgreementEvaluation, SoftwareProject
from django.contrib.auth.models import User
In [6]:
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')
j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])
leo = User.objects.get(username='leonardo.nole')
ros = User.objects.get(username='rossella.linsalata')
# -------------------------
# NEG
# -------------------------
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
neg_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(neg_id_list))
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=ros)
neg_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(neg_id_list))
# -------------------------
# POS
# -------------------------
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
pos_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(pos_id_list))
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=ros)
pos_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(pos_id_list))
In [7]:
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')
j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])
leo = User.objects.get(username='leonardo.nole')
anto = User.objects.get(username='antonio.petrone')
# -------------------------
# NEG
# -------------------------
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
neg_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(neg_id_list))
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=anto)
neg_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(neg_id_list))
# -------------------------
# POS
# -------------------------
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
pos_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(pos_id_list))
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=anto)
pos_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(pos_id_list))
In [8]:
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')
j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])
leo = User.objects.get(username='leonardo.nole')
anto = User.objects.get(username='antonio.petrone')
# -------------------------
# NEG
# -------------------------
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
neg_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(neg_id_list))
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=anto)
neg_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(neg_id_list))
# -------------------------
# POS
# -------------------------
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
pos_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(pos_id_list))
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=anto)
pos_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(pos_id_list))
In [10]:
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')
j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])
leo = User.objects.get(username='leonardo.nole')
anto = User.objects.get(username='rossella.linsalata')
# -------------------------
# NEG
# -------------------------
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
neg_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(neg_id_list))
neg_id_list = list()
for meth_id in neg_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=anto)
neg_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(neg_id_list))
# -------------------------
# POS
# -------------------------
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=leo)
pos_id_list.append(str(ag_eval.pk))
print('J1:', ','.join(pos_id_list))
pos_id_list = list()
for meth_id in pos_diff:
ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id, evaluator=anto)
pos_id_list.append(str(ag_eval.pk))
print('J2:', ','.join(pos_id_list))
In [33]:
from sklearn.feature_extraction.text import TfidfVectorizer
judges_combinations = (('leonardo.nole', 'rossella.linsalata'),
('leonardo.nole', 'rossella.linsalata'),
('leonardo.nole', 'antonio.petrone'),
('leonardo.nole', 'antonio.petrone'),)
CODES_Labels = ('NC', 'DK', 'CO')
from collections import defaultdict
stats_results = defaultdict(list)
for pno, project in enumerate(projects):
if not pno == 1:
continue
# Get Methods
code_methods = project.code_methods.all()
# Populate the Doc Collection
document_collection = list()
method_ids_map = dict() # Map (dict) to store the association method.pk --> Row index in Tfidf Matrix
for mno, method in enumerate(code_methods):
clexicon_info = method.lexical_info
document_collection.append(clexicon_info.normalized_comment)
document_collection.append(clexicon_info.normalized_code)
method_ids_map[method.id] = mno * 2
vectorizer = TfidfVectorizer(input='content', sublinear_tf=True, lowercase=False)
tfidf_values = vectorizer.fit_transform(document_collection)
j1_usrname, j2_usrname = judges_combinations[pno]
j1 = Judge(j1_usrname, project.name, project.version)
j2 = Judge(j2_usrname, project.name, project.version)
j1_evals = j1.three_codes_evaluations
j2_evals = j2.three_codes_evaluations
project_stats = list()
method_ids = list()
for code in range(3):
j1_evals_code = j1_evals[code]
j2_evals_code = j2_evals[code]
method_ids.extend(j1_evals_code.intersection(j2_evals_code))
cosine_sim_vals = list()
for mid in method_ids:
i = method_ids_map[mid]
assert i % 2 == 0, print(i, mid)
dotprod = tfidf_values[i].dot(tfidf_values[i+1].T)[0,0]
cosine_sim_vals.append(dotprod)
if dotprod == 1.0:
print('MID: ', mid)
vals = np.array(cosine_sim_vals)
print('{proj} ({ver}) & {total} & {min:.3} & {max:.3} & {median:.3} & {mean:.3} & {variance:.3} & {devstd:.3} \\\\'.format(
proj = project.name.title(),
ver=project.version,
total=vals.size,
min=vals.min(),
max=vals.max(),
median=median(vals),
mean=vals.mean(),
variance=var(vals),
devstd=std(vals)))
In [30]:
from sklearn.feature_extraction.text import TfidfVectorizer
judges_combinations = (('leonardo.nole', 'rossella.linsalata'),
('leonardo.nole', 'rossella.linsalata'),
('leonardo.nole', 'antonio.petrone'),
('leonardo.nole', 'antonio.petrone'),)
CODES_Labels = ('NC', 'DK', 'CO')
from collections import defaultdict
stats_results = defaultdict(list)
for pno, project in enumerate(projects):
# Get Methods
code_methods = project.code_methods.all()
# Populate the Doc Collection
document_collection = list()
method_ids_map = dict() # Map (dict) to store the association method.pk --> Row index in Tfidf Matrix
for mno, method in enumerate(code_methods):
clexicon_info = method.lexical_info
document_collection.append(clexicon_info.normalized_comment)
document_collection.append(clexicon_info.normalized_code)
method_ids_map[method.id] = mno * 2
vectorizer = TfidfVectorizer(input='content', sublinear_tf=True, lowercase=False)
tfidf_values = vectorizer.fit_transform(document_collection)
j1_usrname, j2_usrname = judges_combinations[pno]
j1 = Judge(j1_usrname, project.name, project.version)
j2 = Judge(j2_usrname, project.name, project.version)
j1_evals = j1.three_codes_evaluations
j2_evals = j2.three_codes_evaluations
project_stats = list()
method_ids = list()
for code in range(3):
j1_evals_code = j1_evals[code]
j2_evals_code = j2_evals[code]
method_ids.extend(j1_evals_code.intersection(j2_evals_code))
cosine_sim_vals = list()
for mid in method_ids_map:
if not mid in method_ids:
i = method_ids_map[mid]
cosine_sim_vals.append(tfidf_values[i].dot(tfidf_values[i+1].T)[0,0])
vals = np.array(cosine_sim_vals)
print('{proj} ({ver}) & {total} & {min:.3} & {max:.3} & {median:.3} & {mean:.3} & {variance:.3} & {devstd:.3} \\\\'.format(
proj = project.name.title(),
ver=project.version,
total=vals.size,
min=vals.min(),
max=vals.max(),
median=median(vals),
mean=vals.mean(),
variance=var(vals),
devstd=std(vals)))