In [1]:
import numpy as np
import matplotlib as mpl
In [4]:
import json
# Read the evaluation statistics from rf_stats.json into `data`.
# Later cells iterate `data.items()` and read the "precision", "recall" and
# "fscore" entries of every value.
with open('rf_stats.json') as stats_file:
    data = json.load(stats_file)

# Dump the parsed structure so its contents can be inspected in the cell output.
print(data)
In [10]:
# Reshape `data` into one list of (threshold, value) pairs per metric.
# Every key of `data` is converted with float() and paired with the matching
# metric entry of its value.
precision = [(float(threshold), element["precision"]) for threshold, element in data.items()]
recall = [(float(threshold), element["recall"]) for threshold, element in data.items()]
fscore = [(float(threshold), element["fscore"]) for threshold, element in data.items()]
In [22]:
# Transpose each list of (threshold, value) pairs into two parallel tuples:
# the first holds the thresholds (x), the second the metric values (y).
px, py = zip(*precision)
rx, ry = zip(*recall)
fx, fy = zip(*fscore)
In [ ]:
In [23]:
# Convert every coordinate sequence into a plain list (x and y of each metric).
px, py = list(px), list(py)
rx, ry = list(rx), list(ry)
fx, fy = list(fx), list(fy)
In [30]:
import matplotlib.pyplot as plt
# Scatter precision, recall and F-score against the threshold.
# Each series carries a legend label and both axes are captioned so the
# figure can be understood without reading the code that produced it.
plt.scatter(px, py, color='yellow', label='Precision')
plt.scatter(rx, ry, color='blue', label='Recall')
plt.scatter(fx, fy, color='red', label='F-Score')
plt.xlabel('Threshold')
plt.ylabel('Precision / Recall / F-Score')
plt.legend(prop={'size': 10})
plt.show()
In [27]:
# Show the recall values (the y coordinates of the blue scatter series) as the cell output.
ry
Out[27]:
In [108]:
# Grouped bar chart of precision, recall and F1-score for four first-order
# feature configurations; the figure is written to first_order_eval.pdf.

# One value per configuration, in the same order as the x tick labels below.
precision = (0.755769, 0.735918, 0.722964, 0.741135)
recall = (0.550755, 0.41058, 0.23781, 0.553101)
f1score = (0.637177, 0.527089, 0.357895, 0.633458)

# number of groups, derived from the data so the two cannot drift apart
N = len(precision)
# the x locations for the groups (left edge of the first bar of each group)
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()

# align='edge' is passed explicitly so that bar placement, and therefore the
# centring of the tick labels below, does not depend on the installed
# matplotlib's default bar alignment.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + 2 * width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of first order features')

# x axis ticks: a group of three edge-aligned bars spans [ind, ind + 3 * width],
# so its centre lies at ind + 1.5 * width
ax.set_xticks(ind + 3 * width / 2)
ax.set_xticklabels(('all first order features', 'without link score', 'without entity score', 'without context score'))

# x and y axis ticks direction and on which axes they are displayed
ax.yaxis.set_tick_params(direction='out')
ax.xaxis.set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size': 10})

# force y-axis range
ax.set_ylim([0.0, 1.0])

# sets space between the y-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Attach a text label above each bar in `rects` displaying its height.

    The height is formatted with two decimals and centred horizontally on the
    bar. A bar whose height is NaN is labelled 'NaN' at y = 0 instead.
    Draws on the module-level `ax`.
    """
    for rect in rects:
        height = rect.get_height()
        x_mid = rect.get_x() + rect.get_width() / 2.
        if np.isnan(height):
            ax.text(x_mid, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_mid, height, '%.2f' % height, ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("first_order_eval.pdf", bbox_inches='tight')
In [101]:
# Write the figure currently bound to `fig` to first_order_eval.pdf with a tight bounding box.
# The cell above already ends with this same call, so this cell only repeats the save.
fig.savefig("first_order_eval.pdf", bbox_inches='tight')
In [116]:
# Grouped bar chart of precision, recall and F1-score for four feature sets
# (first order only, and with higher order features added); the figure is
# written to higher_order_eval.pdf.

# One value per configuration, in the same order as the x tick labels below.
precision = (0.755769, 0.76262, 0.782233, 0.791018)
recall = (0.550755, 0.53975, 0.545011, 0.527181)
f1score = (0.637177, 0.632115, 0.642422, 0.632696)

# number of groups, derived from the data so the two cannot drift apart
N = len(precision)
# the x locations for the groups (left edge of the first bar of each group)
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()

# align='edge' is passed explicitly so that bar placement, and therefore the
# centring of the tick labels below, does not depend on the installed
# matplotlib's default bar alignment.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + 2 * width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of higher order features')

# x axis ticks: a group of three edge-aligned bars spans [ind, ind + 3 * width],
# so its centre lies at ind + 1.5 * width
ax.set_xticks(ind + 3 * width / 2)
ax.set_xticklabels(('all first order features', 'with entity score\nhigher order features', 'with context score\nhigher order features', 'with both higher\norder features'))

# x and y axis ticks direction and on which axes they are displayed
ax.yaxis.set_tick_params(direction='out')
ax.xaxis.set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size': 10})

# force y-axis range
ax.set_ylim([0.0, 1.0])

# sets space between the y-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Attach a text label above each bar in `rects` displaying its height.

    The height is formatted with two decimals and centred horizontally on the
    bar. A bar whose height is NaN is labelled 'NaN' at y = 0 instead.
    Draws on the module-level `ax`.
    """
    for rect in rects:
        height = rect.get_height()
        x_mid = rect.get_x() + rect.get_width() / 2.
        if np.isnan(height):
            ax.text(x_mid, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_mid, height, '%.2f' % height, ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("higher_order_eval.pdf", bbox_inches='tight')
In [119]:
# Grouped bar chart of precision, recall and F1-score with all features and
# with one of the entity score's higher order features (rank, ∆top, ∆succ)
# left out; the figure is written to higher_order_eval_entity.pdf.

# One value per configuration, in the same order as the x tick labels below.
precision = (0.791018, 0.788075, 0.781005, 0.77749)
recall = (0.527181, 0.533276, 0.542331, 0.539451)
f1score = (0.632696, 0.636109, 0.640145, 0.636958)

# number of groups, derived from the data so the two cannot drift apart
N = len(precision)
# the x locations for the groups (left edge of the first bar of each group)
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()

# align='edge' is passed explicitly so that bar placement, and therefore the
# centring of the tick labels below, does not depend on the installed
# matplotlib's default bar alignment.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + 2 * width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of entity score\'s higher order features')

# x axis ticks: a group of three edge-aligned bars spans [ind, ind + 3 * width],
# so its centre lies at ind + 1.5 * width
ax.set_xticks(ind + 3 * width / 2)
ax.set_xticklabels(('all first and higher\norder features', 'without entity score\nhigher order feature rank', 'without entity score\nhigher order feature ∆top', 'without entity score\nhigher order feature ∆succ'))

# x and y axis ticks direction and on which axes they are displayed
ax.yaxis.set_tick_params(direction='out')
ax.xaxis.set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size': 10})

# force y-axis range
ax.set_ylim([0.0, 1.0])

# sets space between the y-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Attach a text label above each bar in `rects` displaying its height.

    The height is formatted with two decimals and centred horizontally on the
    bar. A bar whose height is NaN is labelled 'NaN' at y = 0 instead.
    Draws on the module-level `ax`.
    """
    for rect in rects:
        height = rect.get_height()
        x_mid = rect.get_x() + rect.get_width() / 2.
        if np.isnan(height):
            ax.text(x_mid, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_mid, height, '%.2f' % height, ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("higher_order_eval_entity.pdf", bbox_inches='tight')
In [120]:
# Grouped bar chart of precision, recall and F1-score with all features and
# with one of the context score's higher order features (rank, ∆top, ∆succ)
# left out; the figure is written to higher_order_eval_context.pdf.

# One value per configuration, in the same order as the x tick labels below.
precision = (0.791018, 0.766759, 0.762848, 0.758634)
recall = (0.527181, 0.555244, 0.558764, 0.555561)
f1score = (0.632696, 0.64408, 0.645049, 0.641408)

# number of groups, derived from the data so the two cannot drift apart
N = len(precision)
# the x locations for the groups (left edge of the first bar of each group)
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()

# align='edge' is passed explicitly so that bar placement, and therefore the
# centring of the tick labels below, does not depend on the installed
# matplotlib's default bar alignment.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + 2 * width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of context score\'s higher order features')

# x axis ticks: a group of three edge-aligned bars spans [ind, ind + 3 * width],
# so its centre lies at ind + 1.5 * width
ax.set_xticks(ind + 3 * width / 2)
ax.set_xticklabels(('all first and higher\norder features', 'without context score\nhigher order feature rank', 'without context score\nhigher order feature ∆top', 'without context score\nhigher order feature ∆succ'))

# x and y axis ticks direction and on which axes they are displayed
ax.yaxis.set_tick_params(direction='out')
ax.xaxis.set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size': 10})

# force y-axis range
ax.set_ylim([0.0, 1.0])

# sets space between the y-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Attach a text label above each bar in `rects` displaying its height.

    The height is formatted with two decimals and centred horizontally on the
    bar. A bar whose height is NaN is labelled 'NaN' at y = 0 instead.
    Draws on the module-level `ax`.
    """
    for rect in rects:
        height = rect.get_height()
        x_mid = rect.get_x() + rect.get_width() / 2.
        if np.isnan(height):
            ax.text(x_mid, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_mid, height, '%.2f' % height, ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("higher_order_eval_context.pdf", bbox_inches='tight')
In [137]:
# Grouped bar chart of precision, recall, F1-score and F0.25-score for four
# classification models; the figure is written to classifier_eval.pdf.

# One value per model, in the same order as the x tick labels below.
# NaN entries get a 'NaN' label from autolabel() instead of a number.
precision = (np.nan, np.nan, 0.765604, 0.791018)
recall = (0.0, 0.0, 0.589778, 0.527181)
f1score = (np.nan, np.nan, 0.666286, 0.632696)
f025score = (np.nan, np.nan, 0.752409, 0.768396)

# number of groups, derived from the data so the two cannot drift apart
N = len(precision)
# the x locations for the groups (left edge of the first bar of each group)
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()

# align='edge' is passed explicitly so that bar placement, and therefore the
# centring of the tick labels below, does not depend on the installed
# matplotlib's default bar alignment.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + 2 * width, f1score, width, color='#F44336', align='edge')
rects4 = ax.bar(ind + 3 * width, f025score, width, color='#9C27B0', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score / F0.25-Score')
ax.set_title('Evaluation of different classification models')

# x axis ticks: a group of four edge-aligned bars spans [ind, ind + 4 * width],
# so its centre lies at ind + 2 * width
ax.set_xticks(ind + 2 * width)
ax.set_xticklabels(('Naive Bayes', 'Logistic Regression', 'Gradient Boosted Trees', 'Random Forest'))

# x and y axis ticks direction and on which axes they are displayed
ax.yaxis.set_tick_params(direction='out')
ax.xaxis.set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# bar legend
ax.legend((rects1[0], rects2[0], rects3[0], rects4[0]), ('Precision', 'Recall', 'F1-Score', 'F0.25-Score'), prop={'size': 10}, loc='upper left')

# force y-axis range
ax.set_ylim([0.0, 1.0])

# sets space between the y-axis and the first bar
ax.margins(0.08, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Attach a text label above each bar in `rects` displaying its height.

    The height is formatted with two decimals and centred horizontally on the
    bar. A bar whose height is NaN is labelled 'NaN' at y = 0 instead.
    Draws on the module-level `ax`.
    """
    for rect in rects:
        height = rect.get_height()
        x_mid = rect.get_x() + rect.get_width() / 2.
        # np.isnan is used because `math` is only imported in a later cell, so
        # math.isnan would raise NameError when the notebook runs top to bottom.
        if np.isnan(height):
            ax.text(x_mid, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_mid, height, '%.2f' % height, ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
autolabel(rects3)
autolabel(rects4)

plt.show()
fig.savefig("classifier_eval.pdf", bbox_inches='tight')
In [130]:
import math
In [143]:
# Grouped bar chart of precision, recall and F1-score for the normal and the
# adjusted quality measures; the figure is written to qualitymeasure_eval.pdf.

# One value per configuration, in the same order as the x tick labels below.
precision = (0.791018, 0.775543)
recall = (0.527181, 0.544121)
f1score = (0.632696, 0.639541)

# number of groups, derived from the data so the two cannot drift apart
N = len(precision)
# the x locations for the groups (left edge of the first bar of each group)
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()

# align='edge' is passed explicitly so that bar placement, and therefore the
# centring of the tick labels below, does not depend on the installed
# matplotlib's default bar alignment.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + 2 * width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of quality measures with a Random Forest')

# x axis ticks: a group of three edge-aligned bars spans [ind, ind + 3 * width],
# so its centre lies at ind + 1.5 * width
ax.set_xticks(ind + 3 * width / 2)
ax.set_xticklabels(('normal quality measures', 'adjusted quality measures'))

# x and y axis ticks direction and on which axes they are displayed
ax.yaxis.set_tick_params(direction='out')
ax.xaxis.set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size': 10})

# force y-axis range
ax.set_ylim([0.0, 1.0])

# sets space between the y-axis and the first bar
ax.margins(0.08, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(8, 6)


def autolabel(rects):
    """Attach a text label above each bar in `rects` displaying its height.

    The height is formatted with two decimals and centred horizontally on the
    bar. A bar whose height is NaN is labelled 'NaN' at y = 0 instead.
    Draws on the module-level `ax`.
    """
    for rect in rects:
        height = rect.get_height()
        x_mid = rect.get_x() + rect.get_width() / 2.
        if np.isnan(height):
            ax.text(x_mid, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_mid, height, '%.2f' % height, ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("qualitymeasure_eval.pdf", bbox_inches='tight')
In [ ]:
In [ ]:
In [ ]:
In [ ]: