In [1]:
import numpy as np
import matplotlib as mpl

In [4]:
import json

# Read the evaluation statistics stored in rf_stats.json into `data` and echo
# the parsed mapping so the raw values are visible in the cell output.
with open('rf_stats.json') as stats_handle:
    data = json.loads(stats_handle.read())
print(data)


{'13': {'recall': 0.893993, 'fscore': 0.51717, 'precision': 0.363818}, '4': {'recall': 0.798425, 'fscore': 0.665911, 'precision': 0.571123}, '15': {'recall': 0.903619, 'fscore': 0.485015, 'precision': 0.331463}, '2.5': {'recall': 0.747807, 'fscore': 0.686329, 'precision': 0.634191}, '5': {'recall': 0.816592, 'fscore': 0.65293, 'precision': 0.543917}, '12.5': {'recall': 0.89051, 'fscore': 0.526852, 'precision': 0.374086}, '10.5': {'recall': 0.876052, 'fscore': 0.555621, 'precision': 0.40682}, '3': {'recall': 0.775688, 'fscore': 0.678396, 'precision': 0.60279}, '12': {'recall': 0.88614, 'fscore': 0.536779, 'precision': 0.384995}, '3.5': {'recall': 0.788299, 'fscore': 0.672443, 'precision': 0.586278}, '8.5': {'recall': 0.856729, 'fscore': 0.588555, 'precision': 0.448245}, '9': {'recall': 0.862046, 'fscore': 0.58108, 'precision': 0.438244}, '14.5': {'recall': 0.901567, 'fscore': 0.492072, 'precision': 0.338379}, '9.5': {'recall': 0.867381, 'fscore': 0.571026, 'precision': 0.425609}, '8': {'recall': 0.852412, 'fscore': 0.594704, 'precision': 0.456647}, '5.5': {'recall': 0.823961, 'fscore': 0.645041, 'precision': 0.529962}, '6.5': {'recall': 0.836012, 'fscore': 0.627863, 'precision': 0.502701}, '4.5': {'recall': 0.807487, 'fscore': 0.66011, 'precision': 0.558226}, '11.5': {'recall': 0.882406, 'fscore': 0.542666, 'precision': 0.391812}, '1': {'recall': 0.555632, 'fscore': 0.645298, 'precision': 0.769471}, '13.5': {'recall': 0.896797, 'fscore': 0.508024, 'precision': 0.354391}, '0.5': {'recall': 0.357814, 'fscore': 0.510504, 'precision': 0.890514}, '14': {'recall': 0.899756, 'fscore': 0.498592, 'precision': 0.344841}, '2': {'recall': 0.697279, 'fscore': 0.687082, 'precision': 0.67718}, '6': {'recall': 0.829979, 'fscore': 0.638204, 'precision': 0.518419}, '11': {'recall': 0.879784, 'fscore': 0.548553, 'precision': 0.398515}, '7': {'recall': 0.841102, 'fscore': 0.617137, 'precision': 0.487363}, '7.5': {'recall': 0.848595, 'fscore': 0.601223, 'precision': 0.46552}, '10': 
{'recall': 0.872115, 'fscore': 0.561738, 'precision': 0.414295}, '1.5': {'recall': 0.645914, 'fscore': 0.676122, 'precision': 0.709294}}

In [10]:
# Build one list of (threshold, value) pairs per metric. The keys of `data`
# are threshold strings, converted to float; each value is a dict holding the
# "precision", "recall" and "fscore" entries for that threshold.
precision, recall, fscore = (
    [(float(threshold), element[metric]) for threshold, element in data.items()]
    for metric in ("precision", "recall", "fscore")
)

In [22]:
# Transpose each list of (threshold, value) pairs into a tuple of x values
# (thresholds) and a tuple of y values (metric values).
(px, py), (rx, ry), (fx, fy) = (
    zip(*pairs) for pairs in (precision, recall, fscore)
)

In [ ]:


In [23]:
# Turn the coordinate tuples into plain lists.
px, py, rx, ry, fx, fy = map(list, (px, py, rx, ry, fx, fy))

In [30]:
import matplotlib.pyplot as plt

# Scatter precision, recall and F-score against the threshold they were
# measured at. The series are labelled and the axes titled so the figure can
# be read on its own; the colours are unchanged (yellow = precision,
# blue = recall, red = F-score).
fig, ax = plt.subplots()
ax.scatter(px, py, color='yellow', label='Precision')
ax.scatter(rx, ry, color='blue', label='Recall')
ax.scatter(fx, fy, color='red', label='F-Score')

ax.set_xlabel('Threshold')
ax.set_ylabel('Precision / Recall / F-Score')
ax.set_title('Precision, recall and F-score per threshold')
ax.legend(prop={'size': 10})

plt.show()



In [27]:
# Display the recall values (the y coordinates of the recall series) for a
# quick visual check; their order follows the iteration order of `data`.
ry


Out[27]:
[0.893993,
 0.798425,
 0.903619,
 0.747807,
 0.816592,
 0.89051,
 0.876052,
 0.775688,
 0.88614,
 0.788299,
 0.856729,
 0.862046,
 0.901567,
 0.867381,
 0.852412,
 0.823961,
 0.836012,
 0.807487,
 0.882406,
 0.555632,
 0.896797,
 0.357814,
 0.899756,
 0.697279,
 0.829979,
 0.879784,
 0.841102,
 0.848595,
 0.872115,
 0.645914]

In [108]:
# Grouped bar chart of precision / recall / F1-score for four first-order
# feature configurations. The figure is shown and written to
# first_order_eval.pdf.
N = 4
precision = (0.755769, 0.735918, 0.722964, 0.741135)
recall = (0.550755, 0.41058, 0.23781, 0.553101)
f1score = (0.637177, 0.527089, 0.357895, 0.633458)
# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()
# align='edge' is pinned explicitly: the tick positions below assume that each
# bar starts at its x value. matplotlib >= 2.0 centres bars by default, which
# would leave the group labels off-centre.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + width + width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of first order features')
# x axis ticks: centre of the middle bar of each three-bar group
ax.set_xticks(ind + width + width / 2)
ax.set_xticklabels(('all first order features', 'without link score', 'without entity score', 'without context score'))
# x and y axis ticks direction and on which axes they are displayed
ax.get_yaxis().set_tick_params(direction='out')
ax.get_xaxis().set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size':10})

# Force y-axis range
axes = plt.gca()
axes.set_ylim([0.0,1.0])

# sets space between x-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Write each bar's height, formatted to two decimals, just above the bar.

    rects: the bar patches returned by one ``ax.bar`` call.
    The text is drawn on the ``ax`` created in this cell.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., height, '%.2f' % height, ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("first_order_eval.pdf", bbox_inches='tight')



In [101]:
# Writes the current `fig` to first_order_eval.pdf again. When the cells run
# top to bottom this repeats the save at the end of the preceding cell.
fig.savefig("first_order_eval.pdf", bbox_inches='tight')

In [116]:
# Grouped bar chart of precision / recall / F1-score for the first-order
# baseline and three higher-order feature configurations. The figure is shown
# and written to higher_order_eval.pdf.
N = 4
precision = (0.755769, 0.76262, 0.782233, 0.791018)
recall = (0.550755, 0.53975, 0.545011, 0.527181)
f1score = (0.637177, 0.632115, 0.642422, 0.632696)
# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()
# align='edge' is pinned explicitly: the tick positions below assume that each
# bar starts at its x value. matplotlib >= 2.0 centres bars by default, which
# would leave the group labels off-centre.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + width + width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of higher order features')
# x axis ticks: centre of the middle bar of each three-bar group
ax.set_xticks(ind + width + width / 2)
ax.set_xticklabels(('all first order features', 'with entity score\nhigher order features', 'with context score\nhigher order features', 'with both higher\norder features'))
# x and y axis ticks direction and on which axes they are displayed
ax.get_yaxis().set_tick_params(direction='out')
ax.get_xaxis().set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size':10})

# Force y-axis range
axes = plt.gca()
axes.set_ylim([0.0,1.0])

# sets space between x-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Write each bar's height, formatted to two decimals, just above the bar.

    rects: the bar patches returned by one ``ax.bar`` call.
    The text is drawn on the ``ax`` created in this cell.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., height, '%.2f' % height, ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("higher_order_eval.pdf", bbox_inches='tight')



In [119]:
# Grouped bar chart of precision / recall / F1-score with all features versus
# leaving out one entity-score higher-order feature at a time. The figure is
# shown and written to higher_order_eval_entity.pdf.
N = 4
precision = (0.791018, 0.788075, 0.781005, 0.77749)
recall = (0.527181, 0.533276, 0.542331, 0.539451)
f1score = (0.632696, 0.636109, 0.640145, 0.636958)
# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()
# align='edge' is pinned explicitly: the tick positions below assume that each
# bar starts at its x value. matplotlib >= 2.0 centres bars by default, which
# would leave the group labels off-centre.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + width + width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of entity score\'s higher order features')
# x axis ticks: centre of the middle bar of each three-bar group
ax.set_xticks(ind + width + width / 2)
ax.set_xticklabels(('all first and higher\norder features', 'without entity score\nhigher order feature rank', 'without entity score\nhigher order feature ∆top', 'without entity score\nhigher order feature ∆succ'))
# x and y axis ticks direction and on which axes they are displayed
ax.get_yaxis().set_tick_params(direction='out')
ax.get_xaxis().set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size':10})

# Force y-axis range
axes = plt.gca()
axes.set_ylim([0.0,1.0])

# sets space between x-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Write each bar's height, formatted to two decimals, just above the bar.

    rects: the bar patches returned by one ``ax.bar`` call.
    The text is drawn on the ``ax`` created in this cell.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., height, '%.2f' % height, ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("higher_order_eval_entity.pdf", bbox_inches='tight')



In [120]:
# Grouped bar chart of precision / recall / F1-score with all features versus
# leaving out one context-score higher-order feature at a time. The figure is
# shown and written to higher_order_eval_context.pdf.
N = 4
precision = (0.791018, 0.766759, 0.762848, 0.758634)
recall = (0.527181, 0.555244, 0.558764, 0.555561)
f1score = (0.632696, 0.64408, 0.645049, 0.641408)
# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()
# align='edge' is pinned explicitly: the tick positions below assume that each
# bar starts at its x value. matplotlib >= 2.0 centres bars by default, which
# would leave the group labels off-centre.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + width + width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of context score\'s higher order features')
# x axis ticks: centre of the middle bar of each three-bar group
ax.set_xticks(ind + width + width / 2)
ax.set_xticklabels(('all first and higher\norder features', 'without context score\nhigher order feature rank', 'without context score\nhigher order feature ∆top', 'without context score\nhigher order feature ∆succ'))
# x and y axis ticks direction and on which axes they are displayed
ax.get_yaxis().set_tick_params(direction='out')
ax.get_xaxis().set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size':10})

# Force y-axis range
axes = plt.gca()
axes.set_ylim([0.0,1.0])

# sets space between x-axis and the first bar
ax.margins(0.04, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Write each bar's height, formatted to two decimals, just above the bar.

    rects: the bar patches returned by one ``ax.bar`` call.
    The text is drawn on the ``ax`` created in this cell.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., height, '%.2f' % height, ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("higher_order_eval_context.pdf", bbox_inches='tight')



In [137]:
# Grouped bar chart of precision / recall / F1 / F0.25 for four classifiers.
# Metrics given as NaN are drawn as missing bars and annotated with 'NaN'.
# The figure is shown and written to classifier_eval.pdf.
N = 4
precision = (np.nan, np.nan, 0.765604, 0.791018)
recall = (0.0, 0.0, 0.589778, 0.527181)
f1score = (np.nan, np.nan, 0.666286, 0.632696)
f025score = (np.nan, np.nan, 0.752409, 0.768396)

# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()
# align='edge' is pinned explicitly: the tick positions below assume that each
# bar starts at its x value. matplotlib >= 2.0 centres bars by default, which
# would leave the group labels off-centre.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + width + width, f1score, width, color='#F44336', align='edge')
rects4 = ax.bar(ind + width + width + width, f025score, width, color='#9C27B0', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score / F0.25-Score')
ax.set_title('Evaluation of different classification models')
# x axis ticks: each group holds four bars spanning [ind, ind + 4 * width],
# so its centre is ind + 2 * width
ax.set_xticks(ind + 2 * width)
ax.set_xticklabels(('Naive Bayes', 'Logistic Regression', 'Gradient Boosted Trees', 'Random Forest'))
# x and y axis ticks direction and on which axes they are displayed
ax.get_yaxis().set_tick_params(direction='out')
ax.get_xaxis().set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# bar legend
ax.legend((rects1[0], rects2[0], rects3[0], rects4[0]), ('Precision', 'Recall', 'F1-Score', 'F0.25-Score'), prop={'size':10}, loc = 'upper left')

# Force y-axis range
axes = plt.gca()
axes.set_ylim([0.0,1.0])

# sets space between x-axis and the first bar
ax.margins(0.08, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(11, 6)


def autolabel(rects):
    """Annotate each bar with its height, or with 'NaN' when the height is NaN.

    rects: the bar patches returned by one ``ax.bar`` call.
    Numeric heights are written with two decimals just above the bar; NaN
    heights get the text 'NaN' at y = 0. The text is drawn on the ``ax``
    created in this cell.
    """
    for rect in rects:
        height = rect.get_height()
        x_center = rect.get_x() + rect.get_width()/2.
        # np.isnan is used instead of math.isnan because `math` is only
        # imported in a later cell, so this cell would otherwise raise
        # NameError on a fresh top-to-bottom run.
        if np.isnan(height):
            ax.text(x_center, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_center, height, '%.2f' % height, ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)
autolabel(rects4)

plt.show()
fig.savefig("classifier_eval.pdf", bbox_inches='tight')



In [130]:
import math

In [143]:
# Grouped bar chart of precision / recall / F1-score for two quality-measure
# variants. The figure is shown and written to qualitymeasure_eval.pdf.
N = 2
precision = (0.791018, 0.775543)
recall = (0.527181, 0.544121)
f1score = (0.632696, 0.639541)

# the x locations for the groups
ind = np.arange(N)
# the width of the bars
width = 0.20

fig, ax = plt.subplots()
# align='edge' is pinned explicitly: the tick positions below assume that each
# bar starts at its x value. matplotlib >= 2.0 centres bars by default, which
# would leave the group labels off-centre.
rects1 = ax.bar(ind, precision, width, color='#FF9800', align='edge')
rects2 = ax.bar(ind + width, recall, width, color='#2196F3', align='edge')
rects3 = ax.bar(ind + width + width, f1score, width, color='#F44336', align='edge')

# y-axis label and diagram title
ax.set_ylabel('Precision / Recall / F1-Score')
ax.set_title('Evaluation of quality measures with a Random Forest')
# x axis ticks: centre of the middle bar of each three-bar group
ax.set_xticks(ind + width + width / 2)
ax.set_xticklabels(('normal quality measures', 'adjusted quality measures'))
# x and y axis ticks direction and on which axes they are displayed
ax.get_yaxis().set_tick_params(direction='out')
ax.get_xaxis().set_tick_params(direction='out')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# bar legend
ax.legend((rects1[0], rects2[0], rects3[0]), ('Precision', 'Recall', 'F1-Score'), prop={'size':10})

# Force y-axis range
axes = plt.gca()
axes.set_ylim([0.0,1.0])

# sets space between x-axis and the first bar
ax.margins(0.08, 0)

# sets gridlines on y-axis
ax.yaxis.grid(which="major", color='#000000', linestyle='dotted', linewidth=0.5)

# changes the plot size
fig.set_size_inches(8, 6)


def autolabel(rects):
    """Annotate each bar with its height, or with 'NaN' when the height is NaN.

    rects: the bar patches returned by one ``ax.bar`` call.
    Numeric heights are written with two decimals just above the bar; NaN
    heights get the text 'NaN' at y = 0. The text is drawn on the ``ax``
    created in this cell.
    """
    for rect in rects:
        height = rect.get_height()
        x_center = rect.get_x() + rect.get_width()/2.
        if math.isnan(height):
            ax.text(x_center, 0, 'NaN', ha='center', va='bottom')
        else:
            ax.text(x_center, height, '%.2f' % height, ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)

plt.show()
fig.savefig("qualitymeasure_eval.pdf", bbox_inches='tight')



In [ ]:


In [ ]:


In [ ]:


In [ ]: