In [ ]:
%matplotlib inline
import io
import os
import unicodedata
from os import path

import matplotlib
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Apply the "ggplot" style sheet to every figure drawn in this session.
matplotlib.style.use("ggplot")
# Default figure size for this interactive session.
pylab.rcParams["figure.figsize"] = (18, 10)
In [ ]:
# One row per experiment: (name of its results sub-directory, label shown on plots).
# Keeping both in one table guarantees the two derived lists stay aligned.
_EXPERIMENT_TABLE = (
    ("mfl", "Baseline"),
    ("bow_logreg", "Bag-of-Words w/Logistic Regression"),
    ("bopos_logreg", "Bag-of-PoS w/Logistic Regression"),
    ("pos_logreg", "BoW with PoS w/Logistic Regression"),
    ("wordvec_mlp_2_0", "Word Embeddings w/Multilayer Perceptron"),
    ("wordvecpos_mlp_2_0", "Word Embeddings with PoS w/Multilayer Perceptron"),
)

# Split the table back into the two parallel lists used by the other cells.
experiments, experiments_names = (list(column) for column in zip(*_EXPERIMENT_TABLE))

# Root directory holding one numbered sub-directory of results per lemma.
directory = "../resources/results/results_supervised_sensem/"
# Text file listing the lemmas.
lemmas_file = "../resources/sensem/lemmas"
# Number of rows allocated in each per-lemma results table.
lemmas_amount = 215
In [ ]:
# Load the per-lemma, per-experiment metrics into three DataFrames
# (rows: lemma index, columns: experiment key).


def _mean_of_lines(file_path):
    """Return the mean of the float values stored one per line in *file_path*."""
    with open(file_path, "r") as handle:
        return np.mean([float(line.strip()) for line in handle])


def _zero_frame():
    """Return a (lemmas_amount x experiments) float table filled with 0.0.

    Columns are passed explicitly so that their order always follows
    ``experiments`` (building from a dict does not guarantee that on older
    Python/pandas), and the builtin ``float`` replaces ``np.float``, which
    has been removed from recent NumPy releases.
    """
    return pd.DataFrame(
        np.zeros((lemmas_amount, len(experiments)), dtype=float),
        columns=experiments,
    )


# io.open decodes the file as UTF-8 on both Python 2 and Python 3, so no
# separate .decode() call is needed before normalising to NFC.
with io.open(lemmas_file, "r", encoding="utf-8") as f:
    lemmas = unicodedata.normalize("NFC", f.read()).strip().split()

accuracies = _zero_frame()
most_common_precision = _zero_frame()
less_common_recall = _zero_frame()

# (file name inside each experiment directory, table that receives its mean)
_metric_frames = (
    ("accuracy", accuracies),
    ("most_common_precision", most_common_precision),
    ("less_common_recall", less_common_recall),
)

for lidx, lemma in enumerate(lemmas):
    # Result directories are named by the zero-padded lemma index: 000, 001, ...
    lemma_dir = path.join(directory, "{:03}".format(lidx))
    if not path.isdir(lemma_dir):
        # Lemmas without a results directory are skipped; their rows keep the
        # initial 0.0 in all three tables.
        # NOTE(review): those zeros also end up in the exported CSVs and the
        # box plot -- confirm that this is intended.
        continue
    for experiment in experiments:
        for metric, frame in _metric_frames:
            # Single .loc assignment instead of frame[col][row] = ...: chained
            # assignment may write to a temporary copy and leave the frame
            # unchanged.
            frame.loc[lidx, experiment] = _mean_of_lines(
                path.join(lemma_dir, experiment, metric)
            )
In [ ]:
# Write each metric table to its own CSV file in the working directory.
for frame, csv_name in (
    (accuracies, "accuracies_sensem_supervised.csv"),
    (most_common_precision, "mcp_sensem_supervised.csv"),
    (less_common_recall, "lcr_sensem_supervised.csv"),
):
    frame.to_csv(csv_name)
In [ ]:
# Box plot of the per-lemma accuracies, one box per experiment.
# The columns are selected explicitly in `experiments` order because the tick
# labels below are positional: without this, any difference between the
# frame's column order and `experiments` would put the wrong label on a box.
accuracies_boxplot = accuracies[experiments].plot(kind='box', rot=5, patch_artist=True)
accuracies_boxplot.set_xticklabels(experiments_names)
accuracies_boxplot.set_xlabel("Experiment")
# Slight padding around the [0, 1] accuracy range.
accuracies_boxplot.set_ylim((-0.01, 1.01))
accuracies_boxplot.set_ylabel("Accuracy")
# Ticks every 0.1, labelled as percentages; positions are derived from the
# labels so the two can never get out of step.
percent_ticks = list(range(0, 101, 10))
accuracies_boxplot.set_yticks([pct / 100.0 for pct in percent_ticks])
# Bound to a throwaway name so the notebook does not echo the return value.
x = accuracies_boxplot.set_yticklabels(percent_ticks)
In [ ]: