In [ ]:
%matplotlib inline

import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import unicodedata
from os import path

# Session-wide plotting defaults: ggplot look and a large default figure size.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (18, 10)

In [ ]:
# Each pair is (experiment identifier used in the results directory tree,
# human-readable label used on the plots). Declaring them side by side keeps
# the two derived lists below aligned by position.
experiment_labels = [
    ("mfl", "Baseline"),
    ("bow_logreg", "Bag-of-Words w/Logistic Regression"),
    ("bopos_logreg", "Bag-of-PoS w/Logistic Regression"),
    ("pos_logreg", "BoW with PoS w/Logistic Regression"),
    ("wordvec_mlp_2_0", "Word Embeddings w/Multilayer Perceptron"),
    ("wordvecpos_mlp_2_0", "Word Embeddings with PoS w/Multilayer Perceptron"),
]

# Split the pairs into the two parallel lists used by the rest of the notebook.
experiments, experiments_names = (list(column) for column in zip(*experiment_labels))

# Root of the results tree (one zero-padded sub-directory per lemma index).
directory = "../resources/results/results_supervised_sensem/"
# Whitespace-separated list of lemmas, UTF-8 encoded.
lemmas_file = "../resources/sensem/lemmas"
# Number of rows allocated in the per-lemma result tables.
lemmas_amount = 215

In [ ]:
def _mean_metric(metric_path):
    """Return the mean of the float values stored one per line in ``metric_path``."""
    with open(metric_path, "r") as f:
        return np.mean([float(line.strip()) for line in f])


def _empty_results():
    """Return a zero-filled table: ``lemmas_amount`` rows, one column per experiment.

    The columns are passed explicitly so their order always matches
    ``experiments`` (a dict-built frame leaves the order up to the
    pandas/Python version in use).
    """
    return pd.DataFrame(
        np.zeros((lemmas_amount, len(experiments)), dtype=float),
        columns=experiments,
    )


# Read raw bytes and decode explicitly: calling .decode() on the result of a
# text-mode read only works on Python 2, whereas bytes.decode works everywhere.
with open(lemmas_file, "rb") as f:
    lemmas = unicodedata.normalize("NFC", f.read().decode("utf-8")).strip().split()

accuracies = _empty_results()
most_common_precision = _empty_results()
less_common_recall = _empty_results()

# Metric file name inside each experiment directory, paired with the table
# that collects its per-lemma mean.
metric_tables = [
    ("accuracy", accuracies),
    ("most_common_precision", most_common_precision),
    ("less_common_recall", less_common_recall),
]

# NOTE(review): assumes the lemmas file holds at most `lemmas_amount` entries,
# so every lemma index is an existing row label -- TODO confirm.
for lemma_index in range(len(lemmas)):
    lemma_dir = path.join(directory, "{:03}".format(lemma_index))

    # Lemmas without a results directory keep their initial 0.0 row.
    if not path.isdir(lemma_dir):
        continue

    for experiment in experiments:
        for metric_name, table in metric_tables:
            # Single .loc assignment instead of chained indexing
            # (table[experiment][row] = ...), which may write to a temporary
            # copy and leave the table unchanged.
            table.loc[lemma_index, experiment] = _mean_metric(
                path.join(lemma_dir, experiment, metric_name)
            )

In [ ]:
# Persist every per-lemma metric table as a CSV file in the working directory.
for metric_table, csv_name in (
    (accuracies, "accuracies_sensem_supervised.csv"),
    (most_common_precision, "mcp_sensem_supervised.csv"),
    (less_common_recall, "lcr_sensem_supervised.csv"),
):
    metric_table.to_csv(csv_name)

In [ ]:
# Box plot of the per-lemma accuracy of every experiment.
# The columns are selected in the order of `experiments` so that each box lines
# up with its label in `experiments_names`; the tick labels are applied by
# position, so relying on the frame's own column order could mislabel boxes.
accuracies_boxplot = accuracies[experiments].plot(kind='box', rot=5, patch_artist=True)
accuracies_boxplot.set_xticklabels(experiments_names)
accuracies_boxplot.set_xlabel("Experiment")
accuracies_boxplot.set_ylim((-0.01, 1.01))
accuracies_boxplot.set_ylabel("Accuracy")

# Ticks sit on the 0-1 accuracy scale but are labelled as percentages.
percent_ticks = list(range(0, 101, 10))
accuracies_boxplot.set_yticks([percent / 100.0 for percent in percent_ticks])
# Trailing semicolon keeps the notebook from echoing the returned tick labels.
accuracies_boxplot.set_yticklabels(percent_ticks);

In [ ]: