In [30]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import re
import seaborn as sns
from scipy.stats import ttest_rel
from sklearn.metrics import (accuracy_score, classification_report,
    confusion_matrix, precision_recall_fscore_support)

In [31]:
with open("/home/ccardellino/datasets/classes.p", "rb") as f:
    classes = pickle.load(f)

Read the filenames

Specify here the files with the predictions to read, along with the abstraction level (iteration) each one corresponds to.


In [75]:
basedir_cl = '../results/echr/cl/evaluated_wordvectors_wiki/'
layers_string = '2000'
features = 'WordVectors'
outdir = '../results/echr/cl/evaluated_wordvectors_wiki/'

results_files = [
    ("NER",
     "",
     os.path.join(basedir_cl, "test_predictions_NER_%s.csv" % layers_string)
    ),
    ("ENTITY",
     "",
     os.path.join(basedir_cl, "test_predictions_NER_ENTITY_%s.csv" % layers_string)
#      os.path.join(basedir_cl, "test_predictions_ENTITY_%s.csv" % layers_string)
    ),
    ("LKIF",
     "",
     os.path.join(basedir_cl, "test_predictions_NER_ENTITY_LKIF_%s.csv" % layers_string)
#      os.path.join(basedir_cl, "test_predictions_LKIF_%s.csv" % layers_string)
    ),
    ("YAGO",
     "",
     os.path.join(basedir_cl, "test_predictions_NER_ENTITY_LKIF_YAGO_%s.csv" % layers_string)
#      os.path.join(basedir_cl, "test_predictions_YAGO_%s.csv" % layers_string)
    )
]
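
Optionally, sanity-check that the listed prediction files exist before reading them. This cell is only a sketch that was not part of the original run; it relies solely on the `results_files` structure and the `os` module imported above.

In [ ]:
for iteration, batch, cl in results_files:
    # Report any configured path that does not point to an existing file.
    for path in (batch, cl):
        if path and not os.path.exists(path):
            print("Missing predictions file for %s: %s" % (iteration, path))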

In [76]:
accuracy_results = []

for iteration, batch, cl in results_files:
    batch_accuracy = 0
    cl_accuracy = 0
    pvalue = 0
    if batch != "":
        batch_results = pd.read_csv(batch)
        batch_accuracy = accuracy_score(batch_results.true, batch_results.prediction)
    
    if cl != "": # and iteration != "NER":
        cl_results = pd.read_csv(cl)
        cl_accuracy = accuracy_score(cl_results.true, cl_results.prediction)
    
        if batch != "":
            # Paired t-test over the per-example predicted labels of both runs.
            _, pvalue = ttest_rel(batch_results.prediction, cl_results.prediction)
    
    accuracy_results.append({
        'iteration': iteration,
        'batch_accuracy': batch_accuracy,
        'cl_accuracy': cl_accuracy,
        'pvalue': pvalue
    })

accuracy_results = pd.DataFrame(accuracy_results).rename_axis("Index", axis="columns")
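
The cell above compares the two runs with a paired t-test over the predicted label indices. As an alternative sketch (not part of the original analysis), McNemar's test on per-example correctness could be computed as below, assuming both prediction files cover the same examples in the same order; `mcnemar_pvalue` is a hypothetical helper.

In [ ]:
from scipy.stats import chi2

def mcnemar_pvalue(true, pred_a, pred_b):
    # Contingency counts over the paired per-example correctness of both runs.
    correct_a = np.asarray(true) == np.asarray(pred_a)
    correct_b = np.asarray(true) == np.asarray(pred_b)
    b = np.sum(correct_a & ~correct_b)  # A right, B wrong
    c = np.sum(~correct_a & correct_b)  # A wrong, B right
    if b + c == 0:
        return 1.0
    # Continuity-corrected McNemar statistic, chi-squared with 1 degree of freedom.
    statistic = (abs(b - c) - 1) ** 2 / (b + c)
    return chi2.sf(statistic, df=1)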

In [77]:
accuracy_results


Out[77]:
Index  batch_accuracy  cl_accuracy  iteration  pvalue
0                   0     0.555128        NER       0
1                   0     0.535897     ENTITY       0
2                   0     0.730769       LKIF       0
3                   0     0.851282       YAGO       0

In [78]:
def read_predictions(filename, classes):
    # The `classes` argument is unused here; it is kept so every call site can
    # pass the class metadata with a uniform signature.
    results = pd.read_csv(filename)
    return results

In [79]:
results_df = []

for iteration, batch, cl in results_files:
    if batch != "":
        results = read_predictions(batch, classes[iteration])

        if iteration == 'NER':
            precision, recall, fscore, _ = precision_recall_fscore_support(
                results.true, results.prediction,
                labels=np.arange(classes[iteration][0].shape[0]),
                warn_for=()
            )

            prec_rec_fscore = pd.DataFrame()
            prec_rec_fscore['Class'] = classes[iteration][0]
            prec_rec_fscore['ClassCount'] = classes[iteration][1]
            prec_rec_fscore['Precision'] = precision
            prec_rec_fscore['Recall'] = recall
            prec_rec_fscore['Fscore'] = fscore
            prec_rec_fscore['Iteration'] = iteration
            prec_rec_fscore['Method'] = 'Batch Learning'

            results_df.append(prec_rec_fscore)

            continue

        # The last label (the O class) is excluded from the per-class metrics.
        precision, recall, fscore, _ = precision_recall_fscore_support(
            results.true, results.prediction,
            labels=np.arange(classes[iteration][0].shape[0] - 1),
            warn_for=()
        )

        prec_rec_fscore = pd.DataFrame()
        prec_rec_fscore['Class'] = classes[iteration][0][:-1]
        prec_rec_fscore['ClassCount'] = classes[iteration][1][:-1]
        prec_rec_fscore['Precision'] = precision
        prec_rec_fscore['Recall'] = recall
        prec_rec_fscore['Fscore'] = fscore
        prec_rec_fscore['Iteration'] = iteration
        prec_rec_fscore['Method'] = 'Batch Learning'

        results_df.append(prec_rec_fscore)
    
    if cl != "":
        results = read_predictions(cl, classes[iteration])

        precision, recall, fscore, _ = precision_recall_fscore_support(
            results.true, results.prediction,
            labels=np.arange(classes[iteration][0].shape[0] - 1),
            warn_for=()
        )

        prec_rec_fscore = pd.DataFrame()
        prec_rec_fscore['Class'] = classes[iteration][0][:-1]
        prec_rec_fscore['ClassCount'] = classes[iteration][1][:-1]
        prec_rec_fscore['Precision'] = precision
        prec_rec_fscore['Recall'] = recall
        prec_rec_fscore['Fscore'] = fscore
        prec_rec_fscore['Iteration'] = iteration
        prec_rec_fscore['Method'] = 'Curriculum Learning'

        results_df.append(prec_rec_fscore)

results_df = pd.concat(results_df)
results_df = pd.melt(results_df, id_vars=["Class", "ClassCount", "Iteration", "Method"],
                     value_vars=["Precision", "Fscore", "Recall"],
                     var_name="Metric", value_name="Value")

metric_map = {"Precision": 0, "Recall": 1, "Fscore": 2}
results_df['MetricRank'] = results_df['Metric'].map(metric_map)
results_df = results_df.sort_values(["Iteration", "Method", "MetricRank", "Class"]).reset_index(drop=True)

Print the results as a LaTeX table


In [ ]:
pivot_df = results_df[(results_df.Iteration == 'YAGO')]

pivot_df = pivot_df.pivot_table(index=['Class', 'Method'],
                                columns='Metric', values='Value')[['Precision', 'Recall', 'Fscore']].reset_index()

pivot_df = pivot_df.append(pivot_df[pivot_df.Method == 'Batch Learning'].mean(), ignore_index=True)
pivot_df = pivot_df.append(pivot_df[pivot_df.Method == 'Curriculum Learning'].mean(), ignore_index=True)
pivot_df = pivot_df[['Precision', 'Recall', 'Fscore']]
print(pivot_df.to_latex(index=False, float_format=lambda x: '%.2f' % x))
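
Note that `DataFrame.append`, used in the cell above, was removed in pandas 2.0. Under a newer pandas the same mean rows could be attached with `pd.concat` instead; the following is only a hypothetical variant of that cell, not part of the original run.

In [ ]:
pivot_df = results_df[(results_df.Iteration == 'YAGO')]
pivot_df = pivot_df.pivot_table(index=['Class', 'Method'],
                                columns='Metric', values='Value')[['Precision', 'Recall', 'Fscore']].reset_index()

# Per-method mean rows, concatenated instead of appended.
batch_mean = pivot_df[pivot_df.Method == 'Batch Learning'].mean(numeric_only=True)
cl_mean = pivot_df[pivot_df.Method == 'Curriculum Learning'].mean(numeric_only=True)
pivot_df = pd.concat([pivot_df, batch_mean.to_frame().T, cl_mean.to_frame().T],
                     ignore_index=True)

pivot_df = pivot_df[['Precision', 'Recall', 'Fscore']]
print(pivot_df.to_latex(index=False, float_format=lambda x: '%.2f' % x))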

In [80]:
for iteration, idf in results_df.groupby("Iteration"):
    if iteration == "NER":
        continue
    
    plt.clf()

    ax = sns.boxplot(x='Metric', y='Value', hue='Method',
                     data=idf)
    ax.set_title('Precision/Recall/F1-Score for Iteration %s\n(without considering the O class) using %s'
                 % (iteration, features))
    ax.title.set_y(1.05)
    ax.figure.set_size_inches(7, 5)
    ax.figure.tight_layout(pad=1)
    ax.figure.savefig(os.path.join(outdir, 'prec_rec_fscore/%s_prec_rec_fscore.png' % (iteration, )))
    plt.show()



In [81]:
rdf_top_bottom = []

for (iteration, method), rdf in results_df.groupby(["Iteration", "Method"]):
    if iteration == 'NER':
        continue
    
    prec_rdf = rdf[rdf.Metric == 'Precision']
    rec_rdf = rdf[rdf.Metric == 'Recall']
    fscore_rdf = rdf[rdf.Metric == 'Fscore']

    # Number of classes forming 20% of the label set at this iteration.
    to_take = np.ceil(prec_rdf.shape[0] * 0.2).astype(np.int32)

    # Note the naming below: "bottom" averages over the 20% of classes with the
    # most annotated examples (descending sort by ClassCount), while "top"
    # averages over the 20% with the fewest (ascending sort).
    bottom_prec = prec_rdf.sort_values('ClassCount', ascending=False)[:to_take].Value.mean()
    top_prec = prec_rdf.sort_values('ClassCount', ascending=True)[:to_take].Value.mean()
    bottom_rec = rec_rdf.sort_values('ClassCount', ascending=False)[:to_take].Value.mean()
    top_rec = rec_rdf.sort_values('ClassCount', ascending=True)[:to_take].Value.mean()
    bottom_fscore = fscore_rdf.sort_values('ClassCount', ascending=False)[:to_take].Value.mean()
    top_fscore = fscore_rdf.sort_values('ClassCount', ascending=True)[:to_take].Value.mean()

    rdf_top_bottom.append({
        'Iteration': iteration,
#         'Method': method,
        'Top 20% Precision Mean': top_prec,
        'Bottom 20% Precision Mean': bottom_prec,
        'Top 20% Recall Mean': top_rec,
        'Bottom 20% Recall Mean': bottom_rec,
        'Top 20% Fscore Mean': top_fscore,
        'Bottom 20% Fscore Mean': bottom_fscore,
    })

rdf_top_bottom = pd.DataFrame(rdf_top_bottom,
                              columns=['Iteration', 'Top 20% Precision Mean', 'Bottom 20% Precision Mean',
                                      'Top 20% Recall Mean', 'Bottom 20% Recall Mean', 'Top 20% Fscore Mean',
                                      'Bottom 20% Fscore Mean'])

In [82]:
rdf_top_bottom.rename_axis("Index", axis="columns")


Out[82]:
Index  Iteration  Top 20% Precision Mean  Bottom 20% Precision Mean  Top 20% Recall Mean  Bottom 20% Recall Mean  Top 20% Fscore Mean  Bottom 20% Fscore Mean
0      ENTITY                        0.0                   0.727273                  0.0                0.253968                  0.0                0.376471
1      LKIF                          0.0                   0.100000                  0.0                0.029412                  0.0                0.045455
2      YAGO                          0.0                   0.025455                  0.0                0.006829                  0.0                0.010769

In [47]:
print(rdf_top_bottom.to_latex(index=False, float_format='%.2f'))


\begin{tabular}{lrrrrrr}
\toprule
Iteration &  Top 20\% Precision Mean &  Bottom 20\% Precision Mean &  Top 20\% Recall Mean &  Bottom 20\% Recall Mean &  Top 20\% Fscore Mean &  Bottom 20\% Fscore Mean \\
\midrule
   ENTITY &                     0.0 &                       0.43 &                  0.0 &                    0.19 &                  0.0 &                    0.26 \\
     LKIF &                     0.0 &                       0.00 &                  0.0 &                    0.00 &                  0.0 &                    0.00 \\
     YAGO &                     0.0 &                       0.05 &                  0.0 &                    0.01 &                  0.0 &                    0.02 \\
\bottomrule
\end{tabular}


In [83]:
confusion_matrices = {}
columns = {}

for iteration, batch, cl in results_files:
    if iteration == 'NER' or iteration == 'YAGO':
        continue
    
    classes_ = np.array([re.sub('^I-', '', cls) for cls in classes[iteration][0]])
    
    if batch != "":
        batch_df = read_predictions(batch, classes[iteration])
        confusion_matrices[('batch', iteration)] =\
            confusion_matrix(batch_df.true, batch_df.prediction, labels=np.arange(classes_.shape[0]))
        columns[('batch', iteration)] = classes_
    
    if cl != "":
        cl_df = read_predictions(cl, classes[iteration])
        confusion_matrices[('cl', iteration)] =\
            confusion_matrix(cl_df.true, cl_df.prediction, labels=np.arange(classes_.shape[0]))
        columns[('cl', iteration)] = classes_

In [84]:
for (method, iteration), cm in confusion_matrices.items():
    plt.clf()

    # Classes with zero support divide by zero here and become NaN rows
    # (hence the RuntimeWarning reported below).
    normalized_cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    normalized_cm = pd.DataFrame(normalized_cm, columns=columns[(method, iteration)],
                                 index=columns[(method, iteration)])

    ax = sns.heatmap(normalized_cm.reindex(index=normalized_cm.index[::-1]), vmin=0.0, vmax=1.0, annot=False,
                     fmt=".2f", linewidths=.5, cmap="Blues", cbar=False)
    method_name = 'Batch Learning' if method == 'batch' else 'Curriculum Learning'
#     ax.set_title("Confusion Matrix Normalized Heatmap\nfor %s and Iteration %s using %s"
#                  % (method_name, iteration, features))

    ax.tick_params(labelsize=17)
    plt.xticks(rotation=90)
    if iteration == 'ENTITY':
        plt.yticks(rotation=0)
    
    ax.title.set_y(1.05)
    
    if iteration == 'LKIF':
        ax.figure.set_size_inches(12, 12)
    else:
        ax.figure.set_size_inches(8, 8)
    
    ax.figure.tight_layout(pad=1.5)
    
    ax.figure.savefig(
        '%s/heatmaps/%s_%s_heatmaps.png' % (outdir, method, iteration))
    plt.show()


/home/mteruel/anaconda2/envs/env35/lib/python3.5/site-packages/ipykernel/__main__.py:4: RuntimeWarning: invalid value encountered in true_divide

In [85]:
def mapping(label):
    # Bucket each label by how many times it appears in the test set, using the
    # global `counts` dict that is filled in the loop below. Labels with 100 or
    # more occurrences map to "O", which is not listed in `bins`, so those
    # examples are left out of the binned confusion matrix.
    if counts[label] < 5:
        return "< 5"
    elif counts[label] < 15:
        return "< 15"
    elif counts[label] < 30:
        return "< 30"
    elif counts[label] < 50:
        return "< 50"
    elif counts[label] < 100:
        return "< 100"
    else:
        return "O"

vmapping = np.vectorize(mapping)
# bins = ["< 5", "< 15", "< 30", "< 50", "< 100", "< 200", "< 500",
#         "< 1000", "< 2000", "< 5000", "< 10000", "< 15000",
#         "< 30000", "< 50000", "< 1000000", "O"]
bins = ["< 5", "< 15", "< 30", "< 50", "< 100"]

_, batch, cl = results_files[-1]

for method, method_file in [('batch', batch), ('cl', cl)]:
    if method_file != "":
        df = pd.read_csv(method_file)

        counts = {}
        for label_index in range(classes['YAGO'][0].shape[0]):
            counts[label_index] = (df.true == label_index).sum()

        true_mapped = vmapping(df.true.values)
        prediction_mapped = vmapping(df.prediction.values)

        cm = confusion_matrix(true_mapped, prediction_mapped, labels=bins)

        plt.clf()
    
        # As above, rows with zero support normalize to NaN (see the warning below).
        normalized_cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        normalized_cm = pd.DataFrame(normalized_cm, columns=bins,
                                     index=bins)

        ax = sns.heatmap(normalized_cm.reindex(index=normalized_cm.index[::-1]), vmin=0.0, vmax=1.0, annot=False,
                         fmt=".2f", linewidths=.5, cmap="Blues", cbar=False)
        method_name = 'Batch Learning' if method == 'batch' else 'Curriculum Learning'
    #     ax.set_title("Confusion Matrix Normalized Heatmap\nfor %s and Iteration YAGO using %s"
    #                  % (method_name, features))

        ax.tick_params(labelsize=17)
        plt.xticks(rotation=90)

        ax.title.set_y(1.05)

        ax.set_xlabel('Bins (number of occurrences)')

        ax.figure.set_size_inches(8, 8)

        ax.figure.tight_layout(pad=1.5)

        ax.figure.savefig(
            '%s/heatmaps/%s_YAGO_heatmaps.png' % (outdir, method))


/home/mteruel/anaconda2/envs/env35/lib/python3.5/site-packages/ipykernel/__main__.py:38: RuntimeWarning: invalid value encountered in true_divide

Get the average precision, recall and F1 score


In [86]:
averages = ['micro', 'macro', 'weighted']
levels = [(iteration[0], average) for iteration in results_files for average in averages]
index = pd.MultiIndex.from_tuples(levels, names=['Task', 'Average type'])

batch_metrics = pd.DataFrame(0, index=index, columns=['Precision', 'Recall', 'F1 Score'])
cl_metrics = pd.DataFrame(0, index=index, columns=['Precision', 'Recall', 'F1 Score'])
for iteration, batch, cl in results_files:
    if batch != "":
        results = read_predictions(batch, classes[iteration])
        for average in averages:
            values = precision_recall_fscore_support(
                results.true, results.prediction,
                average=average, warn_for=()
            )[:3]
            print(values)

            batch_metrics.loc[iteration, average] = values
    if cl != "":
        results = read_predictions(cl, classes[iteration])
        for average in averages:
            values = precision_recall_fscore_support(
                results.true, results.prediction,
                average=average, warn_for=()
            )[:3]

            cl_metrics.loc[iteration, average] = values

In [87]:
cl_metrics.rename_axis("Index", axis="columns")


Out[87]:
Index                  Precision    Recall  F1 Score
Task    Average type
NER     micro           0.555128  0.555128  0.555128
        macro           0.707345  0.561592  0.464420
        weighted        0.710138  0.555128  0.461029
ENTITY  micro           0.535897  0.535897  0.535897
        macro           0.431442  0.261624  0.261561
        weighted        0.435497  0.535897  0.413283
LKIF    micro           0.730769  0.730769  0.730769
        macro           0.209695  0.125588  0.135228
        weighted        0.670849  0.730769  0.670014
YAGO    micro           0.851282  0.851282  0.851282
        macro           0.107797  0.130863  0.094817
        weighted        0.840842  0.851282  0.838199
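
The averaged metrics can also be exported for the paper; this is a hypothetical follow-up cell mirroring the earlier LaTeX exports and was not executed in the original run.

In [ ]:
# Dump the curriculum-learning averages as a LaTeX table, two decimals.
print(cl_metrics.to_latex(float_format=lambda x: '%.2f' % x))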

In [ ]: