In [1]:
import json
from scipy import stats
import pandas as pd

In [2]:
# Folder containing one "results-for-tests-<dataset>.json" file per dataset.
t_test_folder = '../output/intrusion/'

# Name of the model every reported comparison must involve.
eve_model = "eve"

# All methods whose score distributions are compared; the EVE model comes first.
methods = [
    eve_model,
    "word2vec_sg",
    "word2vec_cbow",
    "fasttext_cbow",
    "fasttext_sg",
    "glove",
]

# Identifiers of the datasets to process, in reporting order.
dataset_ids = [
    "animal_classes",
    "european_cities",
    "movie_genres",
    "cuisine",
    "music_genres",
    "nobel_laureates",
    "country_continent",
]

In [3]:
def standard_t_test(a, b):
    """Run scipy's independent two-sample t-test on ``a`` and ``b``.

    Delegates to ``stats.ttest_ind`` with its default options.

    Returns:
        A ``(t_statistic, p_value)`` tuple.
    """
    statistic, p_value = stats.ttest_ind(a, b)
    return (statistic, p_value)

def pair_t_test(a, b):
    """Run scipy's paired (related-samples) t-test on ``a`` and ``b``.

    Delegates to ``stats.ttest_rel`` with its default options.

    Returns:
        A ``(t_statistic, p_value)`` tuple.
    """
    statistic, p_value = stats.ttest_rel(a, b)
    return (statistic, p_value)

def load_items(filepath):
    """Parse the JSON file at ``filepath`` and return the decoded object.

    A progress line naming the file is printed before reading.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    print("Loading %s ..." % filepath)
    # The context manager closes the handle even if parsing fails, rather than
    # leaving the open file for the garbage collector to clean up.
    with open(filepath) as handle:
        return json.load(handle)

In [4]:
# Maps each dataset id to the decoded content of its JSON results file.
results = {}
for dataset_id in dataset_ids:
    # Blank line keeps the per-file "Loading ..." messages visually separated
    print()
    results_path = "%sresults-for-tests-%s.json" % (t_test_folder, dataset_id)
    results[dataset_id] = load_items(results_path)


Loading ../output/intrusion/results-for-tests-animal_classes.json ...

Loading ../output/intrusion/results-for-tests-european_cities.json ...

Loading ../output/intrusion/results-for-tests-movie_genres.json ...

Loading ../output/intrusion/results-for-tests-cuisine.json ...

Loading ../output/intrusion/results-for-tests-music_genres.json ...

Loading ../output/intrusion/results-for-tests-nobel_laureates.json ...

Loading ../output/intrusion/results-for-tests-country_continent.json ...

In [5]:
# distribution[dataset_id][method] holds the values taken from position 1 of
# every record in results[dataset_id][method]; distribution['all'][method]
# accumulates those values over all datasets.
# NOTE(review): position 1 is presumably the per-item score - confirm against
# the code that writes the results files.
distribution = {'all': {method: [] for method in methods}}
for dataset_id in dataset_ids:
    print('Processing', dataset_id)
    per_method = distribution[dataset_id] = {}
    for method in methods:
        # Transpose the records into columns and keep the second column
        columns = list(zip(*results[dataset_id][method]))
        per_method[method] = columns[1]
        distribution['all'][method] += per_method[method]


Processing animal_classes
Processing european_cities
Processing movie_genres
Processing cuisine
Processing music_genres
Processing nobel_laureates
Processing country_continent

In [6]:
result_cols = ["Dataset","Method","Standard t-stats","Standard p-value","Pairwise t-stats", "Pairwise p-value"]


def _eve_comparison_rows(label, scores_by_method):
    """Build one result row per method pair that involves ``eve_model``.

    Args:
        label: Value written to the "Dataset" column (a dataset id or 'all').
        scores_by_method: Mapping from method name to its sequence of values.

    Returns:
        A list of rows ordered like ``result_cols``: label, "<a>, <b>" pair
        name, independent t-statistic and p-value, paired t-statistic and
        p-value.
    """
    rows = []
    for i in range(len(methods)):
        for j in range(i + 1, len(methods)):
            method_a, method_b = methods[i], methods[j]
            # Only pairs that include the EVE model are reported, so skip the
            # rest before running the tests rather than discarding results.
            if eve_model not in (method_a, method_b):
                continue
            dist_a = scores_by_method[method_a]
            dist_b = scores_by_method[method_b]
            s_t, s_pvalue = standard_t_test(dist_a, dist_b)
            # NOTE(review): the paired test presumes both sequences are aligned
            # item-by-item across methods - confirm against the results files.
            p_t, p_pvalue = pair_t_test(dist_a, dist_b)
            rows.append([label, method_a + ', ' + method_b, s_t, s_pvalue, p_t, p_pvalue])
    return rows


result_rows = list()
# Per-dataset comparisons first ...
for dataset_id in dataset_ids:
    result_rows.extend(_eve_comparison_rows(dataset_id, distribution[dataset_id]))
# ... followed by the comparison over the values pooled from every dataset.
result_rows.extend(_eve_comparison_rows('all', distribution['all']))

print('preparing dataframe')
df_results = pd.DataFrame(result_rows, columns=result_cols)
df_results


preparing dataframe
Out[6]:
Dataset Method Standard t-stats Standard p-value Pairwise t-stats Pairwise p-value
0 animal_classes eve, word2vec_sg 745.839799 0.000000e+00 1433.278113 0.0
1 animal_classes eve, word2vec_cbow 824.137907 0.000000e+00 1513.655448 0.0
2 animal_classes eve, fasttext_cbow 880.544687 0.000000e+00 1544.174976 0.0
3 animal_classes eve, fasttext_sg 711.328942 0.000000e+00 1405.219611 0.0
4 animal_classes eve, glove 1010.348198 0.000000e+00 1666.247292 0.0
5 european_cities eve, word2vec_sg -214.663573 0.000000e+00 372.932230 0.0
6 european_cities eve, word2vec_cbow 69.707905 0.000000e+00 515.288276 0.0
7 european_cities eve, fasttext_cbow 117.705245 0.000000e+00 538.114822 0.0
8 european_cities eve, fasttext_sg -277.870906 0.000000e+00 353.113874 0.0
9 european_cities eve, glove 572.433342 0.000000e+00 880.064324 0.0
10 movie_genres eve, word2vec_sg 1039.739005 0.000000e+00 1742.296929 0.0
11 movie_genres eve, word2vec_cbow 1084.305636 0.000000e+00 1767.025142 0.0
12 movie_genres eve, fasttext_cbow 1112.240159 0.000000e+00 1748.746076 0.0
13 movie_genres eve, fasttext_sg 1031.625630 0.000000e+00 1768.306838 0.0
14 movie_genres eve, glove 1128.827718 0.000000e+00 1822.010267 0.0
15 cuisine eve, word2vec_sg 1424.712360 0.000000e+00 1559.613643 0.0
16 cuisine eve, word2vec_cbow 1729.702412 0.000000e+00 1897.811663 0.0
17 cuisine eve, fasttext_cbow 959.293921 0.000000e+00 1109.979776 0.0
18 cuisine eve, fasttext_sg 668.556999 0.000000e+00 845.330451 0.0
19 cuisine eve, glove 2153.260540 0.000000e+00 2312.151424 0.0
20 music_genres eve, word2vec_sg 650.313825 0.000000e+00 1019.158624 0.0
21 music_genres eve, word2vec_cbow 724.420815 0.000000e+00 1076.072866 0.0
22 music_genres eve, fasttext_cbow 851.829198 0.000000e+00 1225.514655 0.0
23 music_genres eve, fasttext_sg 681.951611 0.000000e+00 1061.649076 0.0
24 music_genres eve, glove 1138.395665 0.000000e+00 1488.631134 0.0
25 nobel_laureates eve, word2vec_sg 1674.353556 0.000000e+00 1884.428873 0.0
26 nobel_laureates eve, word2vec_cbow 1654.112860 0.000000e+00 1876.283311 0.0
27 nobel_laureates eve, fasttext_cbow 1889.836697 0.000000e+00 2190.112763 0.0
28 nobel_laureates eve, fasttext_sg 1708.292641 0.000000e+00 1926.006656 0.0
29 nobel_laureates eve, glove 1846.478086 0.000000e+00 2083.799836 0.0
30 country_continent eve, word2vec_sg 99.902263 0.000000e+00 982.471062 0.0
31 country_continent eve, word2vec_cbow -26.816366 2.154477e-158 950.010509 0.0
32 country_continent eve, fasttext_cbow -90.919963 0.000000e+00 910.637297 0.0
33 country_continent eve, fasttext_sg -112.081153 0.000000e+00 851.114390 0.0
34 country_continent eve, glove 28.914447 8.229161e-184 972.486590 0.0
35 all eve, word2vec_sg 1940.408876 0.000000e+00 3263.107109 0.0
36 all eve, word2vec_cbow 2083.233501 0.000000e+00 3446.798533 0.0
37 all eve, fasttext_cbow 1962.754930 0.000000e+00 3315.088524 0.0
38 all eve, fasttext_sg 1604.260811 0.000000e+00 2962.698271 0.0
39 all eve, glove 2631.368145 0.000000e+00 4008.887070 0.0

In [7]:
# Persist the significance table (including the row index) next to the notebook.
df_results.to_csv(path_or_buf="intrusion_significance.csv")