In [1]:
# This notebook loads the model used for the word-similarity (wordsim) evaluation results reported in the paper "Multimodal Word Distributions".

In [2]:
%matplotlib inline
from word2gm_loader import Word2GM
from quantitative_eval import *

In [3]:
# Directory holding the checkpoint files of the model evaluated in this notebook.
# NOTE(review): the name suggests 2 mixture components (k2) and 50 dimensions (d50) — confirm.
model_dir = 'modelfiles/w2gm-k2-d50'

In [4]:
# Load the model from model_dir. Per the captured output, the loader uses the
# latest checkpoint file found in that directory and reports the number of mixtures.
w2gm_2s = Word2GM(model_dir)


('Using the latest checkpoint file', u'modelfiles/w2gm-k2-d50/model.ckpt-481392835')
('Number of mixtures = ', 2)

In [5]:
# Print the nearest neighbors of 'rock' once per index, 0 then 1.
# NOTE(review): the second argument appears to select the mixture component
# (the model reports 2 mixtures) — confirm against Word2GM.show_nearest_neighbors.
for component_idx in (0, 1):
    w2gm_2s.show_nearest_neighbors('rock', component_idx)


<ggplot: (8787654437793)>
Top 10 highest similarity
['rock:0', 'basalt:1', 'boulder:1', 'boulders:0', 'stalagmites:0', 'stalactites:0', 'rocks:1', 'sand:0', 'quartzite:1', 'bedrock:0']
Top 10 lowest variance of top 20 highest similarity
['breccia:0', 'gneiss:0', 'tuff:0', 'outcropping:1', 'quartzite:1', 'stalagmites:0', 'stalactites:0', 'eroded:0', 'sedimentary:1', 'basalt:1', 'bedrock:0', 'boulders:0', 'boulder:1', 'pebbles:1', 'rocks:1', 'cave:0', 'caves:1', 'limestone:0', 'rock:0', 'sand:0']
<ggplot: (8787587078329)>
Top 10 highest similarity
['rock:1', 'rock/:1', 'ska:0', 'funk:1', 'pop-rock:1', 'punk:1', 'indie-rock:0', 'band:0', 'indie:0', 'pop:1']
Top 10 lowest variance of top 20 highest similarity
['blues-rock:0', 'psychobilly:0', 'indie-rock:0', 'pop-rock:1', 'pop/:1', 'folk-rock:1', 'psychedelia:0', 'rockabilly:1', 'five-piece:1', 'rock/:1', 'electronica:0', 'ska:0', 'reggae:0', 'techno:1', 'funk:1', 'punk:1', 'indie:0', 'rock:1', 'pop:1', 'band:0']

In [6]:
# Run the word-similarity evaluation for the model. Each model_names entry is a
# (display label, model directory) pair; the label prefixes the result columns
# ('<label>/max' and '<label>/dis' in the table displayed by this cell).
# NOTE(review): the label reads 'k50' while the directory is named 'k2-d50' — confirm the label is intended.
quantitative_eval(model_names=[('w2gm-2s-k50', model_dir)])


Out[6]:
Dataset w2gm-2s-k50/max w2gm-2s-k50/dis
0 SL 29.310622 26.017090
1 WS 73.469769 62.846685
2 WS-S 76.729902 70.077696
3 WS-R 71.748623 57.983979
4 MEN 73.553575 68.499809
5 MC 79.083224 76.746775
6 RG 74.511261 71.549869
7 YP 45.074084 39.178062
8 MT-287 66.596475 57.241792
9 MT-771 60.817616 57.258203
10 RW 28.618603 31.641952

In [7]:
# SCWS evaluation: displays a table with the Spearman correlation per method.
# The number differs slightly from the SCWS result reported in the paper; the
# original note ties this to a window size of 5.
# NOTE(review): unclear whether the paper's SCWS result or this model uses window size = 5 — confirm.
quantitative_scws_df(model_dir)


Out[7]:
method spearman
0 SCWS_maxdot 62.136451