In [1]:
%matplotlib inline
from word2gm_loader import Word2GM
from quantitative_eval import *

In [2]:
# Directory holding the trained Word2GM checkpoints evaluated in this notebook.
# The path is relative, so the notebook must be run from the repository root
# (the recorded outputs resolve it under the word2gm project directory).
# NOTE(review): the run name appears to encode training settings (e.g. "2s"
# matching the 2 mixtures reported by the loader) -- confirm against the
# training script before relying on it.
text8_model_dir = 'modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10'

In [3]:
# Load the trained model from the model directory. In the recorded run the
# loader reports that it picked the latest checkpoint file in that directory
# (model.ckpt-4792139) and that the model has 2 mixture components per word.
w2gm_text8_2s = Word2GM(text8_model_dir)


('Using the latest checkpoint file', u'modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4792139')
('Number of mixtures = ', 2)

In [4]:
# Export the loaded embeddings for visualization.
# NOTE(review): presumably writes files into an output directory (the recorded
# run prints "The directory already exists!") -- confirm where in
# word2gm_loader.py. The recorded run also emits a TensorFlow deprecation
# warning for initialize_all_variables raised from word2gm_loader.py:373.
w2gm_text8_2s.visualize_embeddings()


The directory already exists!
WARNING:tensorflow:From word2gm_loader.py:373 in visualize_embeddings.: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.

In [5]:
# Inspect the neighbours of each mixture component of 'rock' in turn.
# The second argument selects the component: in the recorded output the
# query is labelled 'rock:0' (neighbours such as limestone, basalt) for
# index 0 and 'rock:1' (neighbours such as pop, bands) for index 1.
for component_idx in (0, 1):
    w2gm_text8_2s.show_nearest_neighbors('rock', component_idx)


<ggplot: (8743742896109)>
Top 10 highest similarity
['rock:0', 'limestone:1', 'basalt:1', 'rocks:0', 'metamorphic:1', 'sedimentary:1', 'granite:1', 'boulders:1', 'lava:0', 'weathering:1']
Top 10 lowest variance of top 20 highest similarity
['masonry:0', 'limestones:1', 'weathering:1', 'sediments:0', 'basalt:1', 'silt:0', 'cools:0', 'ridges:1', 'sedimentary:1', 'boulders:1', 'lava:0', 'felsic:0', 'limestone:1', 'sediment:0', 'drilled:0', 'deposited:1', 'metamorphic:1', 'granite:1', 'rocks:0', 'rock:0']
<ggplot: (8743786016425)>
Top 10 highest similarity
['rock:1', 'pop:0', 'bands:1', 'rap:0', 'hardcore:0', 'band:1', 'disco:0', 'funk:0', 'dj:0', 'jazz:1']
Top 10 lowest variance of top 20 highest similarity
['punk:0', 'songwriters:0', 'funk:0', 'hardcore:0', 'vocalists:1', 'rap:0', 'hip:1', 'disco:0', 'dj:0', 'singers:1', 'musicians:1', 'digweed:0', 'beastie:0', 'jazz:1', 'bands:1', 'pop:0', 'album:1', 'rock:1', 'band:1', 'musical:0']

In [6]:
# Word-similarity benchmark table for the text8 model.
# model_names is a list of (label, model_dir) pairs; in the recorded output
# the label 'text8' prefixes the result columns (text8/max, text8/dis).
quantitative_eval(
    model_names=[
        ('text8', text8_model_dir),
    ]
)


Out[6]:
Dataset text8/max text8/dis
0 SL 18.891734 9.837838
1 WS 60.848186 39.015669
2 WS-S 65.713851 40.652039
3 WS-R 59.955224 42.891791
4 MEN 59.281383 39.223864
5 MC 55.585226 39.096574
6 RG 51.628766 34.151086
7 YP 31.158988 22.942625
8 MT-287 62.346112 57.716732
9 MT-771 50.059093 37.151162
10 RW 9.831788 -1.538599

In [7]:
# Evaluate the model on SCWS. The recorded result is a one-row table with the
# columns `method` (SCWS_maxdot) and `spearman`.
# Kept as the last expression of the cell so the table is displayed.
quantitative_scws_df(text8_model_dir)


Out[7]:
method spearman
0 SCWS_maxdot 42.729328

In [8]:
# Entailment evaluation: the call returns a 2-tuple that the notebook displays
# as the cell output (nothing is printed explicitly here).
# NOTE(review): per the original comment the tuple is (best AP, best F1) --
# confirm the order in quantitative_eval.py.
# The recorded run also emits an sklearn UndefinedMetricWarning (F-score set to
# 0.0 because there were no predicted samples).
calculate_entailment(text8_model_dir)


/home/ben/anaconda2/lib/python2.7/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
Out[8]:
(0.6808664259927798, 0.71251798561151081)

In [9]:
# Evaluate every saved checkpoint in the model directory. The recorded run
# prints each checkpoint path as it goes and returns a (ggplot, DataFrame)
# tuple; the frame has columns `x` (the checkpoint step taken from the file
# name) and `scores`.
# NOTE(review): `scores` appears to be the sum of the per-dataset similarity
# scores -- the final value 525.30 equals the sum of the text8/max column
# reported by quantitative_eval for the same checkpoint; confirm in the source.
quantitative_eval_over_time(text8_model_dir)


/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3609
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-382401
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-482817
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-861529
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-962257
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1339840
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1441452
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1821448
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1920537
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2303397
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2399871
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2786525
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2879567
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3223248
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3358382
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3737020
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3837623
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4225870
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4316927
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4695804
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4792139
Out[9]:
(<ggplot: (8743743209897)>,           x      scores
 0      3609  -10.726453
 1    382401  251.778523
 2    482817  312.076270
 3    861529  384.000869
 4    962257  408.043479
 5   1339840  437.679356
 6   1441452  453.091748
 7   1821448  474.778770
 8   1920537  483.694488
 9   2303397  489.600711
 10  2399871  496.093813
 11  2786525  504.517808
 12  2879567  509.520904
 13  3223248  512.741674
 14  3358382  516.898310
 15  3737020  516.366329
 16  3837623  518.469980
 17  4225870  522.224319
 18  4316927  521.674650
 19  4695804  524.721928
 20  4792139  525.300352)

In [10]:
# Per-dataset scores at every checkpoint. The recorded result is a
# (ggplot, DataFrame) tuple; the frame has columns `x` (checkpoint step),
# `scores` and `dataset`, with 273 rows = 21 checkpoints x 13 entries
# (11 similarity datasets, SCWS_maxdot and AVERAGE).
# NOTE(review): in the recorded output AVERAGE matches the mean of the 11
# similarity datasets and does not include SCWS_maxdot -- confirm this is
# intended.
quanteval_plot_ind(text8_model_dir)


Out[10]:
(<ggplot: (8743857298401)>,            x     scores      dataset
 0       3609  -0.590109           SL
 1       3609   5.939706           WS
 2       3609   3.905465         WS-S
 3       3609  11.968151         WS-R
 4       3609   1.889171          MEN
 5       3609 -17.467735           MC
 6       3609  -8.044935           RG
 7       3609 -17.746086           YP
 8       3609   6.459613       MT-287
 9       3609   0.059196       MT-771
 10      3609   2.901111           RW
 11      3609  21.890397  SCWS_maxdot
 12      3609  -0.975132      AVERAGE
 13    382401   9.856093           SL
 14    382401  35.317139           WS
 15    382401  38.131721         WS-S
 16    382401  35.509333         WS-R
 17    382401  29.054848          MEN
 18    382401  16.555408           MC
 19    382401  15.309635           RG
 20    382401  -4.427563           YP
 21    382401  42.076081       MT-287
 22    382401  28.672174       MT-771
 23    382401   5.723654           RW
 24    382401  34.599566  SCWS_maxdot
 25    382401  22.888957      AVERAGE
 26    482817  12.253120           SL
 27    482817  41.991597           WS
 28    482817  45.429812         WS-S
 29    482817  40.474108         WS-R
 ..       ...        ...          ...
 243  4316927  49.690723       MT-771
 244  4316927   9.440445           RW
 245  4316927  42.392824  SCWS_maxdot
 246  4316927  47.424968      AVERAGE
 247  4695804  18.690433           SL
 248  4695804  59.946487           WS
 249  4695804  65.249716         WS-S
 250  4695804  58.871779         WS-R
 251  4695804  59.262845          MEN
 252  4695804  55.696486           MC
 253  4695804  53.495208           RG
 254  4695804  31.160083           YP
 255  4695804  62.028293       MT-287
 256  4695804  50.072635       MT-771
 257  4695804  10.247963           RW
 258  4695804  42.548652  SCWS_maxdot
 259  4695804  47.701993      AVERAGE
 260  4792139  18.891734           SL
 261  4792139  60.848186           WS
 262  4792139  65.713851         WS-S
 263  4792139  59.955224         WS-R
 264  4792139  59.281383          MEN
 265  4792139  55.585226           MC
 266  4792139  51.628766           RG
 267  4792139  31.158988           YP
 268  4792139  62.346112       MT-287
 269  4792139  50.059093       MT-771
 270  4792139   9.831788           RW
 271  4792139  42.729328  SCWS_maxdot
 272  4792139  47.754577      AVERAGE
 
 [273 rows x 3 columns])

In [ ]: