In [1]:
%matplotlib inline
from word2gm_loader import Word2GM
from quantitative_eval import *
In [2]:
# Relative path to the trained model's checkpoint directory; the model loader
# and every evaluation call in this notebook take it as their input.
# NOTE(review): the directory name looks like an encoding of the training
# settings (e.g. "2s" next to the 2 mixtures reported on load) -- confirm
# against the training script before relying on it.
text8_model_dir = 'modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10'
In [3]:
# Load the model stored under text8_model_dir. The loader prints which
# checkpoint file it picked (the latest one) and the number of mixtures.
w2gm_text8_2s = Word2GM(text8_model_dir)
('Using the latest checkpoint file', u'modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4792139')
('Number of mixtures = ', 2)
In [4]:
# NOTE(review): presumably exports the embeddings for visualization into an
# output directory (the run reports that the directory already exists) --
# confirm in word2gm_loader.py. The TensorFlow deprecation warning it emits
# comes from inside word2gm_loader.py, not from this notebook.
w2gm_text8_2s.visualize_embeddings()
The directory already exists!
WARNING:tensorflow:From word2gm_loader.py:373 in visualize_embeddings.: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
In [5]:
# Show the nearest neighbours of 'rock' once per mixture component. The second
# argument selects the component: the two result lists are headed by 'rock:0'
# and 'rock:1' respectively.
for component in (0, 1):
    w2gm_text8_2s.show_nearest_neighbors('rock', component)
<ggplot: (8743742896109)>
Top 10 highest similarity
['rock:0', 'limestone:1', 'basalt:1', 'rocks:0', 'metamorphic:1', 'sedimentary:1', 'granite:1', 'boulders:1', 'lava:0', 'weathering:1']
Top 10 lowest variance of top 20 highest similarity
['masonry:0', 'limestones:1', 'weathering:1', 'sediments:0', 'basalt:1', 'silt:0', 'cools:0', 'ridges:1', 'sedimentary:1', 'boulders:1', 'lava:0', 'felsic:0', 'limestone:1', 'sediment:0', 'drilled:0', 'deposited:1', 'metamorphic:1', 'granite:1', 'rocks:0', 'rock:0']
<ggplot: (8743786016425)>
Top 10 highest similarity
['rock:1', 'pop:0', 'bands:1', 'rap:0', 'hardcore:0', 'band:1', 'disco:0', 'funk:0', 'dj:0', 'jazz:1']
Top 10 lowest variance of top 20 highest similarity
['punk:0', 'songwriters:0', 'funk:0', 'hardcore:0', 'vocalists:1', 'rap:0', 'hip:1', 'disco:0', 'dj:0', 'singers:1', 'musicians:1', 'digweed:0', 'beastie:0', 'jazz:1', 'bands:1', 'pop:0', 'album:1', 'rock:1', 'band:1', 'musical:0']
In [6]:
# Run the word-similarity evaluation for the text8 model. model_names is a
# list of (label, model_dir) pairs; the label appears as the prefix of the
# result columns (text8/max and text8/dis), with one row per dataset.
quantitative_eval(model_names=[('text8', text8_model_dir)])
Out[6]:
Dataset
text8/max
text8/dis
0
SL
18.891734
9.837838
1
WS
60.848186
39.015669
2
WS-S
65.713851
40.652039
3
WS-R
59.955224
42.891791
4
MEN
59.281383
39.223864
5
MC
55.585226
39.096574
6
RG
51.628766
34.151086
7
YP
31.158988
22.942625
8
MT-287
62.346112
57.716732
9
MT-771
50.059093
37.151162
10
RW
9.831788
-1.538599
In [7]:
# Evaluate the same model on SCWS. The displayed table has a `method` and a
# `spearman` column; this run produces a single row, SCWS_maxdot.
quantitative_scws_df(text8_model_dir)
Out[7]:
method
spearman
0
SCWS_maxdot
42.729328
In [8]:
# Entailment evaluation: the call returns a 2-tuple that the notebook displays
# as the cell result (nothing is printed explicitly here).
# NOTE(review): per the original note the two values are the best AP and the
# best F1, presumably in that order -- confirm in quantitative_eval.py.
calculate_entailment(text8_model_dir)
/home/ben/anaconda2/lib/python2.7/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
'precision', 'predicted', average, warn_for)
Out[8]:
(0.6808664259927798, 0.71251798561151081)
In [9]:
# Evaluate each saved checkpoint in the model directory (the path of every
# checkpoint is printed as it is processed). Returns a (plot, DataFrame) pair;
# the frame has one row per checkpoint, with `x` holding the checkpoint step
# and `scores` a single aggregate score.
# NOTE(review): `scores` looks like the sum of the per-dataset similarity
# scores (the last row matches the sum of the text8/max column from
# quantitative_eval) -- confirm in quantitative_eval.py.
quantitative_eval_over_time(text8_model_dir)
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3609
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-382401
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-482817
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-861529
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-962257
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1339840
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1441452
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1821448
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-1920537
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2303397
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2399871
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2786525
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-2879567
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3223248
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3358382
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3737020
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-3837623
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4225870
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4316927
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4695804
/home/ben/research/word2gm//modelfiles/t8-2s-e10-v05-lr05d-mc100-ss5-nwout-adg-win10/model.ckpt-4792139
Out[9]:
(<ggplot: (8743743209897)>, x scores
0 3609 -10.726453
1 382401 251.778523
2 482817 312.076270
3 861529 384.000869
4 962257 408.043479
5 1339840 437.679356
6 1441452 453.091748
7 1821448 474.778770
8 1920537 483.694488
9 2303397 489.600711
10 2399871 496.093813
11 2786525 504.517808
12 2879567 509.520904
13 3223248 512.741674
14 3358382 516.898310
15 3737020 516.366329
16 3837623 518.469980
17 4225870 522.224319
18 4316927 521.674650
19 4695804 524.721928
20 4792139 525.300352)
In [10]:
# Per-dataset view of the checkpoint evaluation. Returns a (plot, DataFrame)
# pair; the frame is in long format with columns `x` (checkpoint step),
# `scores` and `dataset`, and includes SCWS_maxdot and an AVERAGE row for
# every checkpoint in addition to the individual similarity datasets.
quanteval_plot_ind(text8_model_dir)
Out[10]:
(<ggplot: (8743857298401)>, x scores dataset
0 3609 -0.590109 SL
1 3609 5.939706 WS
2 3609 3.905465 WS-S
3 3609 11.968151 WS-R
4 3609 1.889171 MEN
5 3609 -17.467735 MC
6 3609 -8.044935 RG
7 3609 -17.746086 YP
8 3609 6.459613 MT-287
9 3609 0.059196 MT-771
10 3609 2.901111 RW
11 3609 21.890397 SCWS_maxdot
12 3609 -0.975132 AVERAGE
13 382401 9.856093 SL
14 382401 35.317139 WS
15 382401 38.131721 WS-S
16 382401 35.509333 WS-R
17 382401 29.054848 MEN
18 382401 16.555408 MC
19 382401 15.309635 RG
20 382401 -4.427563 YP
21 382401 42.076081 MT-287
22 382401 28.672174 MT-771
23 382401 5.723654 RW
24 382401 34.599566 SCWS_maxdot
25 382401 22.888957 AVERAGE
26 482817 12.253120 SL
27 482817 41.991597 WS
28 482817 45.429812 WS-S
29 482817 40.474108 WS-R
.. ... ... ...
243 4316927 49.690723 MT-771
244 4316927 9.440445 RW
245 4316927 42.392824 SCWS_maxdot
246 4316927 47.424968 AVERAGE
247 4695804 18.690433 SL
248 4695804 59.946487 WS
249 4695804 65.249716 WS-S
250 4695804 58.871779 WS-R
251 4695804 59.262845 MEN
252 4695804 55.696486 MC
253 4695804 53.495208 RG
254 4695804 31.160083 YP
255 4695804 62.028293 MT-287
256 4695804 50.072635 MT-771
257 4695804 10.247963 RW
258 4695804 42.548652 SCWS_maxdot
259 4695804 47.701993 AVERAGE
260 4792139 18.891734 SL
261 4792139 60.848186 WS
262 4792139 65.713851 WS-S
263 4792139 59.955224 WS-R
264 4792139 59.281383 MEN
265 4792139 55.585226 MC
266 4792139 51.628766 RG
267 4792139 31.158988 YP
268 4792139 62.346112 MT-287
269 4792139 50.059093 MT-771
270 4792139 9.831788 RW
271 4792139 42.729328 SCWS_maxdot
272 4792139 47.754577 AVERAGE
[273 rows x 3 columns])
In [ ]:
Content source: benathi/word2gm
Similar notebooks: