Experiments with Similarity Encoders

...to show that SimEc can preserve non-metric similarities

This IPython notebook contains some examples that illustrate the potential of Similarity Encoders (SimEc) for creating similarity-preserving embeddings. For further details and theoretical background on this new neural network architecture, please refer to the corresponding paper.


In [1]:
from __future__ import unicode_literals, division, print_function, absolute_import
from builtins import range, str
from glob import glob
import numpy as np
np.random.seed(28)
import matplotlib.pyplot as plt
from unidecode import unidecode
from scipy.spatial.distance import pdist, squareform
from sklearn.decomposition import PCA, KernelPCA
from sklearn.linear_model import Ridge
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
tf.set_random_seed(28)
import keras

# find nlputils at https://github.com/cod3licious/nlputils
from nlputils.dict_utils import invert_dict2, select_copy
from nlputils.features import FeatureTransform, features2mat
from nlputils.simmat import compute_K

from simec import SimilarityEncoder
from utils import center_K, check_embed_match, check_similarity_match
from utils_plotting import get_colors, plot_mnist, plot_mnist2, plot_words

%matplotlib inline
%load_ext autoreload
%autoreload 2
# set this to True if you want to save the figures from the paper;
# the flag is checked by the `if savefigs: plt.savefig(...)` lines throughout the notebook
savefigs = False


Using TensorFlow backend.

Proof of Concept

with toy data taken from http://www.jmlr.org/papers/volume5/laub04a/laub04a.pdf


In [2]:
# feature 1: clusters 1:4 and 5:8
D1 = np.array([[0.00, 2.36, 2.59, 1.78, 4.74, 4.82, 4.98, 4.72],
               [2.36, 0.00, 2.39, 1.60, 4.98, 5.06, 5.22, 4.96],
               [2.59, 2.39, 0.00, 2.09, 5.29, 5.37, 5.53, 5.27],
               [1.78, 1.60, 2.09, 0.00, 5.08, 5.16, 5.32, 5.06],
               [4.74, 4.98, 5.29, 5.08, 0.00, 1.20, 1.82, 1.62],
               [4.82, 5.06, 5.37, 5.16, 1.20, 0.00, 2.98, 1.78],
               [4.98, 5.22, 5.53, 5.32, 1.82, 2.98, 0.00, 2.02],
               [4.72, 4.96, 5.27, 5.06, 1.62, 1.78, 2.02, 0.00]])
# feature 2: clusters evens & odds
D2 = np.array([[0.00, 4.15, 2.03, 4.14, 1.26, 4.33, 0.69, 4.85],
               [4.15, 0.00, 4.70, 0.57, 4.37, 1.82, 4.24, 2.02],
               [2.03, 4.70, 0.00, 4.69, 1.85, 4.88, 1.68, 5.40],
               [4.14, 0.57, 4.69, 0.00, 4.36, 1.83, 4.23, 2.67],
               [1.26, 4.37, 1.85, 4.36, 0.00, 4.55, 0.73, 5.07],
               [4.33, 1.82, 4.88, 1.83, 4.55, 0.00, 4.42, 2.14],
               [0.69, 4.24, 1.68, 4.23, 0.73, 4.42, 0.00, 4.94],
               [4.85, 2.02, 5.40, 2.67, 5.07, 2.14, 4.94, 0.00]])
# combined dissimilarity matrix: D1-D2 and transform into similarities
S = -0.5*(D1-D2)

In [3]:
# center the similarity matrix
S = center_K(S)
# eigenvalue spectrum in descending order - we've got some significant negative eigenvalues!!
eigenvals = np.linalg.eigvalsh(S)[::-1]
print(eigenvals)
plt.figure()
plt.plot(np.arange(1, S.shape[0] + 1), eigenvals, '-o', markersize=3)
# dashed line at zero to separate positive from negative eigenvalues
plt.plot([1, S.shape[0]], [0, 0], 'k--', linewidth=0.5)
plt.xlim(0.9, S.shape[0] + 0.1)
plt.title('Eigenvalue Spectrum of S')
if savefigs:
    plt.savefig('fig_nonmetric_toy_evspec.pdf', dpi=300)


[  6.13892284e+00   9.31076445e-01   6.03175455e-01   2.37120876e-01
   8.24275234e-16  -1.86109044e-01  -3.81796102e-01  -5.69489047e+00]

In [4]:
# compute embedding based on eigenvalues and -vectors
# S is symmetric (its spectrum was computed with eigvalsh above), so use eigh instead of eig:
# it guarantees real eigenvalues and orthonormal eigenvectors (no .real workaround needed)
# and returns the eigenvalues in ascending order
D, V = np.linalg.eigh(S)
# reverse to get eigenvalues (and the matching eigenvector columns) in descending order
D, V = D[::-1], V[:, ::-1]
# scale every eigenvector by sqrt(|eigenvalue|); broadcasting over the columns is
# equivalent to multiplying with np.diag(np.sqrt(np.abs(D)))
X_embed = V * np.sqrt(np.abs(D))

In [5]:
def plot_8(X, labels):
    """
    Plot labels as text at the positions given by a 2D embedding.

    Every column of X is min-max scaled to [0, 1] before plotting, i.e. only the
    relative arrangement of the points is shown, not their original scale.

    Inputs:
        - X: 2D array with one row per point; columns 0 and 1 are the x and y coordinates
        - labels: iterable with one label per row of X (converted with str())
    """
    X = np.asarray(X, dtype=float)
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    x_range = x_max - x_min
    # a constant column would otherwise give 0/0 = NaN coordinates and the labels
    # would silently not be drawn; map such a column to 0 instead
    x_range = np.where(x_range == 0, 1., x_range)
    X = (X - x_min) / x_range
    plt.figure()
    for i, l in enumerate(labels):
        plt.text(X[i, 0], X[i, 1], str(l),
                 color='k', fontdict={'weight': 'medium', 'size': 16})
    # no ticks: the rescaled coordinates carry no meaning
    plt.xticks([])
    plt.yticks([])
    # leave some margin so that labels at the border stay inside the axes
    plt.xlim(-0.15, 1.15)
    plt.ylim(-0.15, 1.15)

In [6]:
# first two columns of X_embed, i.e. the dimensions with the largest eigenvalues
plot_8(X_embed[:, 0:2], labels=list(range(1, 9)))
plt.title('Largest components')
if savefigs:
    plt.savefig('fig_nonmetric_toy_largest.pdf', dpi=300)



In [7]:
# last two columns of X_embed, i.e. the dimensions with the smallest (most negative) eigenvalues
plot_8(X_embed[:, -2:], labels=list(range(1, 9)))
plt.title('Smallest components')
if savefigs:
    plt.savefig('fig_nonmetric_toy_smallest.pdf', dpi=300)



In [8]:
# combine the first and the last column of X_embed (largest and smallest eigenvalue)
plot_8(X_embed[:, [0, -1]], labels=list(range(1, 9)))
plt.title('most extreme components');



In [9]:
# data: 1 hot encoding of the 8 objects, i.e. the 8x8 identity matrix
X = np.eye(8)
# embed with simec: the embedding has 2 dimensions and the centered S is the target
simec = SimilarityEncoder(X.shape[1], 2, S.shape[1],
                          opt=keras.optimizers.Adamax(lr=0.05))
simec.fit(X, S, epochs=70)
X_embed_se = simec.transform(X)


Epoch 1/70
8/8 [==============================] - 0s 60ms/step - loss: 1.1639
Epoch 2/70
8/8 [==============================] - 0s 495us/step - loss: 1.1263
Epoch 3/70
8/8 [==============================] - 0s 434us/step - loss: 1.1055
Epoch 4/70
8/8 [==============================] - 0s 418us/step - loss: 1.0837
Epoch 5/70
8/8 [==============================] - 0s 465us/step - loss: 1.0605
Epoch 6/70
8/8 [==============================] - 0s 291us/step - loss: 1.0352
Epoch 7/70
8/8 [==============================] - 0s 290us/step - loss: 1.0071
Epoch 8/70
8/8 [==============================] - 0s 282us/step - loss: 0.9726
Epoch 9/70
8/8 [==============================] - 0s 402us/step - loss: 0.9348
Epoch 10/70
8/8 [==============================] - 0s 360us/step - loss: 0.8962
Epoch 11/70
8/8 [==============================] - 0s 381us/step - loss: 0.8515
Epoch 12/70
8/8 [==============================] - 0s 320us/step - loss: 0.8057
Epoch 13/70
8/8 [==============================] - 0s 359us/step - loss: 0.7568
Epoch 14/70
8/8 [==============================] - 0s 347us/step - loss: 0.7056
Epoch 15/70
8/8 [==============================] - 0s 429us/step - loss: 0.6527
Epoch 16/70
8/8 [==============================] - 0s 363us/step - loss: 0.5990
Epoch 17/70
8/8 [==============================] - 0s 288us/step - loss: 0.5454
Epoch 18/70
8/8 [==============================] - 0s 231us/step - loss: 0.4927
Epoch 19/70
8/8 [==============================] - 0s 393us/step - loss: 0.4416
Epoch 20/70
8/8 [==============================] - 0s 756us/step - loss: 0.3929
Epoch 21/70
8/8 [==============================] - 0s 334us/step - loss: 0.3470
Epoch 22/70
8/8 [==============================] - 0s 546us/step - loss: 0.3041
Epoch 23/70
8/8 [==============================] - 0s 461us/step - loss: 0.2648
Epoch 24/70
8/8 [==============================] - 0s 373us/step - loss: 0.2294
Epoch 25/70
8/8 [==============================] - 0s 785us/step - loss: 0.1983
Epoch 26/70
8/8 [==============================] - 0s 374us/step - loss: 0.1717
Epoch 27/70
8/8 [==============================] - 0s 315us/step - loss: 0.1493
Epoch 28/70
8/8 [==============================] - 0s 325us/step - loss: 0.1309
Epoch 29/70
8/8 [==============================] - 0s 306us/step - loss: 0.1161
Epoch 30/70
8/8 [==============================] - 0s 434us/step - loss: 0.1042
Epoch 31/70
8/8 [==============================] - 0s 285us/step - loss: 0.0945
Epoch 32/70
8/8 [==============================] - 0s 241us/step - loss: 0.0864
Epoch 33/70
8/8 [==============================] - 0s 228us/step - loss: 0.0795
Epoch 34/70
8/8 [==============================] - 0s 259us/step - loss: 0.0731
Epoch 35/70
8/8 [==============================] - 0s 230us/step - loss: 0.0669
Epoch 36/70
8/8 [==============================] - 0s 238us/step - loss: 0.0608
Epoch 37/70
8/8 [==============================] - 0s 238us/step - loss: 0.0549
Epoch 38/70
8/8 [==============================] - 0s 241us/step - loss: 0.0494
Epoch 39/70
8/8 [==============================] - 0s 239us/step - loss: 0.0444
Epoch 40/70
8/8 [==============================] - 0s 222us/step - loss: 0.0401
Epoch 41/70
8/8 [==============================] - 0s 228us/step - loss: 0.0367
Epoch 42/70
8/8 [==============================] - 0s 232us/step - loss: 0.0341
Epoch 43/70
8/8 [==============================] - 0s 308us/step - loss: 0.0324
Epoch 44/70
8/8 [==============================] - 0s 279us/step - loss: 0.0315
Epoch 45/70
8/8 [==============================] - 0s 295us/step - loss: 0.0311
Epoch 46/70
8/8 [==============================] - 0s 411us/step - loss: 0.0312
Epoch 47/70
8/8 [==============================] - 0s 332us/step - loss: 0.0315
Epoch 48/70
8/8 [==============================] - 0s 292us/step - loss: 0.0319
Epoch 49/70
8/8 [==============================] - 0s 388us/step - loss: 0.0323
Epoch 50/70
8/8 [==============================] - 0s 647us/step - loss: 0.0326
Epoch 51/70
8/8 [==============================] - 0s 549us/step - loss: 0.0327
Epoch 52/70
8/8 [==============================] - 0s 331us/step - loss: 0.0327
Epoch 53/70
8/8 [==============================] - 0s 278us/step - loss: 0.0324
Epoch 54/70
8/8 [==============================] - 0s 398us/step - loss: 0.0319
Epoch 55/70
8/8 [==============================] - 0s 319us/step - loss: 0.0313
Epoch 56/70
8/8 [==============================] - 0s 382us/step - loss: 0.0306
Epoch 57/70
8/8 [==============================] - 0s 766us/step - loss: 0.0298
Epoch 58/70
8/8 [==============================] - 0s 429us/step - loss: 0.0290
Epoch 59/70
8/8 [==============================] - 0s 373us/step - loss: 0.0283
Epoch 60/70
8/8 [==============================] - 0s 422us/step - loss: 0.0275
Epoch 61/70
8/8 [==============================] - 0s 331us/step - loss: 0.0269
Epoch 62/70
8/8 [==============================] - 0s 317us/step - loss: 0.0263
Epoch 63/70
8/8 [==============================] - 0s 292us/step - loss: 0.0258
Epoch 64/70
8/8 [==============================] - 0s 279us/step - loss: 0.0254
Epoch 65/70
8/8 [==============================] - 0s 373us/step - loss: 0.0251
Epoch 66/70
8/8 [==============================] - 0s 320us/step - loss: 0.0248
Epoch 67/70
8/8 [==============================] - 0s 740us/step - loss: 0.0246
Epoch 68/70
8/8 [==============================] - 0s 506us/step - loss: 0.0244
Epoch 69/70
8/8 [==============================] - 0s 329us/step - loss: 0.0242
Epoch 70/70
8/8 [==============================] - 0s 277us/step - loss: 0.0241

In [10]:
# 2D embedding learned by the SimEc
plot_8(X_embed_se, labels=list(range(1, 9)))
plt.title('SimEc embedding')
if savefigs:
    plt.savefig('fig_nonmetric_toy_simec.pdf', dpi=300)



In [11]:
# create the embedding by preserving multiple individual similarities at once:
# every dissimilarity matrix is divided by its maximum, turned into a similarity (1 - d),
# and centered; both results are then stacked along a third axis
S = np.stack([center_K(1. - dissim/dissim.max()) for dissim in (D1, D2)], axis=2)
simec = SimilarityEncoder(X.shape[1], 2, (S.shape[1], 2),
                          opt=keras.optimizers.Adamax(lr=0.05))
simec.fit(X, S, epochs=70)
X_embed_se = simec.transform(X)


Epoch 1/70
8/8 [==============================] - 0s 9ms/step - loss: 0.1349
Epoch 2/70
8/8 [==============================] - 0s 288us/step - loss: 0.1263
Epoch 3/70
8/8 [==============================] - 0s 252us/step - loss: 0.1180
Epoch 4/70
8/8 [==============================] - 0s 283us/step - loss: 0.1110
Epoch 5/70
8/8 [==============================] - 0s 392us/step - loss: 0.1051
Epoch 6/70
8/8 [==============================] - 0s 564us/step - loss: 0.0999
Epoch 7/70
8/8 [==============================] - 0s 411us/step - loss: 0.0952
Epoch 8/70
8/8 [==============================] - 0s 454us/step - loss: 0.0908
Epoch 9/70
8/8 [==============================] - 0s 336us/step - loss: 0.0866
Epoch 10/70
8/8 [==============================] - 0s 447us/step - loss: 0.0832
Epoch 11/70
8/8 [==============================] - 0s 510us/step - loss: 0.0802
Epoch 12/70
8/8 [==============================] - 0s 552us/step - loss: 0.0776
Epoch 13/70
8/8 [==============================] - 0s 534us/step - loss: 0.0754
Epoch 14/70
8/8 [==============================] - 0s 291us/step - loss: 0.0734
Epoch 15/70
8/8 [==============================] - 0s 293us/step - loss: 0.0716
Epoch 16/70
8/8 [==============================] - 0s 299us/step - loss: 0.0699
Epoch 17/70
8/8 [==============================] - 0s 486us/step - loss: 0.0684
Epoch 18/70
8/8 [==============================] - 0s 438us/step - loss: 0.0668
Epoch 19/70
8/8 [==============================] - 0s 319us/step - loss: 0.0652
Epoch 20/70
8/8 [==============================] - 0s 374us/step - loss: 0.0635
Epoch 21/70
8/8 [==============================] - 0s 295us/step - loss: 0.0618
Epoch 22/70
8/8 [==============================] - 0s 389us/step - loss: 0.0600
Epoch 23/70
8/8 [==============================] - 0s 321us/step - loss: 0.0583
Epoch 24/70
8/8 [==============================] - 0s 636us/step - loss: 0.0565
Epoch 25/70
8/8 [==============================] - 0s 415us/step - loss: 0.0546
Epoch 26/70
8/8 [==============================] - 0s 290us/step - loss: 0.0527
Epoch 27/70
8/8 [==============================] - 0s 274us/step - loss: 0.0507
Epoch 28/70
8/8 [==============================] - 0s 281us/step - loss: 0.0486
Epoch 29/70
8/8 [==============================] - 0s 298us/step - loss: 0.0463
Epoch 30/70
8/8 [==============================] - 0s 317us/step - loss: 0.0438
Epoch 31/70
8/8 [==============================] - 0s 499us/step - loss: 0.0411
Epoch 32/70
8/8 [==============================] - 0s 470us/step - loss: 0.0382
Epoch 33/70
8/8 [==============================] - 0s 407us/step - loss: 0.0353
Epoch 34/70
8/8 [==============================] - 0s 353us/step - loss: 0.0322
Epoch 35/70
8/8 [==============================] - 0s 284us/step - loss: 0.0292
Epoch 36/70
8/8 [==============================] - 0s 390us/step - loss: 0.0264
Epoch 37/70
8/8 [==============================] - 0s 493us/step - loss: 0.0237
Epoch 38/70
8/8 [==============================] - 0s 418us/step - loss: 0.0213
Epoch 39/70
8/8 [==============================] - 0s 347us/step - loss: 0.0192
Epoch 40/70
8/8 [==============================] - 0s 329us/step - loss: 0.0174
Epoch 41/70
8/8 [==============================] - 0s 302us/step - loss: 0.0159
Epoch 42/70
8/8 [==============================] - 0s 386us/step - loss: 0.0147
Epoch 43/70
8/8 [==============================] - 0s 389us/step - loss: 0.0137
Epoch 44/70
8/8 [==============================] - 0s 445us/step - loss: 0.0129
Epoch 45/70
8/8 [==============================] - 0s 358us/step - loss: 0.0122
Epoch 46/70
8/8 [==============================] - 0s 434us/step - loss: 0.0118
Epoch 47/70
8/8 [==============================] - 0s 501us/step - loss: 0.0115
Epoch 48/70
8/8 [==============================] - 0s 269us/step - loss: 0.0113
Epoch 49/70
8/8 [==============================] - 0s 258us/step - loss: 0.0112
Epoch 50/70
8/8 [==============================] - 0s 475us/step - loss: 0.0112
Epoch 51/70
8/8 [==============================] - 0s 317us/step - loss: 0.0113
Epoch 52/70
8/8 [==============================] - 0s 236us/step - loss: 0.0114
Epoch 53/70
8/8 [==============================] - 0s 275us/step - loss: 0.0115
Epoch 54/70
8/8 [==============================] - 0s 265us/step - loss: 0.0116
Epoch 55/70
8/8 [==============================] - 0s 469us/step - loss: 0.0116
Epoch 56/70
8/8 [==============================] - 0s 399us/step - loss: 0.0116
Epoch 57/70
8/8 [==============================] - 0s 433us/step - loss: 0.0115
Epoch 58/70
8/8 [==============================] - 0s 333us/step - loss: 0.0114
Epoch 59/70
8/8 [==============================] - 0s 466us/step - loss: 0.0113
Epoch 60/70
8/8 [==============================] - 0s 489us/step - loss: 0.0111
Epoch 61/70
8/8 [==============================] - 0s 450us/step - loss: 0.0109
Epoch 62/70
8/8 [==============================] - 0s 474us/step - loss: 0.0108
Epoch 63/70
8/8 [==============================] - 0s 367us/step - loss: 0.0106
Epoch 64/70
8/8 [==============================] - 0s 357us/step - loss: 0.0104
Epoch 65/70
8/8 [==============================] - 0s 283us/step - loss: 0.0103
Epoch 66/70
8/8 [==============================] - 0s 283us/step - loss: 0.0102
Epoch 67/70
8/8 [==============================] - 0s 401us/step - loss: 0.0101
Epoch 68/70
8/8 [==============================] - 0s 409us/step - loss: 0.0101
Epoch 69/70
8/8 [==============================] - 0s 371us/step - loss: 0.0100
Epoch 70/70
8/8 [==============================] - 0s 423us/step - loss: 0.0100

In [12]:
# 2D embedding learned from the two stacked similarity matrices
plot_8(X_embed_se, labels=list(range(1, 9)))
plt.title('SimEc embedding (2 sim.mat.)');


Simpson Similarity of 0 and 7 Handwritten Digits


In [13]:
# load digits
# NOTE(review): fetch_mldata is not available in newer scikit-learn releases
# (fetch_openml is its replacement) - this cell presumably needs an older version
mnist = fetch_mldata('MNIST original', data_home='data')
y = np.array(mnist.target, dtype=int)
# only use 0 and 7; pixel values are normalized to 0-1
is_0_or_7 = (y == 0) | (y == 7)
X_org = (mnist.data/255.)[is_0_or_7, :]
y = y[is_0_or_7]
# binarize: a pixel is on if its intensity is at least 0.5
X = (X_org >= 0.5).astype(int)
# randomly subsample 5000 and split in train/test (first 1000 indices are the test set)
np.random.seed(42)
n_samples = 5000
n_test = 1000
rnd_idx = np.random.permutation(X.shape[0])[:n_samples]
test_idx, train_idx = rnd_idx[:n_test], rnd_idx[n_test:]
X_test, X_test_org, y_test = X[test_idx], X_org[test_idx], y[test_idx]
X, X_org, y = X[train_idx], X_org[train_idx], y[train_idx]
n_train, n_features = X.shape

In [14]:
def simpson_similarity(X_bin):
    """
    Compute the pairwise simpson similarity between the rows of a binary matrix:
        s_ij = sum(a==1 & b==1)/min(sum(a==1),sum(b==1))

    Inputs:
        - X_bin: n x d array with 0/1 entries (one row per sample)
    Returns:
        - n x n float array with the (uncentered) pairwise similarities
    """
    n_ones = np.sum(X_bin, axis=1).astype(float)
    # broadcasting a column against a row gives the pairwise minimum without
    # first materializing two tiled n x n count matrices
    return np.dot(X_bin, X_bin.T)/np.minimum(n_ones[:, np.newaxis], n_ones[np.newaxis, :])


# compute simpson similarity of the training data and center it
S = center_K(simpson_similarity(X))
# same for test data
S_test = center_K(simpson_similarity(X_test))

In [15]:
# check out the eigenvalue spectrum (descending) - we've got some significant negative eigenvalues!!
eigenvals = np.linalg.eigvalsh(S)[::-1]
# 10 largest and 10 smallest eigenvalues
print(eigenvals[:10])
print(eigenvals[-10:])
plt.figure()
plt.plot(np.arange(1, S.shape[0] + 1), eigenvals, '-o', markersize=3)
# dashed line at zero to separate positive from negative eigenvalues
plt.plot([1, S.shape[0]], [0, 0], 'k--', linewidth=0.5)
plt.xlim(-25, S.shape[0] + 15)
plt.title('Eigenvalue Spectrum of S')
if savefigs:
    plt.savefig('fig_nonmetric_mnist07_evspec.pdf', dpi=300)


[ 424.91268886  189.27935752  172.4782423   122.81287236   98.01329024
   89.98108807   75.1760956    62.296041     51.02963291   48.59421388]
[  -7.48987197   -7.97829788  -10.34106178  -11.54449891  -12.60934772
  -12.93988029  -17.54817457  -27.90307842  -59.33843639 -104.36129652]

Embedding based on EV


In [16]:
# compute embedding based on eigenvalues and -vectors
# S is symmetric (its spectrum was computed with eigvalsh above), so use eigh instead of eig:
# it guarantees real eigenvalues and orthonormal eigenvectors and returns the eigenvalues
# in ascending order
# NOTE: D1/D2 below hold eigenvalues and shadow the toy dissimilarity matrices D1/D2
# defined at the top of the notebook
D, V = np.linalg.eigh(S)
# regular kpca embedding: take largest EV (i.e. simply reverse the ascending order)
D1, V1 = D[::-1], V[:, ::-1]
# scaling the columns by broadcasting is equivalent to multiplying with
# np.diag(np.sqrt(np.abs(D1))), but avoids an n x n matrix product
X_embed_largest = V1 * np.sqrt(np.abs(D1))
# feature discovery: based on absolute value of EV, i.e. also take most negative
D2, V2 = D[np.argsort(np.abs(D))[::-1]], V[:,np.argsort(np.abs(D))[::-1]]
X_embed_abs = V2 * np.sqrt(np.abs(D2))
# to approximate S, dimensions belonging to negative EV need to be imaginary
X_embed_abs_imag = X_embed_abs.astype(complex)
X_embed_abs_imag[:, D2 < 0] *= 1j

In [17]:
# plot the two dimensions with the largest eigenvalues
plot_mnist2(X_embed_largest[:, 0:2], y, X_original=X,
            title='Embedding with largest components')
plt.ylabel('2nd component', fontsize=18)
plt.xlabel('1st component (class)', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5)
if savefigs:
    plt.savefig('fig_nonmetric_mnist07_largest.png', dpi=300)



In [18]:
# plot the two dimensions with the smallest (most negative) eigenvalues
plot_mnist2(X_embed_largest[:, -2:], y, X_original=X,
            title='Embedding with most negative components')
plt.ylabel('last component (stroke weight)', fontsize=18)
plt.xlabel('2nd last component', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5)
if savefigs:
    plt.savefig('fig_nonmetric_mnist07_smallest.png', dpi=300)



In [19]:
# plot most extreme: dimension with the largest vs. dimension with the smallest eigenvalue
plot_mnist2(X_embed_largest[:, [0, -1]], y, X_original=X,
            title='Embedding with most extreme components')
plt.ylabel('last component (stroke weight)', fontsize=18)
plt.xlabel('1st component (class)', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5)



In [20]:
# inspect similarity matrix: it's a combination of two simmats
# S1 is built from the embedding dimensions with non-negative EV
S1 = X_embed_largest[:, D1>=0].dot(X_embed_largest[:, D1>=0].T)
# and S2 from the embedding dimensions with negative EV
S2 = X_embed_largest[:, D1<0].dot(X_embed_largest[:, D1<0].T)
S1pS2 = S1 + S2
print("S = S1-S2: %r " % np.allclose(S, S1-S2))
# with the embeddings based on EV without imaginary parts, we approximate S1+S2
print("X_embed_largest*X_embed_largest^T = S1+S2 : %r"
      % np.allclose(S1pS2, X_embed_largest.dot(X_embed_largest.T)))
print("X_embed_abs*X_embed_abs^T = S1+S2 : %r"
      % np.allclose(S1pS2, X_embed_abs.dot(X_embed_abs.T)))
# while with the embeddings where the negative EV are represented as imaginary numbers,
# we can approximate the real S (i.e. S1-S2)
print("X_embed_abs_imag*X_embed_abs_imag^T = S1-S2 : %r"
      % np.allclose(S1-S2, X_embed_abs_imag.dot(X_embed_abs_imag.T)))


S = S1-S2: True 
X_embed_largest*X_embed_largest^T = S1+S2 : True
X_embed_abs*X_embed_abs^T = S1+S2 : True
X_embed_abs_imag*X_embed_abs_imag^T = S1-S2 : True

In [21]:
# let's see how well S1, S1+S2, and S1-S2 are approximated
# depending on the number of embedding dimensions
colors = get_colors(10)
mseS_largest, mseS_abs, mseS_abs_imag = [], [], []
mseS1_largest, mseS1_abs, mseS1_abs_imag = [], [], []
mseS2_largest, mseS2_abs, mseS2_abs_imag = [], [], []
mseS1pS2_largest, mseS1pS2_abs, mseS1pS2_abs_imag = [], [], []
e_dims = [2, 10, 50, 100, 200, 1000, 2000, 3000, 3800, 3900, 3950, 3990, S.shape[0]]


def _mse_vs_targets(X_emb, e_dim):
    """
    Approximate the similarities with the first e_dim dimensions of the embedding X_emb
    and compare the result to each of the targets S, S1, S2, and S1+S2.

    Returns:
        - list with the first value returned by check_similarity_match (used as MSE
          below) for the four targets, in the order S, S1, S2, S1+S2
    """
    # every embedding dimension is either purely real or purely imaginary, so the product
    # has no imaginary part; .real only converts the complex result back to a real array
    # (and is a no-op for the real-valued embeddings)
    S_approx = np.dot(X_emb[:, :e_dim], X_emb[:, :e_dim].T).real
    return [check_similarity_match(S_approx, S_target, True)[0] for S_target in (S, S1, S2, S1pS2)]


def _plot_mse_curves(mse_largest, mse_abs, mse_abs_imag, title, alpha_imag=0.8):
    """
    Plot the MSE over the number of embedding dimensions (e_dims) for the three embeddings.

    Inputs:
        - mse_largest, mse_abs, mse_abs_imag: lists with one MSE per entry of e_dims
        - title: title of the figure
        - alpha_imag: transparency of the 'abs largest EV w/imag' curve
    """
    plt.figure()
    plt.plot(e_dims, mse_largest, '-o', markersize=3, c=colors[0], alpha=0.8, label='largest EV')
    plt.plot(e_dims, mse_abs, '-o', markersize=3, c=colors[4], alpha=0.8, label='abs largest EV')
    plt.plot(e_dims, mse_abs_imag, '-o', markersize=3, c=colors[7], alpha=alpha_imag, label='abs largest EV w/imag')
    plt.legend(loc=0)
    plt.title(title)
    # dashed line at zero (drawn after the legend so that it gets no legend entry)
    plt.plot([0, e_dims[-1]], [0,0], 'k--', linewidth=0.5)
    plt.xticks([2, 1000, 2000, 3000, 4000], [2, 1000, 2000, 3000, 4000])
    plt.xlabel('Number of Embedding Dimensions')
    plt.ylabel(r'$\frac{1}{N^2} \sum ( S-\hat{S} )^2$')


# (embedding, lists collecting the MSE w.r.t. S, S1, S2, S1+S2)
mse_lists = [
    # largest components
    (X_embed_largest, [mseS_largest, mseS1_largest, mseS2_largest, mseS1pS2_largest]),
    # largest components based on absolute ev
    (X_embed_abs, [mseS_abs, mseS1_abs, mseS2_abs, mseS1pS2_abs]),
    # largest components based on absolute ev with negative ev as imag
    (X_embed_abs_imag, [mseS_abs_imag, mseS1_abs_imag, mseS2_abs_imag, mseS1pS2_abs_imag]),
]
for e_dim in e_dims:
    # progress output: the number of dimensions, then a dot after the first and the second embedding
    print(e_dim, end=' ')
    for i, (X_emb, target_mses) in enumerate(mse_lists):
        if i:
            print(".", end=' ')
        for mse_list, mse_val in zip(target_mses, _mse_vs_targets(X_emb, e_dim)):
            mse_list.append(mse_val)

_plot_mse_curves(mseS_largest, mseS_abs, mseS_abs_imag, 'MSE approximating $S = S1 - S2$')
# in this figure the 'w/imag' curve is drawn more transparently (alpha=0.5)
_plot_mse_curves(mseS1_largest, mseS1_abs, mseS1_abs_imag,
                 'MSE approximating $S1$ (based on positive EV)', alpha_imag=0.5)
_plot_mse_curves(mseS2_largest, mseS2_abs, mseS2_abs_imag,
                 'MSE approximating $S2$ (based on negative EV)')
_plot_mse_curves(mseS1pS2_largest, mseS1pS2_abs, mseS1pS2_abs_imag, 'MSE approximating $S1 + S2$');


2 . . 10 . . 50 . . 100 . . 200 . . 1000 . . 2000 . . 3000 . . 3800 . . 3900 . . 3950 . . 3990 . . 4000 . .

Embedding with Ridge Regression


In [22]:
# adapt the input data: subtract the per-feature mean of the training images
# (with_std=False, i.e. no scaling); the test images are shifted by the same mean
ss = StandardScaler(with_std=False).fit(X_org)
X_tf, X_test_tf = ss.transform(X_org), ss.transform(X_test_org)

In [23]:
# ridge reg: map the centered images onto the first 10 dimensions of X_embed_abs
model = Ridge().fit(X_tf, X_embed_abs[:, :10])
X_embed_r, X_embed_test_r = model.predict(X_tf), model.predict(X_test_tf)
# how well do the predicted embeddings approximate S1+S2...
mse, rsq, _ = check_similarity_match(X_embed_r, S1pS2)
print("mse ridgereg: %f" % mse)
# ...and how close are they to the eigenvector based embeddings
print("with kPCA: mse: %f; r^2: %f; rho: %f" % check_embed_match(X_embed_largest[:, :10], X_embed_r))
print("with CS  : mse: %f; r^2: %f; rho: %f" % check_embed_match(X_embed_abs[:, :10], X_embed_r))
# plot the 1st and 5th dimension for the training and the test images
plot_mnist2(X_embed_r[:, [0, 4]], y, X_embed_test_r[:, [0, 4]], y_test,
            X_original=X, title='Embedding with RidgeReg')
plt.ylabel('5th component', fontsize=18)
plt.xlabel('1st component', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5);


mse ridgereg: 0.002598
with kPCA: mse: 0.004777; r^2: 0.835106; rho: 0.897065
with CS  : mse: 0.002591; r^2: 0.879322; rho: 0.919249

Embedding with SimEc


In [24]:
# create embedding with simec: only the similarities to the first n_targets training
# samples are used as targets; the embedding has 10 dimensions
n_targets = 1000
simec = SimilarityEncoder(
    X_tf.shape[1], 10, n_targets, hidden_layers=[(200, 'tanh')],
    l2_reg=1e-7, l2_reg_emb=1e-4, l2_reg_out=1e-7,
    s_ll_reg=0.5, S_ll=S1pS2[:n_targets, :n_targets],
    opt=keras.optimizers.Adamax(lr=8e-4),
)
simec.fit(X_tf, S[:, :n_targets])
X_embed, X_embed_test = simec.transform(X_tf), simec.transform(X_test_tf)
# same evaluation as for the ridge regression embedding
mse, rsq, _ = check_similarity_match(X_embed, S1pS2)
print("mse simec: %f" % mse)
print("with kPCA: mse: %f; r^2: %f; rho: %f" % check_embed_match(X_embed_largest[:, :10], X_embed))
print("with CS  : mse: %f; r^2: %f; rho: %f" % check_embed_match(X_embed_abs[:, :10], X_embed))


Epoch 1/25
4000/4000 [==============================] - 1s 151us/step - loss: 0.0249
Epoch 2/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0178
Epoch 3/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0155
Epoch 4/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0142
Epoch 5/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0126
Epoch 6/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0109
Epoch 7/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0091
Epoch 8/25
4000/4000 [==============================] - 0s 103us/step - loss: 0.0075
Epoch 9/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0064
Epoch 10/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0058
Epoch 11/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0051
Epoch 12/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0046
Epoch 13/25
4000/4000 [==============================] - 0s 100us/step - loss: 0.0041
Epoch 14/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0037
Epoch 15/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0035
Epoch 16/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0033
Epoch 17/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0032
Epoch 18/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0031
Epoch 19/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0031
Epoch 20/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0030
Epoch 21/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0030
Epoch 22/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0030
Epoch 23/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0029
Epoch 24/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0029
Epoch 25/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0029
mse simec: 0.002092
with kPCA: mse: 0.002552; r^2: 0.916803; rho: 0.953978
with CS  : mse: 0.001910; r^2: 0.932115; rho: 0.959401

In [25]:
# first two dimensions of the SimEc embedding for the training and the test images
plot_mnist2(X_embed[:, 0:2], y, X_embed_test[:, 0:2], y_test,
            X_original=X, title='Embedding with SimEc')
plt.ylabel('2nd component', fontsize=18)
plt.xlabel('1st component', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5);



In [26]:
# we need to decorrelate the simec embedding to find what we're looking for:
# the PCA is fitted on the training embedding and then applied to the test embedding
pca = PCA(n_components=10)
X_embed_pca, X_embed_test_pca = pca.fit_transform(X_embed), pca.transform(X_embed_test)

In [27]:
# 1st and 6th principal component of the SimEc embedding
plot_mnist2(X_embed_pca[:, [0, 5]], y, X_embed_test_pca[:, [0, 5]], y_test,
            X_original=X, title='Embedding with SimEc (after PCA)')
plt.ylabel('6th component', fontsize=18)
plt.xlabel('1st component', fontsize=18)
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5)
if savefigs:
    plt.savefig('fig_nonmetric_mnist07_simec_pca.png', dpi=300)



In [28]:
# with stacked sim mats, we can get the two components directly
# S1: positive EV (--> class difference), S2: negative EV (--> stroke thickness)
# it's very important that the individual sim mats were normalized by their largest EV
# (computed here as the spectral norm, ord=2)!
S1_normed = S1/np.linalg.norm(S1, ord=2)
S2_normed = S2/np.linalg.norm(S2, ord=2)
S_stacked = np.stack([S1_normed, S2_normed], axis=2)
# and then the whole matrix has to be normalized by the max, otherwise weights will just all go to 0!!
S_stacked = S_stacked/S_stacked.max()
simec = SimilarityEncoder(
    X_tf.shape[1], 2, (n_targets, 2), hidden_layers=[(200, 'tanh')],
    l2_reg=1e-7, l2_reg_emb=1e-4, l2_reg_out=1e-5,
    s_ll_reg=1., S_ll=S_stacked[:n_targets, :n_targets, :],
)
simec.fit(X_tf, S_stacked[:, :n_targets, :])
X_embed, X_embed_test = simec.transform(X_tf), simec.transform(X_test_tf)


Epoch 1/25
4000/4000 [==============================] - 1s 191us/step - loss: 0.0045
Epoch 2/25
4000/4000 [==============================] - 1s 128us/step - loss: 0.0040
Epoch 3/25
4000/4000 [==============================] - 1s 135us/step - loss: 0.0035
Epoch 4/25
4000/4000 [==============================] - 0s 124us/step - loss: 0.0031
Epoch 5/25
4000/4000 [==============================] - 1s 125us/step - loss: 0.0027
Epoch 6/25
4000/4000 [==============================] - 1s 128us/step - loss: 0.0025
Epoch 7/25
4000/4000 [==============================] - 0s 123us/step - loss: 0.0023
Epoch 8/25
4000/4000 [==============================] - 0s 123us/step - loss: 0.0022
Epoch 9/25
4000/4000 [==============================] - 1s 130us/step - loss: 0.0022
Epoch 10/25
4000/4000 [==============================] - 1s 134us/step - loss: 0.0022
Epoch 11/25
4000/4000 [==============================] - 1s 128us/step - loss: 0.0022
Epoch 12/25
4000/4000 [==============================] - 1s 127us/step - loss: 0.0022
Epoch 13/25
4000/4000 [==============================] - 1s 132us/step - loss: 0.0022
Epoch 14/25
4000/4000 [==============================] - 1s 128us/step - loss: 0.0022
Epoch 15/25
4000/4000 [==============================] - 1s 133us/step - loss: 0.0021
Epoch 16/25
4000/4000 [==============================] - 1s 134us/step - loss: 0.0021
Epoch 17/25
4000/4000 [==============================] - 1s 134us/step - loss: 0.0021
Epoch 18/25
4000/4000 [==============================] - 1s 134us/step - loss: 0.0021
Epoch 19/25
4000/4000 [==============================] - 1s 140us/step - loss: 0.0021
Epoch 20/25
4000/4000 [==============================] - 1s 134us/step - loss: 0.0021
Epoch 21/25
4000/4000 [==============================] - 1s 136us/step - loss: 0.0021
Epoch 22/25
4000/4000 [==============================] - 1s 132us/step - loss: 0.0021
Epoch 23/25
4000/4000 [==============================] - 0s 122us/step - loss: 0.0021
Epoch 24/25
4000/4000 [==============================] - 1s 127us/step - loss: 0.0021
Epoch 25/25
4000/4000 [==============================] - 1s 132us/step - loss: 0.0021

In [29]:
# visualize the 2D SimEc embedding (train + test) learned from the stacked similarity matrices
plot_mnist2(X_embed, y, X_embed_test, y_test, X_original=X,
            title='Embedding with SimEc (stacked sim mats)')
plt.xlabel('1st component', fontsize=18)
plt.ylabel('2nd component', fontsize=18)
# enlarge the current figure
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5)



In [30]:
# Alternative to stacking: because the individual similarity matrices were normalized by
# their largest eigenvalue, simply adding them up gives the same results.
S_sum = S_stacked.sum(axis=2)
# again, bring the values into a reasonable range by dividing by the maximum entry
S_sum = S_sum / S_sum.max()
# last-layer regularization uses the similarities among the first n_targets points only
S_ll_sum = S_sum[:n_targets, :n_targets]
simec = SimilarityEncoder(
    X_tf.shape[1], 2, n_targets,
    hidden_layers=[(200, 'tanh')],
    l2_reg=1e-7, l2_reg_emb=1e-4, l2_reg_out=1e-5,
    s_ll_reg=1., S_ll=S_ll_sum,
)
simec.fit(X_tf, S_sum[:, :n_targets])
# embed training and test points with the trained network
X_embed = simec.transform(X_tf)
X_embed_test = simec.transform(X_test_tf)


Epoch 1/25
4000/4000 [==============================] - 1s 165us/step - loss: 0.0033
Epoch 2/25
4000/4000 [==============================] - 0s 100us/step - loss: 0.0030
Epoch 3/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0025
Epoch 4/25
4000/4000 [==============================] - 0s 98us/step - loss: 0.0022
Epoch 5/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0021
Epoch 6/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0020
Epoch 7/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0019
Epoch 8/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0018
Epoch 9/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0017
Epoch 10/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0016
Epoch 11/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0016
Epoch 12/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0015
Epoch 13/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0015
Epoch 14/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0015
Epoch 15/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0015
Epoch 16/25
4000/4000 [==============================] - 0s 102us/step - loss: 0.0015
Epoch 17/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0015
Epoch 18/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0015
Epoch 19/25
4000/4000 [==============================] - 0s 100us/step - loss: 0.0015
Epoch 20/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0015
Epoch 21/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0015
Epoch 22/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0015
Epoch 23/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0015
Epoch 24/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0015
Epoch 25/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0015

In [31]:
# visualize the 2D SimEc embedding (train + test) learned from the summed similarity matrix
plot_mnist2(X_embed, y, X_embed_test, y_test, X_original=X,
            title='Embedding with SimEc (added, normalized sim mats)')
plt.xlabel('1st component', fontsize=18)
plt.ylabel('2nd component', fontsize=18)
# enlarge the current figure
fig = plt.gcf()
fig.set_size_inches(15.5, 8.5)



In [32]:
# Compare how well kPCA, classical scaling (CS) and SimEc approximate S for a growing
# number of embedding dimensions; per method we collect the mean squared error and R^2.
mse_kpca, mse_cs, mse_simec = [], [], []
rsq_kpca, rsq_cs, rsq_simec = [], [], []
e_dims = [2, 4, 6, 10, 15, 25, 50, 100]
for e_dim in e_dims:
    print(e_dim)
    # kpca: take only largest components
    mse_k, rsq, _ = check_similarity_match(X_embed_largest[:,:e_dim], S)
    mse_kpca.append(mse_k)
    rsq_kpca.append(rsq)
    # classical scaling: take only largest components based on absolute ev
    mse_c, rsq, _ = check_similarity_match(X_embed_abs_imag[:,:e_dim], S)
    mse_cs.append(mse_c)
    rsq_cs.append(rsq)
    # simec: train a fresh model for this embedding dimensionality
    simec = SimilarityEncoder(X_tf.shape[1], e_dim, S.shape[1], hidden_layers=[(200, 'tanh')],
                              l2_reg=0.0000001, l2_reg_emb=0.0001, l2_reg_out=0.0000001,
                              opt=keras.optimizers.Adamax(lr=0.0008))
    simec.fit(X_tf, S)
    # the network output is compared to S directly, as an approximation of S
    mse, rsq, _ = check_similarity_match(simec.predict(X_tf), S, X_embed_is_S_approx=True)
    mse_simec.append(mse)
    rsq_simec.append(rsq)
    print("mse kpca: %f , mse cs: %f; mse simec: %f" % (mse_k, mse_c, mse))
# release the Keras/TensorFlow state of all models trained above
keras.backend.clear_session()
colors = get_colors(15)
# figure 1: mean squared error vs. embedding dimensionality
plt.figure();
plt.plot(e_dims, mse_kpca, '-o', markersize=3, c=colors[0], label='kPCA');
plt.plot(e_dims, mse_cs, '-o', markersize=3, c=colors[2], label='CS');
plt.plot(e_dims, mse_simec, '-o', markersize=3, c=colors[8], label='SimEc');
plt.legend(loc=0);
plt.title('MNIST 0/7 Non-Metric');
# dashed reference line at the optimum (MSE = 0)
plt.plot([0, e_dims[-1]], [0,0], 'k--', linewidth=0.5);
plt.xticks(e_dims, e_dims);
plt.xlabel('Number of Embedding Dimensions ($d$)');
# raw string: '\h' is not a valid escape sequence, the backslash must reach the TeX renderer as is
plt.ylabel(r'Mean Squared Error of $\hat{S}$');
# figure 2: R^2 vs. embedding dimensionality
plt.figure();
plt.plot(e_dims, rsq_kpca, '-o', markersize=3, c=colors[0], label='kPCA');
plt.plot(e_dims, rsq_cs, '-o', markersize=3, c=colors[2], label='CS');
plt.plot(e_dims, rsq_simec, '-o', markersize=3, c=colors[8], label='SimEc');
plt.legend(loc=0);
plt.title('$R^2$');
# dashed reference line at the optimum (R^2 = 1)
plt.plot([0, e_dims[-1]], [1,1], 'k--', linewidth=0.5);
plt.xticks(e_dims, e_dims);
plt.xlabel('Number of Embedding Dimensions ($d$)');


2
Epoch 1/25
4000/4000 [==============================] - 1s 142us/step - loss: 0.0136
Epoch 2/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0090
Epoch 3/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0077
Epoch 4/25
4000/4000 [==============================] - 0s 103us/step - loss: 0.0075
Epoch 5/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0074
Epoch 6/25
4000/4000 [==============================] - 0s 101us/step - loss: 0.0073
Epoch 7/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0073
Epoch 8/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0073
Epoch 9/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0073
Epoch 10/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0072
Epoch 11/25
4000/4000 [==============================] - 0s 119us/step - loss: 0.0072
Epoch 12/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0072
Epoch 13/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0072
Epoch 14/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0072
Epoch 15/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0072
Epoch 16/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0072
Epoch 17/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0072
Epoch 18/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0072
Epoch 19/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0072
Epoch 20/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0072
Epoch 21/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0072
Epoch 22/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0072
Epoch 23/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0072
Epoch 24/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0071
Epoch 25/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0071
mse kpca: 0.007011 , mse cs: 0.007011; mse simec: 0.007077
4
Epoch 1/25
4000/4000 [==============================] - 1s 168us/step - loss: 0.0126
Epoch 2/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0067
Epoch 3/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0053
Epoch 4/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0048
Epoch 5/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0047
Epoch 6/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0046
Epoch 7/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0046
Epoch 8/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0046
Epoch 9/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0046
Epoch 10/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0045
Epoch 11/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0045
Epoch 12/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0045
Epoch 13/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0045
Epoch 14/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0045
Epoch 15/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0045
Epoch 16/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0044
Epoch 17/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0044
Epoch 18/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0044
Epoch 19/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0044
Epoch 20/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0044
Epoch 21/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0044
Epoch 22/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0044
Epoch 23/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0044
Epoch 24/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0044
Epoch 25/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0044
mse kpca: 0.004209 , mse cs: 0.004209; mse simec: 0.004306
6
Epoch 1/25
4000/4000 [==============================] - 1s 165us/step - loss: 0.0123
Epoch 2/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0064
Epoch 3/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0049
Epoch 4/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0043
Epoch 5/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0039
Epoch 6/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0036
Epoch 7/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0035
Epoch 8/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0034
Epoch 9/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0034
Epoch 10/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0034
Epoch 11/25
4000/4000 [==============================] - 0s 101us/step - loss: 0.0033
Epoch 12/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0033
Epoch 13/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0033
Epoch 14/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0033
Epoch 15/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0032
Epoch 16/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0032
Epoch 17/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0032
Epoch 18/25
4000/4000 [==============================] - 0s 103us/step - loss: 0.0032
Epoch 19/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0032
Epoch 20/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0032
Epoch 21/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0032
Epoch 22/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0032
Epoch 23/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0032
Epoch 24/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0032
Epoch 25/25
4000/4000 [==============================] - 0s 100us/step - loss: 0.0032
mse kpca: 0.003102 , mse cs: 0.002928; mse simec: 0.003052
10
Epoch 1/25
4000/4000 [==============================] - 1s 171us/step - loss: 0.0117
Epoch 2/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0057
Epoch 3/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0043
Epoch 4/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0035
Epoch 5/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0031
Epoch 6/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0028
Epoch 7/25
4000/4000 [==============================] - 0s 103us/step - loss: 0.0025
Epoch 8/25
4000/4000 [==============================] - 0s 101us/step - loss: 0.0024
Epoch 9/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0023
Epoch 10/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0022
Epoch 11/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0021
Epoch 12/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0021
Epoch 13/25
4000/4000 [==============================] - 0s 101us/step - loss: 0.0021
Epoch 14/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0021
Epoch 15/25
4000/4000 [==============================] - 0s 102us/step - loss: 0.0020
Epoch 16/25
4000/4000 [==============================] - 0s 104us/step - loss: 0.0020
Epoch 17/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0020
Epoch 18/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0020
Epoch 19/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0020
Epoch 20/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0020
Epoch 21/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0020
Epoch 22/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0020
Epoch 23/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0020
Epoch 24/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0019
Epoch 25/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0019
mse kpca: 0.002196 , mse cs: 0.001606; mse simec: 0.001821
15
Epoch 1/25
4000/4000 [==============================] - 1s 181us/step - loss: 0.0114
Epoch 2/25
4000/4000 [==============================] - 0s 118us/step - loss: 0.0051
Epoch 3/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0037
Epoch 4/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0031
Epoch 5/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0027
Epoch 6/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0025
Epoch 7/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0023
Epoch 8/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0021
Epoch 9/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0020
Epoch 10/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0019
Epoch 11/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0018
Epoch 12/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0018
Epoch 13/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0017
Epoch 14/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0017
Epoch 15/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0016
Epoch 16/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0016
Epoch 17/25
4000/4000 [==============================] - 0s 118us/step - loss: 0.0015
Epoch 18/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0015
Epoch 19/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0015
Epoch 20/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0015
Epoch 21/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0014
Epoch 22/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0014
Epoch 23/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0014
Epoch 24/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0014
Epoch 25/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0014
mse kpca: 0.001723 , mse cs: 0.000964; mse simec: 0.001202
25
Epoch 1/25
4000/4000 [==============================] - 1s 178us/step - loss: 0.0119
Epoch 2/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0055
Epoch 3/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0038
Epoch 4/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0030
Epoch 5/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0026
Epoch 6/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0023
Epoch 7/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0020
Epoch 8/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0019
Epoch 9/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0018
Epoch 10/25
4000/4000 [==============================] - 0s 107us/step - loss: 0.0017
Epoch 11/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0016
Epoch 12/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0016
Epoch 13/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0015
Epoch 14/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0015
Epoch 15/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0014
Epoch 16/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0014
Epoch 17/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0013
Epoch 18/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0012
Epoch 19/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0012
Epoch 20/25
4000/4000 [==============================] - 0s 120us/step - loss: 0.0011
Epoch 21/25
4000/4000 [==============================] - 0s 106us/step - loss: 0.0011
Epoch 22/25
4000/4000 [==============================] - 0s 102us/step - loss: 0.0011
Epoch 23/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0010
Epoch 24/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0010
Epoch 25/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0010
mse kpca: 0.001381 , mse cs: 0.000499; mse simec: 0.000818
50
Epoch 1/25
4000/4000 [==============================] - 1s 190us/step - loss: 0.0135
Epoch 2/25
4000/4000 [==============================] - 0s 119us/step - loss: 0.0061
Epoch 3/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0041
Epoch 4/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0031
Epoch 5/25
4000/4000 [==============================] - 0s 119us/step - loss: 0.0024
Epoch 6/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0021
Epoch 7/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0019
Epoch 8/25
4000/4000 [==============================] - 0s 113us/step - loss: 0.0017
Epoch 9/25
4000/4000 [==============================] - 0s 119us/step - loss: 0.0016
Epoch 10/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0015
Epoch 11/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0015
Epoch 12/25
4000/4000 [==============================] - 0s 105us/step - loss: 0.0014
Epoch 13/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0013
Epoch 14/25
4000/4000 [==============================] - 0s 112us/step - loss: 0.0012
Epoch 15/25
4000/4000 [==============================] - 0s 109us/step - loss: 0.0012
Epoch 16/25
4000/4000 [==============================] - 0s 118us/step - loss: 0.0011
Epoch 17/25
4000/4000 [==============================] - 0s 108us/step - loss: 0.0011
Epoch 18/25
4000/4000 [==============================] - 0s 110us/step - loss: 0.0011
Epoch 19/25
4000/4000 [==============================] - 0s 111us/step - loss: 0.0010
Epoch 20/25
4000/4000 [==============================] - 0s 111us/step - loss: 9.9445e-04
Epoch 21/25
4000/4000 [==============================] - 0s 107us/step - loss: 9.7200e-04
Epoch 22/25
4000/4000 [==============================] - 0s 103us/step - loss: 9.4672e-04
Epoch 23/25
4000/4000 [==============================] - 0s 109us/step - loss: 9.2677e-04
Epoch 24/25
4000/4000 [==============================] - 0s 108us/step - loss: 9.0880e-04
Epoch 25/25
4000/4000 [==============================] - 0s 108us/step - loss: 8.9304e-04
mse kpca: 0.001170 , mse cs: 0.000200; mse simec: 0.000677
100
Epoch 1/25
4000/4000 [==============================] - 1s 193us/step - loss: 0.0163
Epoch 2/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0074
Epoch 3/25
4000/4000 [==============================] - 0s 114us/step - loss: 0.0045
Epoch 4/25
4000/4000 [==============================] - 0s 121us/step - loss: 0.0031
Epoch 5/25
4000/4000 [==============================] - 0s 121us/step - loss: 0.0024
Epoch 6/25
4000/4000 [==============================] - 0s 118us/step - loss: 0.0020
Epoch 7/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0018
Epoch 8/25
4000/4000 [==============================] - 0s 122us/step - loss: 0.0016
Epoch 9/25
4000/4000 [==============================] - 0s 119us/step - loss: 0.0015
Epoch 10/25
4000/4000 [==============================] - 0s 115us/step - loss: 0.0014
Epoch 11/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0013
Epoch 12/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0013
Epoch 13/25
4000/4000 [==============================] - 0s 116us/step - loss: 0.0012
Epoch 14/25
4000/4000 [==============================] - 0s 119us/step - loss: 0.0011
Epoch 15/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0011
Epoch 16/25
4000/4000 [==============================] - 0s 117us/step - loss: 0.0010
Epoch 17/25
4000/4000 [==============================] - 0s 120us/step - loss: 0.0010
Epoch 18/25
4000/4000 [==============================] - 0s 110us/step - loss: 9.7942e-04
Epoch 19/25
4000/4000 [==============================] - 0s 123us/step - loss: 9.5833e-04
Epoch 20/25
4000/4000 [==============================] - 0s 123us/step - loss: 9.3195e-04
Epoch 21/25
4000/4000 [==============================] - 0s 120us/step - loss: 9.1309e-04
Epoch 22/25
4000/4000 [==============================] - 0s 117us/step - loss: 8.9578e-04
Epoch 23/25
4000/4000 [==============================] - 0s 117us/step - loss: 8.8195e-04
Epoch 24/25
4000/4000 [==============================] - 0s 123us/step - loss: 8.6679e-04
Epoch 25/25
4000/4000 [==============================] - 0s 119us/step - loss: 8.5446e-04
mse kpca: 0.001091 , mse cs: 0.000082; mse simec: 0.000638

Similarities of Words from Grimm's Fairy Tales and NASA News

Load Grimm's fairy tales and NASA news articles


In [33]:
# get 62 grimm fairy tales
# originally from https://www.gutenberg.org/files/2591/2591-0.txt
# but removing beginning and end and fixing a few newlines
# read the raw bytes ('rb') so that the explicit utf8 decoding below works on
# Python 2 as well as Python 3 (a text-mode read returns str without .decode on Python 3)
with open('data/nonmetric/grimm_all.txt', 'rb') as f:
    grimm = f.read()
# decode and transliterate all non-ASCII characters
grimm = unidecode(grimm.decode('utf8'))
grimm = grimm.replace('\r','')
# individual tales are separated by 5 consecutive newlines
grimm = grimm.split('\n\n\n\n\n')
# flatten every tale into a single line
grimm = [g.replace('\n', ' ') for g in grimm]
n_words = [len(g.split()) for g in grimm]
print("%i fairy tales with %i - %i words" % (len(grimm), min(n_words), max(n_words)))


62 fairy tales with 255 - 3793 words

In [34]:
# get 62 nasa news articles from 2002
# originally from here: https://www.nasa.gov/audience/formedia/archives/MP_Archive_02.html
nasa = []
# glob returns files in arbitrary order; sort them so that the position of an article
# in the list (and any id derived from it) is reproducible across machines
for fname in sorted(glob('data/nonmetric/nasa/*.txt')):
    # read the raw bytes ('rb') so that the explicit utf8 decoding works on Python 2 and 3
    with open(fname, 'rb') as f:
        nasa.append(unidecode(f.read().decode('utf8')).replace('\r',''))
# flatten every article into a single line
nasa = [n.replace('\n', ' ') for n in nasa]
n_words = [len(n.split()) for n in nasa]
print("%i news articles with %i - %i words" % (len(nasa), min(n_words), max(n_words)))


62 news articles with 275 - 1032 words

Transform the texts into features and select frequent words


In [35]:
# nlputils works on {doc_id: text} dictionaries, ids are numbered starting at 1
# NOTE: this rebinds grimm / nasa from lists to dicts, so the cell has to be run
# exactly once after the texts were loaded
grimm = dict(('grimm %i' % idx, text) for idx, text in enumerate(grimm, 1))
nasa = dict(('nasa %i' % idx, text) for idx, text in enumerate(nasa, 1))
# compute tf features (unweighted!) for both corpora with the same transformer settings
ft = FeatureTransform(
    norm='max',
    weight=False,
    renorm=None,
    identify_bigrams=False,
    norm_num=False,
)
grimm_feats = ft.texts2features(grimm)
nasa_feats = ft.texts2features(nasa)

In [36]:
# invert the per-document features into per-word dictionaries (word -> {doc: value})
grimm_words = invert_dict2(grimm_feats)
nasa_words = invert_dict2(nasa_feats)
# sum each word's values over all documents to rank the words by frequency
grimm_words_acc = {w: sum(doc_vals.values()) for w, doc_vals in grimm_words.items()}
nasa_words_acc = {w: sum(doc_vals.values()) for w, doc_vals in nasa_words.items()}
grimm_ranked = sorted(grimm_words_acc, key=grimm_words_acc.get, reverse=True)
nasa_ranked = sorted(nasa_words_acc, key=nasa_words_acc.get, reverse=True)
# show a slice of each ranking for inspection
print("Grimm's Fairy Tale")
print('\n'.join(grimm_ranked[200:220]))
print("NASA news articles")
print('\n'.join(nasa_ranked[40:60]))
# hand-picked subsets of the most frequent words (50 per corpus)
grimm_selected = [
    'little', 'king', 'away', 'old', 'good', 'man', 'time', 'day', 'home', 'father',
    'thought', 'fox', 'last', 'mother', 'cried', 'wife', 'long', 'way', 'nothing', 'door',
    'great', 'cat', 'gretel', 'eat', 'head', 'beautiful', 'son', 'house', 'morning', 'water',
    'horse', 'child', 'woman', 'wood', 'wolf', 'bird', 'princess', 'tree', 'daughter', 'master',
    'night', 'gold', 'forest', 'evening', 'castle', 'heart', 'eyes', 'fire', 'queen', 'bed',
]
nasa_selected = [
    'space', 'nasa', 'flight', 'earth', 'phone', 'mission', 'research', 'science', 'washington', 'technology',
    'program', 'station', 'data', 'system', 'headquarters', 'information', 'spacecraft', 'first', 'university', 'launch',
    'international', 'office', 'shuttle', 'operations', 'agency', 'orbit', 'astronaut', 'atmosphere', 'solar', 'years',
    'future', 'missions', 'additional', 'scientists', 'safety', 'national', 'team', 'laboratory', 'scientific', 'internet',
    'crew', 'engineers', 'field', 'air', 'software', 'work', 'satellite', 'environment', 'control', 'hubble',
]


Grimm's Fairy Tale
stood
us
something
children
every
heart
while
through
eyes
fire
hand
look
find
tell
bed
girl
queen
cook
going
world
NASA news articles
data
have
system
headquarters
information
2002
spacecraft
its
release
than
we
202
can
which
about
first
university
available
administrator
also

In [37]:
# vocabulary to embed: the selected Grimm words followed by the selected NASA words
words_selected = grimm_selected + nasa_selected
# alternative, larger hand-picked vocabulary (disabled)
#words_selected = ['courtyard','talers','church','fortune','pearls','ravens','sword','thieves','dragon','handsome','princes','cottage','robbers','apples','frog','village','dwarfs','words','moment','ashes','hands','huntsmen','shepherd','grandmother','death','length','word','land','lie','witch','palace','servant','piece','face','kingdom','table','wish','youth','soldier','drink','money','window','master','fine','tailor','cat','wolf','heart','bed','queen','wood','round','castle','fox','poor','house','beautiful','door','man','space','earth','right','all','years','solar','time','shuttle','science','station','other','sun','first','water','astronauts','orbit','mission','image','scientists','research','center','news','way','system','data','planet','long','light','gravity','high','good','radiation','meteor','propulsion','satellite','well','little','comet','people','together','air','work','telescope','day','place','morning','rockets','life','hard','large','energy','old','temperature','robot','engine','end','thing','physics','nuclear','hydrogen','head','cosmic','home','environment','computer','scientist','hand','atoms','night','exploration','body','great','fire','world','tree','forest','eyes','young','sound','country']
# merge the NASA features into the Grimm feature dict (in place!) so that a single
# inverted dict holds, for every word, the documents of both corpora it occurs in
grimm_feats.update(nasa_feats)
word_to_docs = invert_dict2(grimm_feats)
# keep only the selected words (presumably select_copy returns the sub-dict for the given keys)
words_dict = select_copy(word_to_docs, words_selected)
# from here on the word order is the key order of words_dict
words_selected = list(words_dict.keys())

In [38]:
# similarity of two words i and j, based on the documents they occur in:
# s_ij = #{docs with i AND j}/#{docs with i OR j}
def compute_rocha_sim(word1_dict, word2_dict):
    """
    Ratio of shared to combined keys of two dicts.

    Inputs:
        - word1_dict, word2_dict: dicts of the two words; only their keys
          (the documents the respective word occurs in) are used
    Returns:
        - #{keys in both dicts} / #{keys in at least one dict} as a float;
          if the dicts have no key in common, the machine epsilon of float
          is returned instead of 0, so the result is always strictly positive
    """
    docs1, docs2 = set(word1_dict.keys()), set(word2_dict.keys())
    n_shared = float(len(docs1.intersection(docs2)))
    if n_shared:
        return n_shared/len(docs1.union(docs2))
    # nothing in common (this also covers two empty dicts)
    return np.finfo(float).eps
# S[i, j] is the *log* of the similarity between word i and word j
N = len(words_selected)
S = np.zeros((N, N))
# the matrix is symmetric: compute the lower triangle (incl. the diagonal) and mirror it
for i, word_i in enumerate(words_selected):
    for j, word_j in enumerate(words_selected[:i + 1]):
        log_sim = np.log(compute_rocha_sim(words_dict[word_i], words_dict[word_j]))
        S[i, j] = log_sim
        S[j, i] = log_sim
# scale the matrix such that its largest absolute entry is 1 ...
S /= np.max(np.abs(S))
# ... and then center it
S = center_K(S)

In [39]:
# check out the eigenvalue spectrum - we've got some significant negative eigenvalues!!
# eigvalsh treats S as symmetric and returns the eigenvalues in ascending order -> reverse them
eigenvals = np.linalg.eigvalsh(S)[::-1]
print(eigenvals[:5])   # the 5 largest eigenvalues
print(eigenvals[-5:])  # the 5 smallest (most negative) eigenvalues
plt.figure()
plt.plot(list(range(1, S.shape[0]+1)), eigenvals, '-o', markersize=3)
# dashed zero line to make the negative part of the spectrum stand out
plt.plot([1,S.shape[0]],[0,0], 'k--', linewidth=0.5)
plt.xlim(0, S.shape[0]+1)
# label the axes so the figure can be read on its own
plt.xlabel('eigenvalue index (largest first)')
plt.ylabel('eigenvalue')
# trailing semicolon (as in the other plotting cells) keeps the Text(...) repr out of the cell output
plt.title('Eigenvalue Spectrum of S');


[ 31.33604797   4.95236018   4.79711511   4.05009333   3.65669978]
[-3.31373975 -3.53055135 -4.10922523 -4.76011161 -7.40005291]
Out[39]:
Text(0.5,1,u'Eigenvalue Spectrum of S')

In [40]:
# compute embedding based on eigenvalues and -vectors
# S is symmetric, so use eigh (the counterpart of the eigvalsh call used for the spectrum):
# it always returns real eigenvalues and orthonormal eigenvectors, whereas the general
# solver eig can return spurious complex values due to roundoff
D, V = np.linalg.eigh(S)
# eigh sorts the eigenvalues in ascending order -> reverse, so that column 0 belongs to the
# largest and column -1 to the smallest (most negative) eigenvalue
D, V = D[::-1], V[:, ::-1]
# scale every eigenvector with sqrt(|eigenvalue|); abs() because S has negative eigenvalues, too
X_embed = np.dot(V, np.diag(np.sqrt(np.abs(D))))
# plot largest
plot_words(X_embed[:,:2], words_selected, 'Embedding with largest components')
# NOTE: the sign of an eigenvector is arbitrary, i.e. which corpus ends up on which side of
# the 1st component should be checked against the rendered figure
plt.xlabel('1st component (Grimm - NASA)');
plt.ylabel('2nd component');
fig = plt.gcf();
fig.set_size_inches(18.5, 10.5);



In [41]:
# the last two columns of X_embed, i.e. the components belonging to the two smallest eigenvalues
plot_words(X_embed[:, -2:], words_selected, 'Embedding with smallest components')
# enlarge the figure that was just drawn
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.xlabel('2nd last component')
plt.ylabel('last component (specificity?)');



In [42]:
# first vs. last column of X_embed, i.e. the components belonging to the largest and the smallest eigenvalue
plot_words(X_embed[:,[0,-1]], words_selected, 'Embedding with most extreme components')
# column 0 is the same component as in the 'largest components' plot, so it gets the same label
# NOTE(review): the direction (Grimm - NASA) depends on the arbitrary eigenvector sign - confirm against the figure
plt.xlabel('1st component (Grimm - NASA)');
plt.ylabel('last component (specificity?)');
fig = plt.gcf();
fig.set_size_inches(18.5, 10.5);



In [43]:
# train simec
# input features of the words, built from words_dict and converted to a dense array
# (rows presumably follow the order of words_selected - confirm against features2mat)
X, _ = features2mat(words_dict, words_selected)
X = X.toarray()
# positional arguments: number of input features, 2 (presumably the embedding dimensionality,
# as the result is plotted in 2D), number of columns of the target matrix S
simec = SimilarityEncoder(
    X.shape[1], 2, S.shape[1],
    hidden_layers=[(10, 'tanh')],
    l2_reg=1e-5,
    l2_reg_emb=1e-4,
    opt=keras.optimizers.Adamax(lr=0.03),
)
simec.fit(X, S, epochs=400)
# NOTE: this replaces the eigendecomposition based X_embed from above
X_embed = simec.transform(X)
plot_words(X_embed, words_selected, 'Embedding with SimEc')
# enlarge the figure that was just drawn
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.xlabel('1st component')
plt.ylabel('2nd component');


Epoch 1/400
100/100 [==============================] - 0s 898us/step - loss: 0.1352
Epoch 2/400
100/100 [==============================] - 0s 148us/step - loss: 0.1323
Epoch 3/400
100/100 [==============================] - 0s 205us/step - loss: 0.1221
Epoch 4/400
100/100 [==============================] - 0s 119us/step - loss: 0.1060
Epoch 5/400
100/100 [==============================] - 0s 109us/step - loss: 0.0926
Epoch 6/400
100/100 [==============================] - 0s 96us/step - loss: 0.0853
Epoch 7/400
100/100 [==============================] - 0s 135us/step - loss: 0.0805
Epoch 8/400
100/100 [==============================] - 0s 133us/step - loss: 0.0773
Epoch 9/400
100/100 [==============================] - 0s 180us/step - loss: 0.0741
Epoch 10/400
100/100 [==============================] - 0s 117us/step - loss: 0.0727
Epoch 11/400
100/100 [==============================] - 0s 192us/step - loss: 0.0715
Epoch 12/400
100/100 [==============================] - 0s 111us/step - loss: 0.0695
Epoch 13/400
100/100 [==============================] - 0s 101us/step - loss: 0.0676
Epoch 14/400
100/100 [==============================] - 0s 104us/step - loss: 0.0661
Epoch 15/400
100/100 [==============================] - 0s 106us/step - loss: 0.0650
Epoch 16/400
100/100 [==============================] - 0s 108us/step - loss: 0.0641
Epoch 17/400
100/100 [==============================] - 0s 104us/step - loss: 0.0632
Epoch 18/400
100/100 [==============================] - 0s 102us/step - loss: 0.0628
Epoch 19/400
100/100 [==============================] - 0s 106us/step - loss: 0.0620
Epoch 20/400
100/100 [==============================] - 0s 117us/step - loss: 0.0604
Epoch 21/400
100/100 [==============================] - 0s 109us/step - loss: 0.0595
Epoch 22/400
100/100 [==============================] - 0s 143us/step - loss: 0.0588
Epoch 23/400
100/100 [==============================] - 0s 168us/step - loss: 0.0580
Epoch 24/400
100/100 [==============================] - 0s 120us/step - loss: 0.0572
Epoch 25/400
100/100 [==============================] - 0s 125us/step - loss: 0.0563
Epoch 26/400
100/100 [==============================] - 0s 143us/step - loss: 0.0561
Epoch 27/400
100/100 [==============================] - 0s 151us/step - loss: 0.0559
Epoch 28/400
100/100 [==============================] - 0s 177us/step - loss: 0.0546
Epoch 29/400
100/100 [==============================] - 0s 152us/step - loss: 0.0544
Epoch 30/400
100/100 [==============================] - 0s 159us/step - loss: 0.0534
Epoch 31/400
100/100 [==============================] - 0s 120us/step - loss: 0.0532
Epoch 32/400
100/100 [==============================] - 0s 163us/step - loss: 0.0526
Epoch 33/400
100/100 [==============================] - 0s 196us/step - loss: 0.0518
Epoch 34/400
100/100 [==============================] - 0s 143us/step - loss: 0.0514
Epoch 35/400
100/100 [==============================] - 0s 162us/step - loss: 0.0509
Epoch 36/400
100/100 [==============================] - 0s 122us/step - loss: 0.0505
Epoch 37/400
100/100 [==============================] - 0s 136us/step - loss: 0.0503
Epoch 38/400
100/100 [==============================] - 0s 124us/step - loss: 0.0499
Epoch 39/400
100/100 [==============================] - 0s 142us/step - loss: 0.0493
Epoch 40/400
100/100 [==============================] - 0s 119us/step - loss: 0.0490
Epoch 41/400
100/100 [==============================] - 0s 172us/step - loss: 0.0491
Epoch 42/400
100/100 [==============================] - 0s 131us/step - loss: 0.0483
Epoch 43/400
100/100 [==============================] - 0s 138us/step - loss: 0.0478
Epoch 44/400
100/100 [==============================] - 0s 116us/step - loss: 0.0476
Epoch 45/400
100/100 [==============================] - 0s 150us/step - loss: 0.0474
Epoch 46/400
100/100 [==============================] - 0s 107us/step - loss: 0.0473
Epoch 47/400
100/100 [==============================] - 0s 145us/step - loss: 0.0470
Epoch 48/400
100/100 [==============================] - 0s 100us/step - loss: 0.0463
Epoch 49/400
100/100 [==============================] - 0s 113us/step - loss: 0.0460
Epoch 50/400
100/100 [==============================] - 0s 115us/step - loss: 0.0457
Epoch 51/400
100/100 [==============================] - 0s 164us/step - loss: 0.0456
Epoch 52/400
100/100 [==============================] - 0s 130us/step - loss: 0.0454
Epoch 53/400
100/100 [==============================] - 0s 145us/step - loss: 0.0459
Epoch 54/400
100/100 [==============================] - 0s 108us/step - loss: 0.0454
Epoch 55/400
100/100 [==============================] - ETA: 0s - loss: 0.044 - 0s 153us/step - loss: 0.0446
Epoch 56/400
100/100 [==============================] - 0s 132us/step - loss: 0.0447
Epoch 57/400
100/100 [==============================] - 0s 126us/step - loss: 0.0446
Epoch 58/400
100/100 [==============================] - 0s 176us/step - loss: 0.0441
Epoch 59/400
100/100 [==============================] - 0s 123us/step - loss: 0.0442
Epoch 60/400
100/100 [==============================] - 0s 156us/step - loss: 0.0439
Epoch 61/400
100/100 [==============================] - 0s 132us/step - loss: 0.0438
Epoch 62/400
100/100 [==============================] - 0s 142us/step - loss: 0.0434
Epoch 63/400
100/100 [==============================] - 0s 163us/step - loss: 0.0434
Epoch 64/400
100/100 [==============================] - 0s 165us/step - loss: 0.0431
Epoch 65/400
100/100 [==============================] - 0s 150us/step - loss: 0.0431
Epoch 66/400
100/100 [==============================] - 0s 204us/step - loss: 0.0428
Epoch 67/400
100/100 [==============================] - 0s 133us/step - loss: 0.0425
Epoch 68/400
100/100 [==============================] - 0s 144us/step - loss: 0.0424
Epoch 69/400
100/100 [==============================] - 0s 152us/step - loss: 0.0423
Epoch 70/400
100/100 [==============================] - 0s 126us/step - loss: 0.0423
Epoch 71/400
100/100 [==============================] - 0s 132us/step - loss: 0.0421
Epoch 72/400
100/100 [==============================] - 0s 148us/step - loss: 0.0420
Epoch 73/400
100/100 [==============================] - 0s 138us/step - loss: 0.0420
Epoch 74/400
100/100 [==============================] - 0s 166us/step - loss: 0.0418
Epoch 75/400
100/100 [==============================] - 0s 131us/step - loss: 0.0416
Epoch 76/400
100/100 [==============================] - 0s 146us/step - loss: 0.0419
Epoch 77/400
100/100 [==============================] - 0s 130us/step - loss: 0.0416
Epoch 78/400
100/100 [==============================] - 0s 156us/step - loss: 0.0417
Epoch 79/400
100/100 [==============================] - 0s 128us/step - loss: 0.0412
Epoch 80/400
100/100 [==============================] - 0s 114us/step - loss: 0.0411
Epoch 81/400
100/100 [==============================] - 0s 126us/step - loss: 0.0412
Epoch 82/400
100/100 [==============================] - 0s 122us/step - loss: 0.0410
Epoch 83/400
100/100 [==============================] - 0s 163us/step - loss: 0.0412
Epoch 84/400
100/100 [==============================] - 0s 122us/step - loss: 0.0411
Epoch 85/400
100/100 [==============================] - 0s 116us/step - loss: 0.0413
Epoch 86/400
100/100 [==============================] - 0s 128us/step - loss: 0.0411
Epoch 87/400
100/100 [==============================] - 0s 110us/step - loss: 0.0412
Epoch 88/400
100/100 [==============================] - 0s 136us/step - loss: 0.0404
Epoch 89/400
100/100 [==============================] - 0s 137us/step - loss: 0.0409
Epoch 90/400
100/100 [==============================] - 0s 153us/step - loss: 0.0403
Epoch 91/400
100/100 [==============================] - 0s 121us/step - loss: 0.0404
Epoch 92/400
100/100 [==============================] - 0s 118us/step - loss: 0.0404
Epoch 93/400
100/100 [==============================] - 0s 161us/step - loss: 0.0402
Epoch 94/400
100/100 [==============================] - 0s 168us/step - loss: 0.0402
Epoch 95/400
100/100 [==============================] - 0s 113us/step - loss: 0.0401
Epoch 96/400
100/100 [==============================] - 0s 145us/step - loss: 0.0399
Epoch 97/400
100/100 [==============================] - 0s 122us/step - loss: 0.0399
Epoch 98/400
100/100 [==============================] - 0s 104us/step - loss: 0.0402
Epoch 99/400
100/100 [==============================] - 0s 123us/step - loss: 0.0399
Epoch 100/400
100/100 [==============================] - 0s 137us/step - loss: 0.0400
Epoch 101/400
100/100 [==============================] - 0s 109us/step - loss: 0.0399
Epoch 102/400
100/100 [==============================] - 0s 120us/step - loss: 0.0396
Epoch 103/400
100/100 [==============================] - 0s 113us/step - loss: 0.0401
Epoch 104/400
100/100 [==============================] - 0s 111us/step - loss: 0.0395
Epoch 105/400
100/100 [==============================] - 0s 150us/step - loss: 0.0400
Epoch 106/400
100/100 [==============================] - 0s 142us/step - loss: 0.0396
Epoch 107/400
100/100 [==============================] - 0s 149us/step - loss: 0.0399
Epoch 108/400
100/100 [==============================] - 0s 170us/step - loss: 0.0395
Epoch 109/400
100/100 [==============================] - 0s 179us/step - loss: 0.0397
Epoch 110/400
100/100 [==============================] - 0s 173us/step - loss: 0.0397
Epoch 111/400
100/100 [==============================] - 0s 174us/step - loss: 0.0394
Epoch 112/400
100/100 [==============================] - 0s 174us/step - loss: 0.0392
Epoch 113/400
100/100 [==============================] - 0s 132us/step - loss: 0.0393
Epoch 114/400
100/100 [==============================] - 0s 134us/step - loss: 0.0391
Epoch 115/400
100/100 [==============================] - 0s 122us/step - loss: 0.0392
Epoch 116/400
100/100 [==============================] - 0s 110us/step - loss: 0.0391
Epoch 117/400
100/100 [==============================] - 0s 120us/step - loss: 0.0390
Epoch 118/400
100/100 [==============================] - 0s 151us/step - loss: 0.0390
Epoch 119/400
100/100 [==============================] - 0s 109us/step - loss: 0.0389
Epoch 120/400
100/100 [==============================] - ETA: 0s - loss: 0.033 - 0s 121us/step - loss: 0.0390
Epoch 121/400
100/100 [==============================] - 0s 142us/step - loss: 0.0393
Epoch 122/400
100/100 [==============================] - 0s 140us/step - loss: 0.0389
Epoch 123/400
100/100 [==============================] - 0s 130us/step - loss: 0.0388
Epoch 124/400
100/100 [==============================] - 0s 213us/step - loss: 0.0392
Epoch 125/400
100/100 [==============================] - 0s 135us/step - loss: 0.0393
Epoch 126/400
100/100 [==============================] - 0s 177us/step - loss: 0.0391
Epoch 127/400
100/100 [==============================] - 0s 122us/step - loss: 0.0390
Epoch 128/400
100/100 [==============================] - 0s 152us/step - loss: 0.0390
Epoch 129/400
100/100 [==============================] - 0s 111us/step - loss: 0.0388
Epoch 130/400
100/100 [==============================] - 0s 122us/step - loss: 0.0389
Epoch 131/400
100/100 [==============================] - 0s 112us/step - loss: 0.0389
Epoch 132/400
100/100 [==============================] - 0s 126us/step - loss: 0.0386
Epoch 133/400
100/100 [==============================] - 0s 141us/step - loss: 0.0386
Epoch 134/400
100/100 [==============================] - 0s 103us/step - loss: 0.0386
Epoch 135/400
100/100 [==============================] - 0s 104us/step - loss: 0.0384
Epoch 136/400
100/100 [==============================] - 0s 130us/step - loss: 0.0384
Epoch 137/400
100/100 [==============================] - 0s 142us/step - loss: 0.0383
Epoch 138/400
100/100 [==============================] - 0s 159us/step - loss: 0.0384
Epoch 139/400
100/100 [==============================] - 0s 130us/step - loss: 0.0382
Epoch 140/400
100/100 [==============================] - 0s 106us/step - loss: 0.0382
Epoch 141/400
100/100 [==============================] - 0s 105us/step - loss: 0.0383
Epoch 142/400
100/100 [==============================] - 0s 103us/step - loss: 0.0383
Epoch 143/400
100/100 [==============================] - 0s 102us/step - loss: 0.0382
Epoch 144/400
100/100 [==============================] - 0s 108us/step - loss: 0.0382
Epoch 145/400
100/100 [==============================] - 0s 144us/step - loss: 0.0383
Epoch 146/400
100/100 [==============================] - 0s 130us/step - loss: 0.0383
Epoch 147/400
100/100 [==============================] - 0s 124us/step - loss: 0.0386
Epoch 148/400
100/100 [==============================] - 0s 157us/step - loss: 0.0382
Epoch 149/400
100/100 [==============================] - 0s 135us/step - loss: 0.0383
Epoch 150/400
100/100 [==============================] - 0s 155us/step - loss: 0.0383
Epoch 151/400
100/100 [==============================] - 0s 147us/step - loss: 0.0379
Epoch 152/400
100/100 [==============================] - 0s 122us/step - loss: 0.0380
Epoch 153/400
100/100 [==============================] - 0s 141us/step - loss: 0.0380
Epoch 154/400
100/100 [==============================] - 0s 126us/step - loss: 0.0379
Epoch 155/400
100/100 [==============================] - 0s 122us/step - loss: 0.0380
Epoch 156/400
100/100 [==============================] - 0s 175us/step - loss: 0.0380
Epoch 157/400
100/100 [==============================] - 0s 134us/step - loss: 0.0381
Epoch 158/400
100/100 [==============================] - 0s 147us/step - loss: 0.0379
Epoch 159/400
100/100 [==============================] - 0s 150us/step - loss: 0.0381
Epoch 160/400
100/100 [==============================] - 0s 173us/step - loss: 0.0378
Epoch 161/400
100/100 [==============================] - 0s 125us/step - loss: 0.0383
Epoch 162/400
100/100 [==============================] - 0s 94us/step - loss: 0.0378
Epoch 163/400
100/100 [==============================] - 0s 110us/step - loss: 0.0382
Epoch 164/400
100/100 [==============================] - 0s 110us/step - loss: 0.0377
Epoch 165/400
100/100 [==============================] - 0s 106us/step - loss: 0.0382
Epoch 166/400
100/100 [==============================] - 0s 103us/step - loss: 0.0377
Epoch 167/400
100/100 [==============================] - 0s 102us/step - loss: 0.0378
Epoch 168/400
100/100 [==============================] - 0s 95us/step - loss: 0.0376
Epoch 169/400
100/100 [==============================] - 0s 106us/step - loss: 0.0379
Epoch 170/400
100/100 [==============================] - 0s 90us/step - loss: 0.0376
Epoch 171/400
100/100 [==============================] - 0s 112us/step - loss: 0.0375
Epoch 172/400
100/100 [==============================] - 0s 97us/step - loss: 0.0374
Epoch 173/400
100/100 [==============================] - 0s 96us/step - loss: 0.0375
Epoch 174/400
100/100 [==============================] - 0s 148us/step - loss: 0.0375
Epoch 175/400
100/100 [==============================] - 0s 101us/step - loss: 0.0375
Epoch 176/400
100/100 [==============================] - 0s 93us/step - loss: 0.0374
Epoch 177/400
100/100 [==============================] - 0s 98us/step - loss: 0.0373
Epoch 178/400
100/100 [==============================] - 0s 124us/step - loss: 0.0375
Epoch 179/400
100/100 [==============================] - 0s 164us/step - loss: 0.0374
Epoch 180/400
100/100 [==============================] - 0s 110us/step - loss: 0.0375
Epoch 181/400
100/100 [==============================] - 0s 98us/step - loss: 0.0373
Epoch 182/400
100/100 [==============================] - 0s 107us/step - loss: 0.0375
Epoch 183/400
100/100 [==============================] - 0s 128us/step - loss: 0.0374
Epoch 184/400
100/100 [==============================] - 0s 136us/step - loss: 0.0372
Epoch 185/400
100/100 [==============================] - 0s 123us/step - loss: 0.0371
Epoch 186/400
100/100 [==============================] - 0s 150us/step - loss: 0.0372
Epoch 187/400
100/100 [==============================] - 0s 161us/step - loss: 0.0372
Epoch 188/400
100/100 [==============================] - 0s 101us/step - loss: 0.0374
Epoch 189/400
100/100 [==============================] - 0s 113us/step - loss: 0.0373
Epoch 190/400
100/100 [==============================] - 0s 150us/step - loss: 0.0372
Epoch 191/400
100/100 [==============================] - 0s 170us/step - loss: 0.0373
Epoch 192/400
100/100 [==============================] - 0s 92us/step - loss: 0.0371
Epoch 193/400
100/100 [==============================] - 0s 101us/step - loss: 0.0371
Epoch 194/400
100/100 [==============================] - 0s 98us/step - loss: 0.0370
Epoch 195/400
100/100 [==============================] - 0s 175us/step - loss: 0.0370
Epoch 196/400
100/100 [==============================] - 0s 149us/step - loss: 0.0370
Epoch 197/400
100/100 [==============================] - 0s 150us/step - loss: 0.0369
Epoch 198/400
100/100 [==============================] - 0s 95us/step - loss: 0.0369
Epoch 199/400
100/100 [==============================] - 0s 121us/step - loss: 0.0368
Epoch 200/400
100/100 [==============================] - 0s 160us/step - loss: 0.0368
Epoch 201/400
100/100 [==============================] - 0s 98us/step - loss: 0.0369
Epoch 202/400
100/100 [==============================] - 0s 106us/step - loss: 0.0368
Epoch 203/400
100/100 [==============================] - 0s 104us/step - loss: 0.0367
Epoch 204/400
100/100 [==============================] - 0s 110us/step - loss: 0.0368
Epoch 205/400
100/100 [==============================] - 0s 91us/step - loss: 0.0376
Epoch 206/400
100/100 [==============================] - 0s 98us/step - loss: 0.0374
Epoch 207/400
100/100 [==============================] - 0s 90us/step - loss: 0.0371
Epoch 208/400
100/100 [==============================] - 0s 99us/step - loss: 0.0374
Epoch 209/400
100/100 [==============================] - 0s 100us/step - loss: 0.0376
Epoch 210/400
100/100 [==============================] - 0s 99us/step - loss: 0.0371
Epoch 211/400
100/100 [==============================] - 0s 96us/step - loss: 0.0366
Epoch 212/400
100/100 [==============================] - 0s 99us/step - loss: 0.0366
Epoch 213/400
100/100 [==============================] - 0s 96us/step - loss: 0.0367
Epoch 214/400
100/100 [==============================] - 0s 98us/step - loss: 0.0365
Epoch 215/400
100/100 [==============================] - 0s 92us/step - loss: 0.0366
Epoch 216/400
100/100 [==============================] - 0s 113us/step - loss: 0.0366
Epoch 217/400
100/100 [==============================] - 0s 93us/step - loss: 0.0365
Epoch 218/400
100/100 [==============================] - 0s 123us/step - loss: 0.0368
Epoch 219/400
100/100 [==============================] - 0s 133us/step - loss: 0.0366
Epoch 220/400
100/100 [==============================] - 0s 121us/step - loss: 0.0373
Epoch 221/400
100/100 [==============================] - 0s 176us/step - loss: 0.0367
Epoch 222/400
100/100 [==============================] - 0s 118us/step - loss: 0.0371
Epoch 223/400
100/100 [==============================] - 0s 105us/step - loss: 0.0370
Epoch 224/400
100/100 [==============================] - 0s 107us/step - loss: 0.0369
Epoch 225/400
100/100 [==============================] - 0s 107us/step - loss: 0.0367
Epoch 226/400
100/100 [==============================] - 0s 141us/step - loss: 0.0366
Epoch 227/400
100/100 [==============================] - 0s 167us/step - loss: 0.0367
Epoch 228/400
100/100 [==============================] - 0s 121us/step - loss: 0.0367
Epoch 229/400
100/100 [==============================] - 0s 147us/step - loss: 0.0367
Epoch 230/400
100/100 [==============================] - 0s 109us/step - loss: 0.0363
Epoch 231/400
100/100 [==============================] - 0s 116us/step - loss: 0.0366
Epoch 232/400
100/100 [==============================] - 0s 141us/step - loss: 0.0363
Epoch 233/400
100/100 [==============================] - 0s 141us/step - loss: 0.0364
Epoch 234/400
100/100 [==============================] - 0s 159us/step - loss: 0.0363
Epoch 235/400
100/100 [==============================] - 0s 142us/step - loss: 0.0362
Epoch 236/400
100/100 [==============================] - 0s 125us/step - loss: 0.0362
Epoch 237/400
100/100 [==============================] - 0s 128us/step - loss: 0.0360
Epoch 238/400
100/100 [==============================] - 0s 136us/step - loss: 0.0361
Epoch 239/400
100/100 [==============================] - 0s 180us/step - loss: 0.0361
Epoch 240/400
100/100 [==============================] - 0s 108us/step - loss: 0.0361
Epoch 241/400
100/100 [==============================] - 0s 116us/step - loss: 0.0362
Epoch 242/400
100/100 [==============================] - 0s 101us/step - loss: 0.0361
Epoch 243/400
100/100 [==============================] - 0s 122us/step - loss: 0.0360
Epoch 244/400
100/100 [==============================] - 0s 98us/step - loss: 0.0359
Epoch 245/400
100/100 [==============================] - 0s 142us/step - loss: 0.0359
Epoch 246/400
100/100 [==============================] - 0s 152us/step - loss: 0.0357
Epoch 247/400
100/100 [==============================] - 0s 140us/step - loss: 0.0356
Epoch 248/400
100/100 [==============================] - 0s 122us/step - loss: 0.0356
Epoch 249/400
100/100 [==============================] - 0s 134us/step - loss: 0.0356
Epoch 250/400
100/100 [==============================] - 0s 141us/step - loss: 0.0356
Epoch 251/400
100/100 [==============================] - 0s 134us/step - loss: 0.0355
Epoch 252/400
100/100 [==============================] - 0s 144us/step - loss: 0.0355
Epoch 253/400
100/100 [==============================] - 0s 147us/step - loss: 0.0355
Epoch 254/400
100/100 [==============================] - 0s 151us/step - loss: 0.0354
Epoch 255/400
100/100 [==============================] - 0s 140us/step - loss: 0.0355
Epoch 256/400
100/100 [==============================] - 0s 130us/step - loss: 0.0354
Epoch 257/400
100/100 [==============================] - 0s 150us/step - loss: 0.0353
Epoch 258/400
100/100 [==============================] - ETA: 0s - loss: 0.033 - 0s 154us/step - loss: 0.0353
Epoch 259/400
100/100 [==============================] - 0s 125us/step - loss: 0.0353
Epoch 260/400
100/100 [==============================] - 0s 128us/step - loss: 0.0353
Epoch 261/400
100/100 [==============================] - 0s 127us/step - loss: 0.0352
Epoch 262/400
100/100 [==============================] - 0s 156us/step - loss: 0.0353
Epoch 263/400
100/100 [==============================] - 0s 157us/step - loss: 0.0355
Epoch 264/400
100/100 [==============================] - 0s 154us/step - loss: 0.0357
Epoch 265/400
100/100 [==============================] - 0s 147us/step - loss: 0.0352
Epoch 266/400
100/100 [==============================] - 0s 162us/step - loss: 0.0352
Epoch 267/400
100/100 [==============================] - 0s 130us/step - loss: 0.0352
Epoch 268/400
100/100 [==============================] - 0s 149us/step - loss: 0.0353
Epoch 269/400
100/100 [==============================] - 0s 161us/step - loss: 0.0354
Epoch 270/400
100/100 [==============================] - 0s 149us/step - loss: 0.0353
Epoch 271/400
100/100 [==============================] - 0s 117us/step - loss: 0.0351
Epoch 272/400
100/100 [==============================] - 0s 148us/step - loss: 0.0351
Epoch 273/400
100/100 [==============================] - ETA: 0s - loss: 0.031 - 0s 110us/step - loss: 0.0349
Epoch 274/400
100/100 [==============================] - 0s 183us/step - loss: 0.0348
Epoch 275/400
100/100 [==============================] - 0s 132us/step - loss: 0.0351
Epoch 276/400
100/100 [==============================] - 0s 129us/step - loss: 0.0353
Epoch 277/400
100/100 [==============================] - 0s 94us/step - loss: 0.0351
Epoch 278/400
100/100 [==============================] - 0s 100us/step - loss: 0.0351
Epoch 279/400
100/100 [==============================] - 0s 93us/step - loss: 0.0351
Epoch 280/400
100/100 [==============================] - 0s 120us/step - loss: 0.0349
Epoch 281/400
100/100 [==============================] - 0s 100us/step - loss: 0.0347
Epoch 282/400
100/100 [==============================] - 0s 98us/step - loss: 0.0348
Epoch 283/400
100/100 [==============================] - 0s 91us/step - loss: 0.0350
Epoch 284/400
100/100 [==============================] - 0s 128us/step - loss: 0.0347
Epoch 285/400
100/100 [==============================] - 0s 119us/step - loss: 0.0348
Epoch 286/400
100/100 [==============================] - 0s 106us/step - loss: 0.0351
Epoch 287/400
100/100 [==============================] - 0s 111us/step - loss: 0.0348
Epoch 288/400
100/100 [==============================] - 0s 100us/step - loss: 0.0347
Epoch 289/400
100/100 [==============================] - 0s 124us/step - loss: 0.0347
Epoch 290/400
100/100 [==============================] - 0s 110us/step - loss: 0.0345
Epoch 291/400
100/100 [==============================] - 0s 119us/step - loss: 0.0346
Epoch 292/400
100/100 [==============================] - 0s 136us/step - loss: 0.0345
Epoch 293/400
100/100 [==============================] - 0s 135us/step - loss: 0.0345
Epoch 294/400
100/100 [==============================] - 0s 156us/step - loss: 0.0344
Epoch 295/400
100/100 [==============================] - 0s 135us/step - loss: 0.0343
Epoch 296/400
100/100 [==============================] - 0s 141us/step - loss: 0.0344
Epoch 297/400
100/100 [==============================] - 0s 123us/step - loss: 0.0343
Epoch 298/400
100/100 [==============================] - 0s 141us/step - loss: 0.0343
Epoch 299/400
100/100 [==============================] - 0s 209us/step - loss: 0.0343
Epoch 300/400
100/100 [==============================] - 0s 140us/step - loss: 0.0342
Epoch 301/400
100/100 [==============================] - 0s 153us/step - loss: 0.0343
Epoch 302/400
100/100 [==============================] - 0s 136us/step - loss: 0.0343
Epoch 303/400
100/100 [==============================] - 0s 154us/step - loss: 0.0341
Epoch 304/400
100/100 [==============================] - 0s 149us/step - loss: 0.0342
Epoch 305/400
100/100 [==============================] - 0s 180us/step - loss: 0.0341
Epoch 306/400
100/100 [==============================] - 0s 113us/step - loss: 0.0342
Epoch 307/400
100/100 [==============================] - 0s 104us/step - loss: 0.0341
Epoch 308/400
100/100 [==============================] - 0s 124us/step - loss: 0.0341
Epoch 309/400
100/100 [==============================] - 0s 164us/step - loss: 0.0340
Epoch 310/400
100/100 [==============================] - 0s 131us/step - loss: 0.0341
Epoch 311/400
100/100 [==============================] - 0s 158us/step - loss: 0.0341
Epoch 312/400
100/100 [==============================] - 0s 143us/step - loss: 0.0340
Epoch 313/400
100/100 [==============================] - 0s 136us/step - loss: 0.0340
Epoch 314/400
100/100 [==============================] - 0s 156us/step - loss: 0.0344
Epoch 315/400
100/100 [==============================] - 0s 112us/step - loss: 0.0342
Epoch 316/400
100/100 [==============================] - 0s 180us/step - loss: 0.0342
Epoch 317/400
100/100 [==============================] - 0s 158us/step - loss: 0.0342
Epoch 318/400
100/100 [==============================] - 0s 124us/step - loss: 0.0342
Epoch 319/400
100/100 [==============================] - 0s 135us/step - loss: 0.0340
Epoch 320/400
100/100 [==============================] - 0s 185us/step - loss: 0.0339
Epoch 321/400
100/100 [==============================] - 0s 171us/step - loss: 0.0339
Epoch 322/400
100/100 [==============================] - 0s 162us/step - loss: 0.0339
Epoch 323/400
100/100 [==============================] - 0s 146us/step - loss: 0.0340
Epoch 324/400
100/100 [==============================] - 0s 153us/step - loss: 0.0340
Epoch 325/400
100/100 [==============================] - 0s 111us/step - loss: 0.0342
Epoch 326/400
100/100 [==============================] - 0s 174us/step - loss: 0.0342
Epoch 327/400
100/100 [==============================] - 0s 172us/step - loss: 0.0344
Epoch 328/400
100/100 [==============================] - 0s 123us/step - loss: 0.0341
Epoch 329/400
100/100 [==============================] - 0s 128us/step - loss: 0.0340
Epoch 330/400
100/100 [==============================] - 0s 104us/step - loss: 0.0341
Epoch 331/400
100/100 [==============================] - 0s 130us/step - loss: 0.0341
Epoch 332/400
100/100 [==============================] - 0s 110us/step - loss: 0.0338
Epoch 333/400
100/100 [==============================] - 0s 139us/step - loss: 0.0339
Epoch 334/400
100/100 [==============================] - 0s 110us/step - loss: 0.0339
Epoch 335/400
100/100 [==============================] - 0s 110us/step - loss: 0.0338
Epoch 336/400
100/100 [==============================] - 0s 116us/step - loss: 0.0338
Epoch 337/400
100/100 [==============================] - 0s 143us/step - loss: 0.0338
Epoch 338/400
100/100 [==============================] - 0s 147us/step - loss: 0.0337
Epoch 339/400
100/100 [==============================] - 0s 127us/step - loss: 0.0337
Epoch 340/400
100/100 [==============================] - 0s 135us/step - loss: 0.0337
Epoch 341/400
100/100 [==============================] - 0s 129us/step - loss: 0.0336
Epoch 342/400
100/100 [==============================] - 0s 123us/step - loss: 0.0336
Epoch 343/400
100/100 [==============================] - 0s 159us/step - loss: 0.0337
Epoch 344/400
100/100 [==============================] - 0s 146us/step - loss: 0.0338
Epoch 345/400
100/100 [==============================] - 0s 165us/step - loss: 0.0336
Epoch 346/400
100/100 [==============================] - 0s 121us/step - loss: 0.0337
Epoch 347/400
100/100 [==============================] - 0s 128us/step - loss: 0.0335
Epoch 348/400
100/100 [==============================] - 0s 135us/step - loss: 0.0336
Epoch 349/400
100/100 [==============================] - 0s 123us/step - loss: 0.0335
Epoch 350/400
100/100 [==============================] - 0s 125us/step - loss: 0.0336
Epoch 351/400
100/100 [==============================] - 0s 109us/step - loss: 0.0335
Epoch 352/400
100/100 [==============================] - 0s 103us/step - loss: 0.0335
Epoch 353/400
100/100 [==============================] - 0s 123us/step - loss: 0.0336
Epoch 354/400
100/100 [==============================] - 0s 99us/step - loss: 0.0335
Epoch 355/400
100/100 [==============================] - 0s 121us/step - loss: 0.0336
Epoch 356/400
100/100 [==============================] - 0s 107us/step - loss: 0.0337
Epoch 357/400
100/100 [==============================] - 0s 107us/step - loss: 0.0338
Epoch 358/400
100/100 [==============================] - 0s 120us/step - loss: 0.0339
Epoch 359/400
100/100 [==============================] - 0s 130us/step - loss: 0.0337
Epoch 360/400
100/100 [==============================] - 0s 129us/step - loss: 0.0337
Epoch 361/400
100/100 [==============================] - 0s 134us/step - loss: 0.0337
Epoch 362/400
100/100 [==============================] - 0s 147us/step - loss: 0.0335
Epoch 363/400
100/100 [==============================] - 0s 108us/step - loss: 0.0335
Epoch 364/400
100/100 [==============================] - 0s 99us/step - loss: 0.0335
Epoch 365/400
100/100 [==============================] - 0s 126us/step - loss: 0.0334
Epoch 366/400
100/100 [==============================] - 0s 158us/step - loss: 0.0335
Epoch 367/400
100/100 [==============================] - 0s 127us/step - loss: 0.0335
Epoch 368/400
100/100 [==============================] - 0s 141us/step - loss: 0.0334
Epoch 369/400
100/100 [==============================] - 0s 127us/step - loss: 0.0334
Epoch 370/400
100/100 [==============================] - 0s 122us/step - loss: 0.0334
Epoch 371/400
100/100 [==============================] - 0s 233us/step - loss: 0.0333
Epoch 372/400
100/100 [==============================] - 0s 115us/step - loss: 0.0333
Epoch 373/400
100/100 [==============================] - 0s 169us/step - loss: 0.0333
Epoch 374/400
100/100 [==============================] - 0s 135us/step - loss: 0.0333
Epoch 375/400
100/100 [==============================] - 0s 129us/step - loss: 0.0333
Epoch 376/400
100/100 [==============================] - 0s 136us/step - loss: 0.0334
Epoch 377/400
100/100 [==============================] - 0s 117us/step - loss: 0.0334
Epoch 378/400
100/100 [==============================] - 0s 136us/step - loss: 0.0333
Epoch 379/400
100/100 [==============================] - 0s 136us/step - loss: 0.0333
Epoch 380/400
100/100 [==============================] - 0s 151us/step - loss: 0.0334
Epoch 381/400
100/100 [==============================] - 0s 146us/step - loss: 0.0334
Epoch 382/400
100/100 [==============================] - 0s 151us/step - loss: 0.0334
Epoch 383/400
100/100 [==============================] - 0s 148us/step - loss: 0.0335
Epoch 384/400
100/100 [==============================] - 0s 109us/step - loss: 0.0334
Epoch 385/400
100/100 [==============================] - 0s 110us/step - loss: 0.0336
Epoch 386/400
100/100 [==============================] - 0s 106us/step - loss: 0.0335
Epoch 387/400
100/100 [==============================] - 0s 92us/step - loss: 0.0333
Epoch 388/400
100/100 [==============================] - 0s 108us/step - loss: 0.0333
Epoch 389/400
100/100 [==============================] - 0s 161us/step - loss: 0.0334
Epoch 390/400
100/100 [==============================] - 0s 143us/step - loss: 0.0333
Epoch 391/400
100/100 [==============================] - 0s 223us/step - loss: 0.0333
Epoch 392/400
100/100 [==============================] - 0s 185us/step - loss: 0.0332
Epoch 393/400
100/100 [==============================] - 0s 148us/step - loss: 0.0332
Epoch 394/400
100/100 [==============================] - 0s 122us/step - loss: 0.0332
Epoch 395/400
100/100 [==============================] - 0s 127us/step - loss: 0.0332
Epoch 396/400
100/100 [==============================] - 0s 120us/step - loss: 0.0332
Epoch 397/400
100/100 [==============================] - 0s 129us/step - loss: 0.0332
Epoch 398/400
100/100 [==============================] - 0s 155us/step - loss: 0.0333
Epoch 399/400
100/100 [==============================] - 0s 103us/step - loss: 0.0334
Epoch 400/400
100/100 [==============================] - 0s 161us/step - loss: 0.0334

In [44]:
# Project the embedding in X_embed onto its two leading principal components
# so that it can be shown in a 2D plot.
pca = PCA(n_components=2)
X_embed_pca = pca.fit_transform(X_embed)

# Hand the 2D coordinates and the matching word list to the plotting helper.
plot_words(X_embed_pca, words_selected, 'Embedding with SimEc (after PCA)')

# Grab the figure that is current after plotting and enlarge it.
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)

# Label the current axes by principal component; the trailing ';' keeps the
# returned Text object from being echoed as cell output.
plt.gca().set_xlabel('1st component')
plt.gca().set_ylabel('2nd component');



In [ ]: