Word2Vec (Word Embedding)

Implement the Word2Vec algorithm to compute vector representations of words. This example trains a skip-gram model with noise-contrastive estimation (NCE) loss on a small chunk of Wikipedia articles (the text8 dataset).

More info: Mikolov, Tomas, et al. "Efficient Estimation of Word Representations in Vector Space." arXiv:1301.3781, 2013.


In [1]:
from __future__ import division, print_function, absolute_import

import collections
import os
import random
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2
import zipfile

import numpy as np
import tensorflow as tf

In [2]:
# Training Parameters
learning_rate = 0.1
batch_size = 128
num_steps = 3000000
display_step = 10000
eval_step = 200000

# Evaluation Parameters
eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain']

# Word2Vec Parameters
embedding_size = 200 # Dimension of the embedding vector
max_vocabulary_size = 50000 # Maximum number of distinct words kept in the vocabulary
min_occurrence = 10 # Remove all words that do not appear at least n times
skip_window = 3 # How many words to consider left and right
num_skips = 2 # How many times to reuse an input to generate a label
num_sampled = 64 # Number of negative examples to sample
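
To make these skip-gram parameters concrete, here is a minimal sketch (a toy sentence of my own, not part of the notebook) of the (center, context) pairs they produce: with skip_window = 3 the window covers up to 3 words on each side of the center word, and num_skips = 2 means 2 of those context positions are sampled as labels for each center word.

import random

toy = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy']
span = 2 * 3 + 1                     # skip_window words each side + the center word
center = 3                           # index of the center word ('fox')
context = [i for i in range(span) if i != center]
for i in random.sample(context, 2):  # num_skips = 2 sampled context positions
    print((toy[center], toy[i]))     # e.g. ('fox', 'quick'), ('fox', 'over')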

In [3]:
# Download a small chunk of Wikipedia articles collection
url = 'http://mattmahoney.net/dc/text8.zip'
data_path = 'text8.zip'
if not os.path.exists(data_path):
    print("Downloading the dataset... (It may take some time)")
    filename, _ = urlretrieve(url, data_path)
    print("Done!")
# Unzip the dataset file. The text has already been preprocessed.
with zipfile.ZipFile(data_path) as f:
    text_words = f.read(f.namelist()[0]).decode('utf-8').lower().split()


Downloading the dataset... (It may take some time)
Done!

In [4]:
# Build the dictionary and replace rare words with UNK token
count = [('UNK', -1)]
# Retrieve the most common words
count.extend(collections.Counter(text_words).most_common(max_vocabulary_size - 1))
# Remove words with fewer than 'min_occurrence' occurrences
for i in range(len(count) - 1, -1, -1):
    if count[i][1] < min_occurrence:
        count.pop(i)
    else:
        # The collection is ordered by descending frequency, so stop once 'min_occurrence' is reached
        break
# Compute the vocabulary size
vocabulary_size = len(count)
# Assign an id to each word
word2id = dict()
for i, (word, _) in enumerate(count):
    word2id[word] = i

data = list()
unk_count = 0
for word in text_words:
    # Retrieve a word id, or assign it index 0 ('UNK') if not in dictionary
    index = word2id.get(word, 0)
    if index == 0:
        unk_count += 1
    data.append(index)
count[0] = ('UNK', unk_count)
id2word = dict(zip(word2id.values(), word2id.keys()))

print("Words count:", len(text_words))
print("Unique words:", len(set(text_words)))
print("Vocabulary size:", vocabulary_size)
print("Most common words:", count[:10])


Words count: 17005207
Unique words: 253854
Vocabulary size: 50000
Most common words: [('UNK', 418391), ('the', 1061396), ('of', 593677), ('and', 416629), ('one', 411764), ('in', 372201), ('a', 325873), ('to', 316376), ('zero', 264975), ('nine', 250430)]
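
As a quick sanity check (a sketch assuming the cell above has run; 'zzzzz' is just a made-up out-of-vocabulary token), the two dictionaries are inverses of each other and more frequent words get smaller ids:

print(word2id['the'])          # 1 -- 'UNK' occupies id 0
print(id2word[1])              # 'the'
print(word2id.get('zzzzz', 0)) # out-of-vocabulary words fall back to 0 ('UNK')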

In [5]:
data_index = 0
# Generate a training batch for the skip-gram model
def next_batch(batch_size, num_skips, skip_window):
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    # get window size (words left and right + current one)
    span = 2 * skip_window + 1
    buffer = collections.deque(maxlen=span)
    if data_index + span > len(data):
        data_index = 0
    buffer.extend(data[data_index:data_index + span])
    data_index += span
    for i in range(batch_size // num_skips):
        context_words = [w for w in range(span) if w != skip_window]
        words_to_use = random.sample(context_words, num_skips)
        for j, context_word in enumerate(words_to_use):
            batch[i * num_skips + j] = buffer[skip_window]
            labels[i * num_skips + j, 0] = buffer[context_word]
        if data_index == len(data):
            buffer.extend(data[0:span])
            data_index = span
        else:
            buffer.append(data[data_index])
            data_index += 1
    # Backtrack a little to avoid skipping words at the end of a batch
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels
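
To inspect what the generator yields, a short sketch (assuming the cells above have run) that prints one batch of (center, context) word pairs and then resets the cursor:

demo_batch, demo_labels = next_batch(8, 2, 3)  # batch_size=8, num_skips=2, skip_window=3
for center, context in zip(demo_batch, demo_labels[:, 0]):
    print(id2word[center], '->', id2word[context])
data_index = 0  # reset the global cursor so training starts from the beginning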

In [6]:
# Input data
X = tf.placeholder(tf.int32, shape=[None])
# Input label
Y = tf.placeholder(tf.int32, shape=[None, 1])

# Ensure the following ops & variables are assigned on CPU
# (some ops are not compatible with GPU)
with tf.device('/cpu:0'):
    # Create the embedding variable (each row represents a word's embedding vector)
    embedding = tf.Variable(tf.random_normal([vocabulary_size, embedding_size]))
    # Look up the corresponding embedding vector for each sample in X
    X_embed = tf.nn.embedding_lookup(embedding, X)

    # Construct the variables for the NCE loss
    nce_weights = tf.Variable(tf.random_normal([vocabulary_size, embedding_size]))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

# Compute the average NCE loss for the batch
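# NCE (noise-contrastive estimation) sidesteps the full 50,000-way softmax:
# for each (center, context) pair it learns to tell the true context word
# apart from 'num_sampled' randomly drawn negative words.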
loss_op = tf.reduce_mean(
    tf.nn.nce_loss(weights=nce_weights,
                   biases=nce_biases,
                   labels=Y,
                   inputs=X_embed,
                   num_sampled=num_sampled,
                   num_classes=vocabulary_size))

# Define the optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluation
# Compute the cosine similarity between the input embeddings and every vocabulary embedding
X_embed_norm = X_embed / tf.sqrt(tf.reduce_sum(tf.square(X_embed), 1, keepdims=True))
embedding_norm = embedding / tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keepdims=True))
cosine_sim_op = tf.matmul(X_embed_norm, embedding_norm, transpose_b=True)
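
The evaluation op is plain cosine similarity: every embedding row is scaled to unit length, so the dot product of two rows is the cosine of the angle between them. The same computation in NumPy (random stand-in matrices, for illustration only):

v = np.random.randn(3, embedding_size)                # 3 query embeddings
m = np.random.randn(vocabulary_size, embedding_size)  # stand-in embedding matrix
v_norm = v / np.sqrt(np.sum(np.square(v), 1, keepdims=True))
m_norm = m / np.sqrt(np.sum(np.square(m), 1, keepdims=True))
sim = np.dot(v_norm, m_norm.T)                        # shape (3, vocabulary_size), values in [-1, 1]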

In [7]:
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    # Testing data
    x_test = np.array([word2id[w] for w in eval_words])

    average_loss = 0
    for step in range(1, num_steps + 1):
        # Get a new batch of data
        batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
        # Run training op
        _, loss = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
        average_loss += loss

        if step % display_step == 0 or step == 1:
            if step > 1:
                average_loss /= display_step
            print("Step " + str(step) + ", Average Loss= " + \
                  "{:.4f}".format(average_loss))
            average_loss = 0

        # Evaluation
        if step % eval_step == 0 or step == 1:
            print("Evaluation...")
            sim = sess.run(cosine_sim_op, feed_dict={X: x_test})
            for i in range(len(eval_words)):
                top_k = 8  # number of nearest neighbors
                nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                log_str = '"%s" nearest neighbors:' % eval_words[i]
                for k in range(top_k):
                    log_str = '%s %s,' % (log_str, id2word[nearest[k]])
                print(log_str)


Step 1, Average Loss= 520.3188
Evaluation...
"five" nearest neighbors: brothers, swinging, dissemination, fruitful, trichloride, dll, timur, torre,
"of" nearest neighbors: malting, vaginal, cecil, xiaoping, arrangers, hydras, exhibits, splits,
"going" nearest neighbors: besht, xps, sdtv, mississippi, frequencies, tora, reciprocating, tursiops,
"hardware" nearest neighbors: burgh, residences, mares, attested, whirlwind, isomerism, admiration, ties,
"american" nearest neighbors: tensile, months, baffling, cricket, kodak, risky, nicomedia, jura,
"britain" nearest neighbors: superstring, interpretations, genealogical, munition, boer, occasional, psychologists, turbofan,
Step 10000, Average Loss= 202.2640
Step 20000, Average Loss= 96.5149
Step 30000, Average Loss= 67.2858
Step 40000, Average Loss= 52.5055
Step 50000, Average Loss= 42.6301
Step 60000, Average Loss= 37.3644
Step 70000, Average Loss= 33.1220
Step 80000, Average Loss= 30.5835
Step 90000, Average Loss= 28.2243
Step 100000, Average Loss= 25.5532
Step 110000, Average Loss= 24.0891
Step 120000, Average Loss= 21.8576
Step 130000, Average Loss= 21.2192
Step 140000, Average Loss= 19.8834
Step 150000, Average Loss= 19.3362
Step 160000, Average Loss= 18.3129
Step 170000, Average Loss= 17.4952
Step 180000, Average Loss= 16.8531
Step 190000, Average Loss= 15.9615
Step 200000, Average Loss= 15.0718
Evaluation...
"five" nearest neighbors: three, four, eight, six, seven, two, nine, one,
"of" nearest neighbors: the, is, a, was, with, in, and, on,
"going" nearest neighbors: time, military, called, with, used, state, most, new,
"hardware" nearest neighbors: deaths, system, three, at, zero, two, s, UNK,
"american" nearest neighbors: UNK, and, s, about, in, when, from, after,
"britain" nearest neighbors: years, were, from, both, of, these, is, many,
Step 210000, Average Loss= 14.9267
Step 220000, Average Loss= 15.4700
Step 230000, Average Loss= 14.0867
Step 240000, Average Loss= 14.5337
Step 250000, Average Loss= 13.2458
Step 260000, Average Loss= 13.2944
Step 270000, Average Loss= 13.0396
Step 280000, Average Loss= 12.1902
Step 290000, Average Loss= 11.7444
Step 300000, Average Loss= 11.8473
Step 310000, Average Loss= 11.1306
Step 320000, Average Loss= 11.1699
Step 330000, Average Loss= 10.8638
Step 340000, Average Loss= 10.7910
Step 350000, Average Loss= 11.0721
Step 360000, Average Loss= 10.6309
Step 370000, Average Loss= 10.4836
Step 380000, Average Loss= 10.3482
Step 390000, Average Loss= 10.0679
Step 400000, Average Loss= 10.0070
Evaluation...
"five" nearest neighbors: four, three, six, seven, eight, two, one, zero,
"of" nearest neighbors: and, in, the, a, for, by, is, while,
"going" nearest neighbors: name, called, made, military, music, people, city, was,
"hardware" nearest neighbors: power, a, john, the, has, see, and, system,
"american" nearest neighbors: s, british, UNK, john, in, during, and, from,
"britain" nearest neighbors: from, general, are, before, first, after, history, was,
Step 410000, Average Loss= 10.1151
Step 420000, Average Loss= 9.5719
Step 430000, Average Loss= 9.8267
Step 440000, Average Loss= 9.4704
Step 450000, Average Loss= 9.5561
Step 460000, Average Loss= 9.1479
Step 470000, Average Loss= 8.8914
Step 480000, Average Loss= 9.0281
Step 490000, Average Loss= 9.3139
Step 500000, Average Loss= 9.1559
Step 510000, Average Loss= 8.8257
Step 520000, Average Loss= 8.9081
Step 530000, Average Loss= 8.8572
Step 540000, Average Loss= 8.5835
Step 550000, Average Loss= 8.4495
Step 560000, Average Loss= 8.4193
Step 570000, Average Loss= 8.3399
Step 580000, Average Loss= 8.1633
Step 590000, Average Loss= 8.2914
Step 600000, Average Loss= 8.0268
Evaluation...
"five" nearest neighbors: three, four, six, two, seven, eight, one, zero,
"of" nearest neighbors: and, the, in, including, with, for, on, or,
"going" nearest neighbors: popular, king, his, music, and, time, name, being,
"hardware" nearest neighbors: power, over, then, than, became, at, less, for,
"american" nearest neighbors: english, s, german, in, french, since, john, between,
"britain" nearest neighbors: however, were, state, first, group, general, from, second,
Step 610000, Average Loss= 8.1733
Step 620000, Average Loss= 8.2522
Step 630000, Average Loss= 8.0434
Step 640000, Average Loss= 8.0930
Step 650000, Average Loss= 7.8770
Step 660000, Average Loss= 7.9221
Step 670000, Average Loss= 7.7645
Step 680000, Average Loss= 7.9534
Step 690000, Average Loss= 7.7507
Step 700000, Average Loss= 7.7499
Step 710000, Average Loss= 7.6629
Step 720000, Average Loss= 7.6055
Step 730000, Average Loss= 7.4779
Step 740000, Average Loss= 7.3182
Step 750000, Average Loss= 7.6399
Step 760000, Average Loss= 7.4364
Step 770000, Average Loss= 7.6509
Step 780000, Average Loss= 7.3204
Step 790000, Average Loss= 7.4101
Step 800000, Average Loss= 7.4354
Evaluation...
"five" nearest neighbors: three, four, six, seven, eight, two, one, nine,
"of" nearest neighbors: and, the, its, a, with, at, in, for,
"going" nearest neighbors: were, man, music, now, great, support, popular, her,
"hardware" nearest neighbors: power, system, then, military, high, against, since, international,
"american" nearest neighbors: english, british, born, b, john, french, d, german,
"britain" nearest neighbors: government, second, before, from, state, several, the, at,
Step 810000, Average Loss= 7.2603
Step 820000, Average Loss= 7.1646
Step 830000, Average Loss= 7.3155
Step 840000, Average Loss= 7.1274
Step 850000, Average Loss= 7.1237
Step 860000, Average Loss= 7.1528
Step 870000, Average Loss= 7.0673
Step 880000, Average Loss= 7.2167
Step 890000, Average Loss= 7.1359
Step 900000, Average Loss= 7.0940
Step 910000, Average Loss= 7.1114
Step 920000, Average Loss= 6.9328
Step 930000, Average Loss= 7.0108
Step 940000, Average Loss= 7.0630
Step 950000, Average Loss= 6.8371
Step 960000, Average Loss= 7.0466
Step 970000, Average Loss= 6.8331
Step 980000, Average Loss= 6.9670
Step 990000, Average Loss= 6.7357
Step 1000000, Average Loss= 6.6453
Evaluation...
"five" nearest neighbors: four, three, six, eight, seven, two, nine, zero,
"of" nearest neighbors: the, became, including, first, second, from, following, and,
"going" nearest neighbors: near, music, popular, made, while, his, works, most,
"hardware" nearest neighbors: power, system, before, its, using, for, thus, an,
"american" nearest neighbors: b, born, d, UNK, nine, john, english, seven,
"britain" nearest neighbors: of, following, government, home, from, state, end, several,
Step 1010000, Average Loss= 6.7193
Step 1020000, Average Loss= 6.9297
Step 1030000, Average Loss= 6.7905
Step 1040000, Average Loss= 6.7709
Step 1050000, Average Loss= 6.7337
Step 1060000, Average Loss= 6.7617
Step 1070000, Average Loss= 6.7489
Step 1080000, Average Loss= 6.6259
Step 1090000, Average Loss= 6.6415
Step 1100000, Average Loss= 6.7209
Step 1110000, Average Loss= 6.5471
Step 1120000, Average Loss= 6.6508
Step 1130000, Average Loss= 6.5184
Step 1140000, Average Loss= 6.6202
Step 1150000, Average Loss= 6.7205
Step 1160000, Average Loss= 6.5821
Step 1170000, Average Loss= 6.6200
Step 1180000, Average Loss= 6.5089
Step 1190000, Average Loss= 6.5587
Step 1200000, Average Loss= 6.4930
Evaluation...
"five" nearest neighbors: three, four, six, seven, eight, two, nine, zero,
"of" nearest neighbors: the, and, including, in, first, with, following, from,
"going" nearest neighbors: near, popular, works, today, large, now, when, both,
"hardware" nearest neighbors: power, system, computer, its, both, for, using, which,
"american" nearest neighbors: born, d, john, german, b, UNK, english, s,
"britain" nearest neighbors: state, following, government, home, became, people, were, the,
Step 1210000, Average Loss= 6.5985
Step 1220000, Average Loss= 6.4534
Step 1230000, Average Loss= 6.5083
Step 1240000, Average Loss= 6.4913
Step 1250000, Average Loss= 6.4326
Step 1260000, Average Loss= 6.3891
Step 1270000, Average Loss= 6.1601
Step 1280000, Average Loss= 6.4479
Step 1290000, Average Loss= 6.3813
Step 1300000, Average Loss= 6.5335
Step 1310000, Average Loss= 6.2971
Step 1320000, Average Loss= 6.3723
Step 1330000, Average Loss= 6.4234
Step 1340000, Average Loss= 6.3130
Step 1350000, Average Loss= 6.2867
Step 1360000, Average Loss= 6.3505
Step 1370000, Average Loss= 6.2990
Step 1380000, Average Loss= 6.3012
Step 1390000, Average Loss= 6.3112
Step 1400000, Average Loss= 6.2680
Evaluation...
"five" nearest neighbors: four, three, six, two, seven, eight, one, zero,
"of" nearest neighbors: the, its, and, including, in, with, see, for,
"going" nearest neighbors: near, great, like, today, began, called, an, another,
"hardware" nearest neighbors: power, computer, system, for, program, high, control, small,
"american" nearest neighbors: english, german, french, born, john, british, s, references,
"britain" nearest neighbors: state, great, government, people, following, became, along, home,
Step 1410000, Average Loss= 6.3157
Step 1420000, Average Loss= 6.3466
Step 1430000, Average Loss= 6.3090
Step 1440000, Average Loss= 6.3330
Step 1450000, Average Loss= 6.2072
Step 1460000, Average Loss= 6.2363
Step 1470000, Average Loss= 6.2736
Step 1480000, Average Loss= 6.1793
Step 1490000, Average Loss= 6.2977
Step 1500000, Average Loss= 6.1899
Step 1510000, Average Loss= 6.2381
Step 1520000, Average Loss= 6.1027
Step 1530000, Average Loss= 6.0046
Step 1540000, Average Loss= 6.0747
Step 1550000, Average Loss= 6.2524
Step 1560000, Average Loss= 6.1247
Step 1570000, Average Loss= 6.1937
Step 1580000, Average Loss= 6.0450
Step 1590000, Average Loss= 6.1556
Step 1600000, Average Loss= 6.1765
Evaluation...
"five" nearest neighbors: three, four, six, two, seven, eight, one, zero,
"of" nearest neighbors: the, and, its, for, from, modern, in, part,
"going" nearest neighbors: great, today, once, now, while, her, like, by,
"hardware" nearest neighbors: power, system, high, program, control, computer, typically, making,
"american" nearest neighbors: born, english, british, german, john, french, b, d,
"britain" nearest neighbors: country, state, home, government, first, following, during, from,
Step 1610000, Average Loss= 6.1029
Step 1620000, Average Loss= 6.0501
Step 1630000, Average Loss= 6.1536
Step 1640000, Average Loss= 6.0483
Step 1650000, Average Loss= 6.1197
Step 1660000, Average Loss= 6.0261
Step 1670000, Average Loss= 6.1012
Step 1680000, Average Loss= 6.1795
Step 1690000, Average Loss= 6.1224
Step 1700000, Average Loss= 6.0896
Step 1710000, Average Loss= 6.0418
Step 1720000, Average Loss= 6.0626
Step 1730000, Average Loss= 6.0214
Step 1740000, Average Loss= 6.1206
Step 1750000, Average Loss= 5.9721
Step 1760000, Average Loss= 6.0782
Step 1770000, Average Loss= 6.0291
Step 1780000, Average Loss= 6.0187
Step 1790000, Average Loss= 5.9761
Step 1800000, Average Loss= 5.7518
Evaluation...
"five" nearest neighbors: four, three, six, seven, eight, nine, two, zero,
"of" nearest neighbors: the, from, in, became, and, second, first, including,
"going" nearest neighbors: today, which, once, little, made, before, now, etc,
"hardware" nearest neighbors: computer, power, program, system, high, typically, current, eventually,
"american" nearest neighbors: b, d, born, actor, UNK, robert, william, english,
"britain" nearest neighbors: government, state, country, from, world, great, of, in,
Step 1810000, Average Loss= 5.9839
Step 1820000, Average Loss= 5.9931
Step 1830000, Average Loss= 6.0794
Step 1840000, Average Loss= 5.9072
Step 1850000, Average Loss= 5.9831
Step 1860000, Average Loss= 6.0023
Step 1870000, Average Loss= 5.9375
Step 1880000, Average Loss= 5.9250
Step 1890000, Average Loss= 5.9422
Step 1900000, Average Loss= 5.9339
Step 1910000, Average Loss= 5.9235
Step 1920000, Average Loss= 5.9692
Step 1930000, Average Loss= 5.9022
Step 1940000, Average Loss= 5.9599
Step 1950000, Average Loss= 6.0174
Step 1960000, Average Loss= 5.9530
Step 1970000, Average Loss= 5.9479
Step 1980000, Average Loss= 5.8870
Step 1990000, Average Loss= 5.9271
Step 2000000, Average Loss= 5.8774
Evaluation...
"five" nearest neighbors: four, three, six, seven, eight, two, nine, zero,
"of" nearest neighbors: and, the, from, in, within, first, including, with,
"going" nearest neighbors: today, before, another, little, work, etc, now, him,
"hardware" nearest neighbors: computer, program, system, both, making, designed, power, simple,
"american" nearest neighbors: actor, born, d, robert, john, b, german, writer,
"britain" nearest neighbors: government, state, following, great, england, became, country, from,
Step 2010000, Average Loss= 5.9373
Step 2020000, Average Loss= 5.9113
Step 2030000, Average Loss= 5.9158
Step 2040000, Average Loss= 5.9020
Step 2050000, Average Loss= 5.8608
Step 2060000, Average Loss= 5.7379
Step 2070000, Average Loss= 5.7143
Step 2080000, Average Loss= 5.9379
Step 2090000, Average Loss= 5.8201
Step 2100000, Average Loss= 5.9390
Step 2110000, Average Loss= 5.7295
Step 2120000, Average Loss= 5.8290
Step 2130000, Average Loss= 5.9042
Step 2140000, Average Loss= 5.8367
Step 2150000, Average Loss= 5.7760
Step 2160000, Average Loss= 5.8664
Step 2170000, Average Loss= 5.7974
Step 2180000, Average Loss= 5.8523
Step 2190000, Average Loss= 5.8047
Step 2200000, Average Loss= 5.8172
Evaluation...
"five" nearest neighbors: three, four, six, eight, two, seven, one, zero,
"of" nearest neighbors: the, with, group, in, its, and, from, including,
"going" nearest neighbors: produced, when, today, while, little, before, had, like,
"hardware" nearest neighbors: computer, system, power, technology, program, simple, for, designed,
"american" nearest neighbors: english, canadian, german, french, author, british, film, born,
"britain" nearest neighbors: government, great, state, established, british, england, country, army,
Step 2210000, Average Loss= 5.8847
Step 2220000, Average Loss= 5.8622
Step 2230000, Average Loss= 5.8295
Step 2240000, Average Loss= 5.8484
Step 2250000, Average Loss= 5.7917
Step 2260000, Average Loss= 5.7846
Step 2270000, Average Loss= 5.8307
Step 2280000, Average Loss= 5.7341
Step 2290000, Average Loss= 5.8519
Step 2300000, Average Loss= 5.7792
Step 2310000, Average Loss= 5.8277
Step 2320000, Average Loss= 5.7196
Step 2330000, Average Loss= 5.5469
Step 2340000, Average Loss= 5.7177
Step 2350000, Average Loss= 5.8139
Step 2360000, Average Loss= 5.7849
Step 2370000, Average Loss= 5.7022
Step 2380000, Average Loss= 5.7447
Step 2390000, Average Loss= 5.7667
Step 2400000, Average Loss= 5.7625
Evaluation...
"five" nearest neighbors: three, four, six, seven, two, eight, zero, nine,
"of" nearest neighbors: the, and, from, part, in, following, within, including,
"going" nearest neighbors: where, once, little, now, again, while, off, produced,
"hardware" nearest neighbors: system, computer, high, power, using, designed, systems, simple,
"american" nearest neighbors: author, actor, english, born, writer, british, b, d,
"britain" nearest neighbors: great, established, government, england, country, state, army, former,
Step 2410000, Average Loss= 5.6953
Step 2420000, Average Loss= 5.7413
Step 2430000, Average Loss= 5.7242
Step 2440000, Average Loss= 5.7397
Step 2450000, Average Loss= 5.7755
Step 2460000, Average Loss= 5.6881
Step 2470000, Average Loss= 5.7471
Step 2480000, Average Loss= 5.8159
Step 2490000, Average Loss= 5.7452
Step 2500000, Average Loss= 5.7547
Step 2510000, Average Loss= 5.6945
Step 2520000, Average Loss= 5.7318
Step 2530000, Average Loss= 5.6682
Step 2540000, Average Loss= 5.7660
Step 2550000, Average Loss= 5.6956
Step 2560000, Average Loss= 5.7307
Step 2570000, Average Loss= 5.7015
Step 2580000, Average Loss= 5.6932
Step 2590000, Average Loss= 5.6386
Step 2600000, Average Loss= 5.4734
Evaluation...
"five" nearest neighbors: four, three, six, seven, eight, nine, two, zero,
"of" nearest neighbors: the, and, in, from, became, including, for, with,
"going" nearest neighbors: little, again, just, a, now, where, to, for,
"hardware" nearest neighbors: computer, program, system, software, designed, systems, technology, current,
"american" nearest neighbors: actor, d, writer, b, born, singer, author, robert,
"britain" nearest neighbors: great, established, government, england, country, in, from, state,
Step 2610000, Average Loss= 5.7291
Step 2620000, Average Loss= 5.6412
Step 2630000, Average Loss= 5.7485
Step 2640000, Average Loss= 5.5833
Step 2650000, Average Loss= 5.6548
Step 2660000, Average Loss= 5.7159
Step 2670000, Average Loss= 5.6569
Step 2680000, Average Loss= 5.6080
Step 2690000, Average Loss= 5.7037
Step 2700000, Average Loss= 5.6360
Step 2710000, Average Loss= 5.6707
Step 2720000, Average Loss= 5.6811
Step 2730000, Average Loss= 5.6237
Step 2740000, Average Loss= 5.7050
Step 2750000, Average Loss= 5.6991
Step 2760000, Average Loss= 5.6691
Step 2770000, Average Loss= 5.7057
Step 2780000, Average Loss= 5.6162
Step 2790000, Average Loss= 5.6484
Step 2800000, Average Loss= 5.6627
Evaluation...
"five" nearest neighbors: four, six, three, seven, eight, nine, two, one,
"of" nearest neighbors: the, in, following, including, part, and, from, under,
"going" nearest neighbors: again, before, little, away, once, when, eventually, then,
"hardware" nearest neighbors: computer, system, software, program, systems, designed, for, design,
"american" nearest neighbors: actor, writer, singer, author, born, robert, d, john,
"britain" nearest neighbors: established, england, great, government, france, army, the, throughout,
Step 2810000, Average Loss= 5.5900
Step 2820000, Average Loss= 5.7053
Step 2830000, Average Loss= 5.6064
Step 2840000, Average Loss= 5.6891
Step 2850000, Average Loss= 5.5571
Step 2860000, Average Loss= 5.4490
Step 2870000, Average Loss= 5.5428
Step 2880000, Average Loss= 5.6832
Step 2890000, Average Loss= 5.5973
Step 2900000, Average Loss= 5.5816
Step 2910000, Average Loss= 5.5647
Step 2920000, Average Loss= 5.6001
Step 2930000, Average Loss= 5.6459
Step 2940000, Average Loss= 5.5622
Step 2950000, Average Loss= 5.5707
Step 2960000, Average Loss= 5.6492
Step 2970000, Average Loss= 5.5633
Step 2980000, Average Loss= 5.6323
Step 2990000, Average Loss= 5.5440
Step 3000000, Average Loss= 5.6209
Evaluation...
"five" nearest neighbors: four, three, six, eight, seven, two, zero, one,
"of" nearest neighbors: the, in, and, including, group, includes, part, from,
"going" nearest neighbors: once, again, when, quickly, before, eventually, little, had,
"hardware" nearest neighbors: computer, system, software, designed, program, simple, systems, sound,
"american" nearest neighbors: canadian, english, author, german, french, british, irish, australian,
"britain" nearest neighbors: established, england, great, government, throughout, france, british, northern,