In [1]:
import numpy as np
import theano
import six.moves.cPickle
import os, re, json
from keras.preprocessing import sequence, text
from keras.optimizers import SGD, RMSprop, Adagrad
from keras.utils import np_utils, generic_utils
from keras.models import Sequential
from keras.layers.embeddings import WordContextProduct, Embedding
from six.moves import range
from six.moves import zip
Using gpu device 0: GRID K520
In [2]:
# --- Vocabulary / embedding hyper-parameters --------------------------------
max_features = 50000  # vocabulary size: the 50,000 most common words in the data
skip_top = 100  # how many of the most common words to ignore
dim_proj = 256  # dimension of the embedding space
nb_epoch = 1  # number of passes over the corpus in the training cell

# --- Workflow switches -------------------------------------------------------
train_model = True  # gates the whole training cell
load_model = False  # True: unpickle an existing model instead of building one
load_tokenizer = True  # True: unpickle the tokenizer instead of fitting it
save = True  # pickle the tokenizer / model after they are produced

# --- File locations ----------------------------------------------------------
model_save_fname = "HN_skipgram_model.pkl"
model_load_fname = "HN_skipgram_model.pkl"
tokenizer_fname = "HN_tokenizer.pkl"
save_dir = os.path.expanduser("~/.keras/models")
# One JSON document per line, located directly in the user's home directory.
data_path = os.path.join(os.path.expanduser("~"), "HNCommentsAll.1perline.json")
In [3]:
# text preprocessing utils
# Anything between '<' and the nearest following '>' on the same line.
html_tags = re.compile(r'<.*?>')
# Entities that must become a real character *before* the generic entity
# pattern below blanks out whatever is left (here: the apostrophe entity).
to_replace = [('&#x27;', "'")]
# Anything between '&' and the nearest following ';' on the same line.
# NOTE(review): this also removes ordinary text that happens to sit between a
# bare '&' and a later ';' (e.g. "AT&T rocks; ..."). Left unchanged so that the
# cleaned text stays consistent with any tokenizer already fitted on it.
hex_tags = re.compile(r'&.*?;')


def clean_comment(comment):
    """Strip HTML markup and entities from a single comment.

    Steps, in order: replace every ``<...>`` tag with a space, substitute the
    entities listed in ``to_replace`` with their plain character, then replace
    every remaining ``&...;`` span with a space.

    Args:
        comment: the raw comment text. Accepts the native ``str`` type as well
            as Python 2 ``unicode`` and Python 3 ``bytes`` (treated as UTF-8).

    Returns:
        The cleaned text as a native ``str``.
    """
    if isinstance(comment, str):
        # Already the native string type (Python 3 text / Python 2 bytes).
        c = comment
    elif isinstance(comment, bytes):
        # Python 3 bytes: decode instead of taking the "b'...'" repr.
        c = comment.decode("utf-8")
    else:
        # Python 2 unicode: encode to a UTF-8 byte string, as before.
        c = comment.encode("utf-8")
    c = html_tags.sub(' ', c)
    for tag, char in to_replace:
        c = c.replace(tag, char)
    c = hex_tags.sub(' ', c)
    return c
def text_generator(path=data_path):
    """Yield the cleaned comment text of every line in a JSON-lines file.

    Each line of ``path`` is parsed with ``json.loads``; its ``"comment_text"``
    field is passed through ``clean_comment`` and yielded. The 0-based line
    index is printed every 10,000 lines as a progress indicator.

    The file is opened in a ``with`` block so the handle is released even when
    the consumer stops iterating early (e.g. a ``break`` after the first item)
    and the generator is closed or garbage-collected.

    Args:
        path: location of the file to read; one JSON document per line.

    Yields:
        The cleaned comment text, one item per input line.
    """
    with open(path) as f:
        for i, l in enumerate(f):
            comment_data = json.loads(l)
            comment_text = comment_data["comment_text"]
            comment_text = clean_comment(comment_text)
            if i % 10000 == 0:
                print(i)
            yield comment_text
In [5]:
# model management
# Either restore a previously pickled tokenizer or fit a new one on the whole
# corpus (one full pass over the data file), optionally pickling the result.
if load_tokenizer:
    print('Load tokenizer...')
    # NOTE: unpickling can execute arbitrary code -- only load files you trust.
    with open(os.path.join(save_dir, tokenizer_fname), 'rb') as tokenizer_file:
        tokenizer = six.moves.cPickle.load(tokenizer_file)
else:
    print("Fit tokenizer...")
    tokenizer = text.Tokenizer(nb_words=max_features)
    tokenizer.fit_on_texts(text_generator())
    if save:
        print("Save tokenizer...")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # `with` guarantees the pickle is flushed and the handle closed.
        with open(os.path.join(save_dir, tokenizer_fname), "wb") as tokenizer_file:
            six.moves.cPickle.dump(tokenizer, tokenizer_file)
Fit tokenizer...
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
480000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
700000
710000
720000
730000
740000
750000
760000
770000
780000
790000
800000
810000
820000
830000
840000
850000
860000
870000
880000
890000
900000
910000
920000
930000
940000
950000
960000
970000
980000
990000
1000000
1010000
1020000
1030000
1040000
1050000
1060000
1070000
1080000
1090000
1100000
1110000
1120000
1130000
1140000
1150000
1160000
1170000
1180000
1190000
1200000
1210000
1220000
1230000
1240000
1250000
1260000
1270000
1280000
1290000
1300000
1310000
1320000
1330000
1340000
1350000
1360000
1370000
1380000
1390000
1400000
1410000
1420000
1430000
1440000
1450000
1460000
1470000
1480000
1490000
1500000
1510000
1520000
1530000
1540000
1550000
1560000
1570000
1580000
1590000
1600000
1610000
1620000
1630000
1640000
1650000
1660000
1670000
1680000
1690000
1700000
1710000
1720000
1730000
1740000
1750000
1760000
1770000
1780000
1790000
1800000
1810000
1820000
1830000
1840000
1850000
1860000
1870000
1880000
1890000
1900000
1910000
1920000
1930000
1940000
1950000
1960000
1970000
1980000
1990000
2000000
2010000
2020000
2030000
2040000
2050000
2060000
2070000
2080000
2090000
2100000
2110000
2120000
2130000
2140000
2150000
2160000
2170000
2180000
2190000
2200000
2210000
2220000
2230000
2240000
2250000
2260000
2270000
2280000
2290000
2300000
2310000
2320000
2330000
2340000
2350000
2360000
2370000
2380000
2390000
2400000
2410000
2420000
2430000
2440000
2450000
2460000
2470000
2480000
2490000
2500000
2510000
2520000
2530000
2540000
2550000
2560000
2570000
2580000
2590000
2600000
2610000
2620000
2630000
2640000
2650000
2660000
2670000
2680000
2690000
2700000
2710000
2720000
2730000
2740000
2750000
2760000
2770000
2780000
2790000
2800000
2810000
2820000
2830000
2840000
2850000
2860000
2870000
2880000
2890000
2900000
2910000
2920000
2930000
2940000
2950000
2960000
2970000
2980000
2990000
3000000
3010000
3020000
3030000
3040000
3050000
3060000
3070000
3080000
3090000
3100000
3110000
3120000
3130000
3140000
3150000
3160000
3170000
3180000
3190000
3200000
3210000
3220000
3230000
3240000
3250000
3260000
3270000
3280000
3290000
3300000
3310000
3320000
3330000
3340000
3350000
3360000
3370000
3380000
3390000
3400000
3410000
3420000
3430000
3440000
3450000
3460000
3470000
3480000
3490000
3500000
3510000
3520000
3530000
3540000
3550000
3560000
3570000
3580000
3590000
3600000
3610000
3620000
3630000
3640000
3650000
3660000
3670000
3680000
3690000
3700000
3710000
3720000
3730000
3740000
3750000
3760000
3770000
3780000
3790000
3800000
3810000
3820000
3830000
3840000
3850000
3860000
3870000
3880000
3890000
3900000
3910000
3920000
3930000
3940000
3950000
3960000
3970000
3980000
3990000
4000000
4010000
4020000
4030000
4040000
4050000
4060000
4070000
4080000
4090000
4100000
4110000
4120000
4130000
4140000
4150000
4160000
4170000
4180000
4190000
4200000
4210000
4220000
4230000
4240000
4250000
4260000
4270000
4280000
4290000
4300000
4310000
4320000
4330000
4340000
4350000
4360000
4370000
4380000
4390000
4400000
4410000
4420000
4430000
4440000
4450000
4460000
4470000
4480000
4490000
4500000
4510000
4520000
4530000
4540000
4550000
4560000
4570000
4580000
4590000
4600000
4610000
4620000
4630000
4640000
4650000
4660000
4670000
4680000
4690000
4700000
4710000
4720000
4730000
4740000
4750000
4760000
4770000
4780000
4790000
4800000
4810000
4820000
4830000
4840000
4850000
4860000
4870000
4880000
4890000
4900000
4910000
4920000
4930000
4940000
4950000
4960000
4970000
4980000
4990000
5000000
5010000
5020000
5030000
5040000
5050000
5060000
5070000
5080000
5090000
5100000
5110000
5120000
5130000
5140000
5150000
5160000
5170000
5180000
5190000
5200000
5210000
5220000
5230000
5240000
5250000
5260000
5270000
5280000
5290000
5300000
5310000
5320000
5330000
5340000
5350000
5360000
5370000
5380000
5390000
5400000
5410000
5420000
5430000
5440000
5450000
5460000
5470000
5480000
5490000
5500000
5510000
5520000
5530000
5540000
5550000
5560000
5570000
5580000
5590000
5600000
5610000
5620000
5630000
5640000
5650000
5660000
5670000
5680000
5690000
5700000
5710000
5720000
5730000
5740000
5750000
5760000
5770000
5780000
5790000
5800000
5810000
5820000
5830000
5840000
Save tokenizer...
In [8]:
# List the contents of the directory configured as save_dir (IPython shell alias).
ls ~/.keras/models
HN_tokenizer.pkl
In [4]:
# Restore the pickled tokenizer from save_dir; the `with` block closes the
# file handle once loading is done.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(os.path.join(save_dir, tokenizer_fname), 'rb') as tokenizer_file:
    tokenizer = six.moves.cPickle.load(tokenizer_file)
In [5]:
# Display the tokenizer's document counter (also used as the progress-bar total during training).
tokenizer.document_count
Out[5]:
5845908
In [6]:
# Display the tokenizer's `filters` string (punctuation plus tab/newline in the recorded run).
tokenizer.filters
Out[6]:
'!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
In [12]:
# Peek at a single (arbitrary) entry of tokenizer.word_counts.
# Iterating the dict directly avoids materialising the full key list on
# Python 2, and print() with one argument behaves the same on Python 2 and 3.
for key in tokenizer.word_counts:
    print(key)
    print(tokenizer.word_counts[key])
    break
ftdna
5
In [13]:
# Peek at a single (arbitrary) entry of tokenizer.word_index.
# Iterating the dict directly avoids materialising the full key list on
# Python 2, and print() with one argument behaves the same on Python 2 and 3.
for key in tokenizer.word_index:
    print(key)
    print(tokenizer.word_index[key])
    break
ftdna
197942
In [14]:
# Peek at a single (arbitrary) entry of tokenizer.word_docs.
# Iterating the dict directly avoids materialising the full key list on
# Python 2, and print() with one argument behaves the same on Python 2 and 3.
for key in tokenizer.word_docs:
    print(key)
    print(tokenizer.word_docs[key])
    break
ftdna
5
In [15]:
# Demonstrate skip-gram generation on the first document only: print its
# index, its word-index sequence, then the sampled word couples and their
# 0/1 labels.
sampling_table = sequence.make_sampling_table(max_features)
for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
    print(i)
    print(seq)
    # Blank separator line; print("") emits an empty line on both Python 2
    # and Python 3 (a bare `print` does nothing on Python 3).
    print("")
    couples, labels = sequence.skipgrams(seq, max_features, window_size=4, negative_samples=1., sampling_table=sampling_table)
    print(couples)
    print(labels)
    break  # only the first document is inspected
0
0
[67, 11, 44, 20, 2, 2087, 13, 3, 943, 11399, 2, 587, 1, 140, 11597, 46, 1, 81, 831, 56, 2, 880, 12, 10814, 1197, 22, 11, 91, 2, 277, 53, 9, 839, 55, 61, 5, 41, 10]
[[10814, 9256], [11399, 2], [11399, 13003], [10814, 91], [10814, 880], [943, 1], [91, 48182], [11399, 943], [11399, 13], [943, 4838], [91, 35764], [11399, 2087], [10814, 37241], [11597, 46], [91, 45176], [91, 10814], [943, 49865], [11399, 140], [91, 2], [91, 10042], [943, 13], [11399, 33764], [11597, 27125], [91, 4077], [943, 2], [943, 26688], [11399, 1], [11597, 1], [10814, 28034], [91, 53], [10814, 56], [10814, 22], [10814, 40207], [10814, 12], [10814, 31487], [11597, 20483], [11597, 587], [11399, 587], [11399, 44736], [11597, 1], [943, 2], [11597, 29849], [91, 48270], [11597, 25261], [10814, 11], [91, 44786], [943, 11756], [943, 3], [11399, 36687], [11597, 35004], [91, 1197], [943, 11399], [943, 587], [11597, 6931], [943, 28406], [10814, 45564], [10814, 118], [11399, 13712], [91, 277], [11399, 39915], [943, 29603], [91, 22], [943, 2087], [943, 21037], [91, 11], [91, 1821], [11399, 3], [11597, 41391], [11399, 43550], [11597, 140], [10814, 1197], [91, 9], [10814, 18400], [11597, 19987], [11597, 2], [11597, 81], [943, 48595], [11399, 34314], [11597, 831], [10814, 2]]
[0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
In [17]:
# training process
# Builds (or unpickles) the model, then performs one gradient update per
# document using skip-gram couples sampled from that document, and finally
# pickles the trained model when `save` is set.
if train_model:
    if load_model:
        print('Load model...')
        # NOTE: unpickling can execute arbitrary code -- only load files you trust.
        with open(os.path.join(save_dir, model_load_fname), 'rb') as model_file:
            model = six.moves.cPickle.load(model_file)
    else:
        print('Build model...')
        model = Sequential()
        model.add(WordContextProduct(max_features, proj_dim=dim_proj, init="uniform"))
        model.compile(loss='mse', optimizer='rmsprop')

    sampling_table = sequence.make_sampling_table(max_features)

    for e in range(nb_epoch):
        print('-'*40)
        # Single formatted string: print('Epoch', e) shows a tuple on Python 2.
        print('Epoch %d' % e)
        print('-'*40)

        progbar = generic_utils.Progbar(tokenizer.document_count)
        samples_seen = 0
        losses = []

        for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
            # get skipgram couples for one text in the dataset
            couples, labels = sequence.skipgrams(seq, max_features, window_size=4, negative_samples=1., sampling_table=sampling_table)
            if couples:
                # one gradient update per sentence (one sentence = a few 1000s of word couples)
                X = np.array(couples, dtype="int32")
                loss = model.train(X, labels)
                losses.append(loss)
                # Report the mean loss of every batch of 100 updates, then start over.
                if len(losses) % 100 == 0:
                    progbar.update(i, values=[("loss", np.mean(losses))])
                    losses = []
                samples_seen += len(labels)
        print('Samples seen: %d' % samples_seen)
    print("Training completed!")

    if save:
        print("Saving model...")
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        # `with` guarantees the pickle is flushed and the handle closed.
        with open(os.path.join(save_dir, model_save_fname), "wb") as model_file:
            six.moves.cPickle.dump(model, model_file)
Build model...
----------------------------------------
('Epoch', 0)
----------------------------------------
0
9955/5845908 [..............................] - ETA: 86679s - loss: 0.249910000
19969/5845908 [..............................] - ETA: 86977s - loss: 0.249520000
29957/5845908 [..............................] - ETA: 87000s - loss: 0.247530000
39894/5845908 [..............................] - ETA: 86817s - loss: 0.243540000
49917/5845908 [..............................] - ETA: 86646s - loss: 0.238650000
59958/5845908 [..............................] - ETA: 86460s - loss: 0.233560000
69969/5845908 [..............................] - ETA: 86289s - loss: 0.228870000
79918/5845908 [..............................] - ETA: 86136s - loss: 0.224480000
89986/5845908 [..............................] - ETA: 85902s - loss: 0.220390000
99908/5845908 [..............................] - ETA: 85736s - loss: 0.2165100000
109939/5845908 [..............................] - ETA: 85614s - loss: 0.2132110000
119950/5845908 [..............................] - ETA: 85540s - loss: 0.2100120000
129971/5845908 [..............................] - ETA: 85385s - loss: 0.2071130000
139944/5845908 [..............................] - ETA: 85201s - loss: 0.2043140000
149999/5845908 [..............................] - ETA: 85025s - loss: 0.2017150000
159922/5845908 [..............................] - ETA: 84869s - loss: 0.1993160000
169940/5845908 [..............................] - ETA: 84751s - loss: 0.1970170000
179957/5845908 [..............................] - ETA: 84596s - loss: 0.1948180000
189986/5845908 [..............................] - ETA: 84435s - loss: 0.1926190000
199916/5845908 [>.............................] - ETA: 84293s - loss: 0.1906200000
209956/5845908 [>.............................] - ETA: 84110s - loss: 0.1886210000
219927/5845908 [>.............................] - ETA: 83926s - loss: 0.1866220000
229971/5845908 [>.............................] - ETA: 83776s - loss: 0.1848230000
239972/5845908 [>.............................] - ETA: 83644s - loss: 0.1829240000
249930/5845908 [>.............................] - ETA: 83481s - loss: 0.1811250000
259946/5845908 [>.............................] - ETA: 83352s - loss: 0.1794260000
269962/5845908 [>.............................] - ETA: 83205s - loss: 0.1776270000
279927/5845908 [>.............................] - ETA: 83031s - loss: 0.1760280000
289985/5845908 [>.............................] - ETA: 82858s - loss: 0.1743290000
299907/5845908 [>.............................] - ETA: 82716s - loss: 0.1727300000
309999/5845908 [>.............................] - ETA: 82531s - loss: 0.1711310000
319993/5845908 [>.............................] - ETA: 82355s - loss: 0.1696320000
329940/5845908 [>.............................] - ETA: 82211s - loss: 0.1682330000
339985/5845908 [>.............................] - ETA: 82053s - loss: 0.1667340000
349973/5845908 [>.............................] - ETA: 81882s - loss: 0.1653350000
359985/5845908 [>.............................] - ETA: 81700s - loss: 0.1639360000
369966/5845908 [>.............................] - ETA: 81540s - loss: 0.1626370000
379954/5845908 [>.............................] - ETA: 81369s - loss: 0.1612380000
389982/5845908 [=>............................] - ETA: 81226s - loss: 0.1599390000
399935/5845908 [=>............................] - ETA: 81085s - loss: 0.1587400000
409942/5845908 [=>............................] - ETA: 80911s - loss: 0.1574410000
419984/5845908 [=>............................] - ETA: 80727s - loss: 0.1562420000
429940/5845908 [=>............................] - ETA: 80568s - loss: 0.1550430000
439948/5845908 [=>............................] - ETA: 80394s - loss: 0.1538440000
449954/5845908 [=>............................] - ETA: 80223s - loss: 0.1526450000
459944/5845908 [=>............................] - ETA: 80067s - loss: 0.1515460000
469950/5845908 [=>............................] - ETA: 79909s - loss: 0.1504470000
479944/5845908 [=>............................] - ETA: 79751s - loss: 0.1493480000
489945/5845908 [=>............................] - ETA: 79588s - loss: 0.1482490000
499928/5845908 [=>............................] - ETA: 79437s - loss: 0.1472500000
509937/5845908 [=>............................] - ETA: 79274s - loss: 0.1462510000
519894/5845908 [=>............................] - ETA: 79118s - loss: 0.1452520000
529940/5845908 [=>............................] - ETA: 78975s - loss: 0.1442530000
539931/5845908 [=>............................] - ETA: 78816s - loss: 0.1432540000
549905/5845908 [=>............................] - ETA: 78660s - loss: 0.1423550000
559965/5845908 [=>............................] - ETA: 78511s - loss: 0.1414560000
569975/5845908 [=>............................] - ETA: 78350s - loss: 0.1405570000
579954/5845908 [=>............................] - ETA: 78198s - loss: 0.1396580000
589898/5845908 [==>...........................] - ETA: 78052s - loss: 0.1388590000
599979/5845908 [==>...........................] - ETA: 77901s - loss: 0.1380600000
609925/5845908 [==>...........................] - ETA: 77752s - loss: 0.1372610000
619992/5845908 [==>...........................] - ETA: 77597s - loss: 0.1364620000
629927/5845908 [==>...........................] - ETA: 77454s - loss: 0.1356630000
639992/5845908 [==>...........................] - ETA: 77302s - loss: 0.1348640000
649910/5845908 [==>...........................] - ETA: 77162s - loss: 0.1341650000
659928/5845908 [==>...........................] - ETA: 77021s - loss: 0.1334660000
669957/5845908 [==>...........................] - ETA: 76885s - loss: 0.1327670000
679972/5845908 [==>...........................] - ETA: 76750s - loss: 0.1320680000
689950/5845908 [==>...........................] - ETA: 76603s - loss: 0.1313690000
699983/5845908 [==>...........................] - ETA: 76459s - loss: 0.1306700000
709966/5845908 [==>...........................] - ETA: 76309s - loss: 0.1300710000
719902/5845908 [==>...........................] - ETA: 76161s - loss: 0.1293720000
729916/5845908 [==>...........................] - ETA: 76018s - loss: 0.1287730000
739905/5845908 [==>...........................] - ETA: 75869s - loss: 0.1281740000
749983/5845908 [==>...........................] - ETA: 75713s - loss: 0.1275750000
759961/5845908 [==>...........................] - ETA: 75560s - loss: 0.1269760000
769909/5845908 [==>...........................] - ETA: 75417s - loss: 0.1263770000
779976/5845908 [===>..........................] - ETA: 75268s - loss: 0.1258780000
789990/5845908 [===>..........................] - ETA: 75111s - loss: 0.1252790000
799941/5845908 [===>..........................] - ETA: 74962s - loss: 0.1247800000
809932/5845908 [===>..........................] - ETA: 74814s - loss: 0.1241810000
819908/5845908 [===>..........................] - ETA: 74661s - loss: 0.1236820000
829953/5845908 [===>..........................] - ETA: 74510s - loss: 0.1230830000
839891/5845908 [===>..........................] - ETA: 74364s - loss: 0.1225840000
849907/5845908 [===>..........................] - ETA: 74201s - loss: 0.1220850000
859932/5845908 [===>..........................] - ETA: 74040s - loss: 0.1215860000
869945/5845908 [===>..........................] - ETA: 73885s - loss: 0.1210870000
879935/5845908 [===>..........................] - ETA: 73728s - loss: 0.1205880000
889941/5845908 [===>..........................] - ETA: 73573s - loss: 0.1200890000
899988/5845908 [===>..........................] - ETA: 73424s - loss: 0.1195900000
909965/5845908 [===>..........................] - ETA: 73273s - loss: 0.1191910000
919933/5845908 [===>..........................] - ETA: 73122s - loss: 0.1186920000
929971/5845908 [===>..........................] - ETA: 72962s - loss: 0.1181930000
939956/5845908 [===>..........................] - ETA: 72813s - loss: 0.1177940000
949958/5845908 [===>..........................] - ETA: 72662s - loss: 0.1173950000
959950/5845908 [===>..........................] - ETA: 72510s - loss: 0.1169960000
969997/5845908 [===>..........................] - ETA: 72363s - loss: 0.1165970000
979957/5845908 [====>.........................] - ETA: 72213s - loss: 0.1161980000
989921/5845908 [====>.........................] - ETA: 72064s - loss: 0.1157990000
999906/5845908 [====>.........................] - ETA: 71911s - loss: 0.11521000000
1009944/5845908 [====>.........................] - ETA: 71757s - loss: 0.11491010000
1019976/5845908 [====>.........................] - ETA: 71612s - loss: 0.11451020000
1029970/5845908 [====>.........................] - ETA: 71462s - loss: 0.11411030000
1039916/5845908 [====>.........................] - ETA: 71317s - loss: 0.11371040000
1049989/5845908 [====>.........................] - ETA: 71165s - loss: 0.11341050000
1059941/5845908 [====>.........................] - ETA: 71016s - loss: 0.11301060000
1069939/5845908 [====>.........................] - ETA: 70876s - loss: 0.11261070000
1079999/5845908 [====>.........................] - ETA: 70721s - loss: 0.11231080000
1089949/5845908 [====>.........................] - ETA: 70578s - loss: 0.11191090000
1099987/5845908 [====>.........................] - ETA: 70432s - loss: 0.11161100000
1109915/5845908 [====>.........................] - ETA: 70286s - loss: 0.11131110000
1119913/5845908 [====>.........................] - ETA: 70133s - loss: 0.11091120000
1129953/5845908 [====>.........................] - ETA: 69987s - loss: 0.11061130000
1139899/5845908 [====>.........................] - ETA: 69840s - loss: 0.11031140000
1149963/5845908 [====>.........................] - ETA: 69690s - loss: 0.11001150000
1159960/5845908 [====>.........................] - ETA: 69547s - loss: 0.10971160000
1169936/5845908 [=====>........................] - ETA: 69399s - loss: 0.10941170000
1179997/5845908 [=====>........................] - ETA: 69249s - loss: 0.10911180000
1189939/5845908 [=====>........................] - ETA: 69102s - loss: 0.10881190000
1199957/5845908 [=====>........................] - ETA: 68959s - loss: 0.10851200000
1209988/5845908 [=====>........................] - ETA: 68812s - loss: 0.10821210000
1219978/5845908 [=====>........................] - ETA: 68661s - loss: 0.10791220000
1229972/5845908 [=====>........................] - ETA: 68522s - loss: 0.10761230000
1239933/5845908 [=====>........................] - ETA: 68373s - loss: 0.10741240000
1249960/5845908 [=====>........................] - ETA: 68227s - loss: 0.10711250000
1259924/5845908 [=====>........................] - ETA: 68081s - loss: 0.10681260000
1269944/5845908 [=====>........................] - ETA: 67935s - loss: 0.10661270000
1279918/5845908 [=====>........................] - ETA: 67793s - loss: 0.10631280000
1289951/5845908 [=====>........................] - ETA: 67648s - loss: 0.10601290000
1299970/5845908 [=====>........................] - ETA: 67495s - loss: 0.10581300000
1309947/5845908 [=====>........................] - ETA: 67345s - loss: 0.10551310000
1319979/5845908 [=====>........................] - ETA: 67198s - loss: 0.10531320000
1329949/5845908 [=====>........................] - ETA: 67048s - loss: 0.10501330000
1339972/5845908 [=====>........................] - ETA: 66901s - loss: 0.10481340000
1349909/5845908 [=====>........................] - ETA: 66754s - loss: 0.10451350000
1359963/5845908 [=====>........................] - ETA: 66604s - loss: 0.10431360000
1369991/5845908 [======>.......................] - ETA: 66459s - loss: 0.10411370000
1379922/5845908 [======>.......................] - ETA: 66315s - loss: 0.10381380000
1389902/5845908 [======>.......................] - ETA: 66165s - loss: 0.10361390000
1399965/5845908 [======>.......................] - ETA: 66015s - loss: 0.10341400000
1409916/5845908 [======>.......................] - ETA: 65867s - loss: 0.10321410000
1419967/5845908 [======>.......................] - ETA: 65718s - loss: 0.10291420000
1429914/5845908 [======>.......................] - ETA: 65569s - loss: 0.10271430000
1439972/5845908 [======>.......................] - ETA: 65419s - loss: 0.10251440000
1449957/5845908 [======>.......................] - ETA: 65269s - loss: 0.10231450000
1459988/5845908 [======>.......................] - ETA: 65122s - loss: 0.10211460000
1469916/5845908 [======>.......................] - ETA: 64977s - loss: 0.10191470000
1479940/5845908 [======>.......................] - ETA: 64832s - loss: 0.10171480000
1489900/5845908 [======>.......................] - ETA: 64684s - loss: 0.10151490000
1499963/5845908 [======>.......................] - ETA: 64534s - loss: 0.10131500000
1509907/5845908 [======>.......................] - ETA: 64391s - loss: 0.10111510000
1519903/5845908 [======>.......................] - ETA: 64240s - loss: 0.10091520000
1529995/5845908 [======>.......................] - ETA: 64088s - loss: 0.10071530000
1539949/5845908 [======>.......................] - ETA: 63941s - loss: 0.10051540000
1549941/5845908 [======>.......................] - ETA: 63790s - loss: 0.10031550000
1559965/5845908 [=======>......................] - ETA: 63641s - loss: 0.10011560000
1569922/5845908 [=======>......................] - ETA: 63494s - loss: 0.09991570000
1579975/5845908 [=======>......................] - ETA: 63344s - loss: 0.09971580000
1589997/5845908 [=======>......................] - ETA: 63191s - loss: 0.09951590000
1599995/5845908 [=======>......................] - ETA: 63039s - loss: 0.09931600000
1609967/5845908 [=======>......................] - ETA: 62891s - loss: 0.09911610000
1619931/5845908 [=======>......................] - ETA: 62741s - loss: 0.09901620000
1629982/5845908 [=======>......................] - ETA: 62593s - loss: 0.09881630000
1639899/5845908 [=======>......................] - ETA: 62450s - loss: 0.09861640000
1649896/5845908 [=======>......................] - ETA: 62300s - loss: 0.09841650000
1659949/5845908 [=======>......................] - ETA: 62151s - loss: 0.09831660000
1669998/5845908 [=======>......................] - ETA: 62003s - loss: 0.09811670000
1679998/5845908 [=======>......................] - ETA: 61851s - loss: 0.09791680000
1689961/5845908 [=======>......................] - ETA: 61702s - loss: 0.09781690000
1699939/5845908 [=======>......................] - ETA: 61552s - loss: 0.09761700000
1709949/5845908 [=======>......................] - ETA: 61400s - loss: 0.09741710000
1719998/5845908 [=======>......................] - ETA: 61251s - loss: 0.09721720000
1729938/5845908 [=======>......................] - ETA: 61104s - loss: 0.09711730000
1739949/5845908 [=======>......................] - ETA: 60953s - loss: 0.09691740000
1749963/5845908 [=======>......................] - ETA: 60802s - loss: 0.09681750000
1759967/5845908 [========>.....................] - ETA: 60652s - loss: 0.09661760000
1769979/5845908 [========>.....................] - ETA: 60502s - loss: 0.09651770000
1779933/5845908 [========>.....................] - ETA: 60354s - loss: 0.09631780000
1789986/5845908 [========>.....................] - ETA: 60206s - loss: 0.09621790000
1799917/5845908 [========>.....................] - ETA: 60059s - loss: 0.09601800000
1809925/5845908 [========>.....................] - ETA: 59908s - loss: 0.09591810000
1819937/5845908 [========>.....................] - ETA: 59759s - loss: 0.09571820000
1829913/5845908 [========>.....................] - ETA: 59605s - loss: 0.09561830000
1839963/5845908 [========>.....................] - ETA: 59457s - loss: 0.09541840000
1849948/5845908 [========>.....................] - ETA: 59308s - loss: 0.09531850000
1859950/5845908 [========>.....................] - ETA: 59157s - loss: 0.09511860000
1869918/5845908 [========>.....................] - ETA: 59009s - loss: 0.09501870000
1879963/5845908 [========>.....................] - ETA: 58859s - loss: 0.09481880000
1889909/5845908 [========>.....................] - ETA: 58712s - loss: 0.09471890000
1899978/5845908 [========>.....................] - ETA: 58563s - loss: 0.09461900000
1909968/5845908 [========>.....................] - ETA: 58414s - loss: 0.09441910000
1919899/5845908 [========>.....................] - ETA: 58268s - loss: 0.09431920000
1929965/5845908 [========>.....................] - ETA: 58118s - loss: 0.09411930000
1939925/5845908 [========>.....................] - ETA: 57972s - loss: 0.09401940000
1949965/5845908 [=========>....................] - ETA: 57823s - loss: 0.09391950000
1959965/5845908 [=========>....................] - ETA: 57673s - loss: 0.09371960000
1969968/5845908 [=========>....................] - ETA: 57523s - loss: 0.09361970000
1979972/5845908 [=========>....................] - ETA: 57373s - loss: 0.09351980000
1989997/5845908 [=========>....................] - ETA: 57220s - loss: 0.09331990000
1999944/5845908 [=========>....................] - ETA: 57073s - loss: 0.09322000000
2009921/5845908 [=========>....................] - ETA: 56924s - loss: 0.09312010000
2019922/5845908 [=========>....................] - ETA: 56773s - loss: 0.09292020000
2029973/5845908 [=========>....................] - ETA: 56625s - loss: 0.09282030000
2039939/5845908 [=========>....................] - ETA: 56479s - loss: 0.09272040000
2049919/5845908 [=========>....................] - ETA: 56331s - loss: 0.09262050000
2059964/5845908 [=========>....................] - ETA: 56182s - loss: 0.09242060000
2069902/5845908 [=========>....................] - ETA: 56035s - loss: 0.09232070000
2079941/5845908 [=========>....................] - ETA: 55886s - loss: 0.09222080000
2089994/5845908 [=========>....................] - ETA: 55736s - loss: 0.09212090000
2099991/5845908 [=========>....................] - ETA: 55586s - loss: 0.09202100000
2109948/5845908 [=========>....................] - ETA: 55439s - loss: 0.09192110000
2119987/5845908 [=========>....................] - ETA: 55292s - loss: 0.09172120000
2129961/5845908 [=========>....................] - ETA: 55145s - loss: 0.09162130000
2139946/5845908 [=========>....................] - ETA: 54995s - loss: 0.09152140000
2149995/5845908 [==========>...................] - ETA: 54845s - loss: 0.09142150000
2159952/5845908 [==========>...................] - ETA: 54699s - loss: 0.09132160000
2169928/5845908 [==========>...................] - ETA: 54551s - loss: 0.09122170000
2179896/5845908 [==========>...................] - ETA: 54402s - loss: 0.09112180000
2189963/5845908 [==========>...................] - ETA: 54253s - loss: 0.09102190000
2199921/5845908 [==========>...................] - ETA: 54105s - loss: 0.09092200000
2209974/5845908 [==========>...................] - ETA: 53957s - loss: 0.09082210000
2219924/5845908 [==========>...................] - ETA: 53810s - loss: 0.09072220000
2229905/5845908 [==========>...................] - ETA: 53661s - loss: 0.09062230000
2239957/5845908 [==========>...................] - ETA: 53513s - loss: 0.09052240000
2249900/5845908 [==========>...................] - ETA: 53366s - loss: 0.09042250000
2259983/5845908 [==========>...................] - ETA: 53216s - loss: 0.09022260000
2269954/5845908 [==========>...................] - ETA: 53068s - loss: 0.09012270000
2279949/5845908 [==========>...................] - ETA: 52918s - loss: 0.09002280000
2289996/5845908 [==========>...................] - ETA: 52770s - loss: 0.08992290000
2299975/5845908 [==========>...................] - ETA: 52623s - loss: 0.08982300000
2309950/5845908 [==========>...................] - ETA: 52475s - loss: 0.08972310000
2319930/5845908 [==========>...................] - ETA: 52327s - loss: 0.08962320000
2329983/5845908 [==========>...................] - ETA: 52178s - loss: 0.08962330000
2339997/5845908 [===========>..................] - ETA: 52031s - loss: 0.08952340000
2349978/5845908 [===========>..................] - ETA: 51882s - loss: 0.08942350000
2359984/5845908 [===========>..................] - ETA: 51732s - loss: 0.08932360000
2369974/5845908 [===========>..................] - ETA: 51584s - loss: 0.08922370000
2379985/5845908 [===========>..................] - ETA: 51433s - loss: 0.08912380000
2389955/5845908 [===========>..................] - ETA: 51284s - loss: 0.08902390000
2399922/5845908 [===========>..................] - ETA: 51137s - loss: 0.08892400000
2409925/5845908 [===========>..................] - ETA: 50987s - loss: 0.08882410000
2419977/5845908 [===========>..................] - ETA: 50838s - loss: 0.08872420000
2429952/5845908 [===========>..................] - ETA: 50689s - loss: 0.08862430000
2439969/5845908 [===========>..................] - ETA: 50542s - loss: 0.08852440000
2449955/5845908 [===========>..................] - ETA: 50392s - loss: 0.08842450000
2459945/5845908 [===========>..................] - ETA: 50242s - loss: 0.08832460000
2469927/5845908 [===========>..................] - ETA: 50093s - loss: 0.08822470000
2479896/5845908 [===========>..................] - ETA: 49944s - loss: 0.08822480000
2489968/5845908 [===========>..................] - ETA: 49795s - loss: 0.08812490000
2499952/5845908 [===========>..................] - ETA: 49647s - loss: 0.08802500000
2509919/5845908 [===========>..................] - ETA: 49498s - loss: 0.08792510000
2519944/5845908 [===========>..................] - ETA: 49350s - loss: 0.08782520000
2529912/5845908 [===========>..................] - ETA: 49201s - loss: 0.08772530000
2539983/5845908 [============>.................] - ETA: 49050s - loss: 0.08762540000
2549939/5845908 [============>.................] - ETA: 48902s - loss: 0.08752550000
2559916/5845908 [============>.................] - ETA: 48753s - loss: 0.08752560000
2569930/5845908 [============>.................] - ETA: 48603s - loss: 0.08742570000
2579994/5845908 [============>.................] - ETA: 48453s - loss: 0.08732580000
2589933/5845908 [============>.................] - ETA: 48305s - loss: 0.08722590000
2599953/5845908 [============>.................] - ETA: 48157s - loss: 0.08712600000
2609899/5845908 [============>.................] - ETA: 48010s - loss: 0.08702610000
2619999/5845908 [============>.................] - ETA: 47859s - loss: 0.08702620000
2629939/5845908 [============>.................] - ETA: 47711s - loss: 0.08692630000
2639925/5845908 [============>.................] - ETA: 47562s - loss: 0.08682640000
2649901/5845908 [============>.................] - ETA: 47413s - loss: 0.08672650000
2659952/5845908 [============>.................] - ETA: 47264s - loss: 0.08662660000
2669983/5845908 [============>.................] - ETA: 47115s - loss: 0.08652670000
2679938/5845908 [============>.................] - ETA: 46967s - loss: 0.08652680000
2689974/5845908 [============>.................] - ETA: 46820s - loss: 0.08642690000
2699949/5845908 [============>.................] - ETA: 46672s - loss: 0.08632700000
2709929/5845908 [============>.................] - ETA: 46525s - loss: 0.08622710000
2719999/5845908 [============>.................] - ETA: 46376s - loss: 0.08622720000
2729944/5845908 [=============>................] - ETA: 46230s - loss: 0.08612730000
2739905/5845908 [=============>................] - ETA: 46083s - loss: 0.08602740000
2749967/5845908 [=============>................] - ETA: 45934s - loss: 0.08592750000
2759936/5845908 [=============>................] - ETA: 45787s - loss: 0.08592760000
2769994/5845908 [=============>................] - ETA: 45638s - loss: 0.08582770000
2779970/5845908 [=============>................] - ETA: 45489s - loss: 0.08572780000
2789902/5845908 [=============>................] - ETA: 45343s - loss: 0.08562790000
2799919/5845908 [=============>................] - ETA: 45194s - loss: 0.08562800000
2809950/5845908 [=============>................] - ETA: 45046s - loss: 0.08552810000
2819927/5845908 [=============>................] - ETA: 44899s - loss: 0.08542820000
2829932/5845908 [=============>................] - ETA: 44750s - loss: 0.08542830000
2839994/5845908 [=============>................] - ETA: 44601s - loss: 0.08532840000
2849965/5845908 [=============>................] - ETA: 44452s - loss: 0.08522850000
2859957/5845908 [=============>................] - ETA: 44306s - loss: 0.08512860000
2869947/5845908 [=============>................] - ETA: 44157s - loss: 0.08512870000
2879974/5845908 [=============>................] - ETA: 44009s - loss: 0.08502880000
2889973/5845908 [=============>................] - ETA: 43860s - loss: 0.08492890000
2899904/5845908 [=============>................] - ETA: 43713s - loss: 0.08492900000
2909954/5845908 [=============>................] - ETA: 43565s - loss: 0.08482910000
2919921/5845908 [=============>................] - ETA: 43417s - loss: 0.08472920000
2929977/5845908 [==============>...............] - ETA: 43268s - loss: 0.08472930000
2939965/5845908 [==============>...............] - ETA: 43119s - loss: 0.08462940000
2949997/5845908 [==============>...............] - ETA: 42970s - loss: 0.08452950000
2959944/5845908 [==============>...............] - ETA: 42823s - loss: 0.08442960000
2969930/5845908 [==============>...............] - ETA: 42676s - loss: 0.08442970000
2979995/5845908 [==============>...............] - ETA: 42526s - loss: 0.08432980000
2989976/5845908 [==============>...............] - ETA: 42379s - loss: 0.08432990000
2999932/5845908 [==============>...............] - ETA: 42232s - loss: 0.08423000000
3009907/5845908 [==============>...............] - ETA: 42083s - loss: 0.08413010000
3019953/5845908 [==============>...............] - ETA: 41934s - loss: 0.08413020000
3029942/5845908 [==============>...............] - ETA: 41786s - loss: 0.08403030000
3039986/5845908 [==============>...............] - ETA: 41637s - loss: 0.08393040000
3049935/5845908 [==============>...............] - ETA: 41489s - loss: 0.08393050000
3059996/5845908 [==============>...............] - ETA: 41339s - loss: 0.08383060000
3069971/5845908 [==============>...............] - ETA: 41190s - loss: 0.08383070000
3079954/5845908 [==============>...............] - ETA: 41042s - loss: 0.08373080000
3089998/5845908 [==============>...............] - ETA: 40893s - loss: 0.08363090000
3099952/5845908 [==============>...............] - ETA: 40746s - loss: 0.08363100000
3109986/5845908 [==============>...............] - ETA: 40597s - loss: 0.08353110000
3119919/5845908 [===============>..............] - ETA: 40450s - loss: 0.08343120000
3129969/5845908 [===============>..............] - ETA: 40300s - loss: 0.08343130000
3139947/5845908 [===============>..............] - ETA: 40152s - loss: 0.08333140000
3149954/5845908 [===============>..............] - ETA: 40003s - loss: 0.08333150000
3159897/5845908 [===============>..............] - ETA: 39855s - loss: 0.08323160000
3169988/5845908 [===============>..............] - ETA: 39705s - loss: 0.08313170000
3179960/5845908 [===============>..............] - ETA: 39557s - loss: 0.08313180000
3189946/5845908 [===============>..............] - ETA: 39408s - loss: 0.08303190000
3199918/5845908 [===============>..............] - ETA: 39259s - loss: 0.08293200000
3209979/5845908 [===============>..............] - ETA: 39110s - loss: 0.08293210000
3219917/5845908 [===============>..............] - ETA: 38963s - loss: 0.08283220000
3229993/5845908 [===============>..............] - ETA: 38813s - loss: 0.08283230000
3239944/5845908 [===============>..............] - ETA: 38665s - loss: 0.08273240000
3249924/5845908 [===============>..............] - ETA: 38517s - loss: 0.08273250000
3259927/5845908 [===============>..............] - ETA: 38368s - loss: 0.08263260000
3269942/5845908 [===============>..............] - ETA: 38220s - loss: 0.08253270000
3279974/5845908 [===============>..............] - ETA: 38073s - loss: 0.08253280000
3289977/5845908 [===============>..............] - ETA: 37925s - loss: 0.08243290000
3299991/5845908 [===============>..............] - ETA: 37779s - loss: 0.08243300000
3309933/5845908 [===============>..............] - ETA: 37632s - loss: 0.08233310000
3319970/5845908 [================>.............] - ETA: 37483s - loss: 0.08233320000
3329993/5845908 [================>.............] - ETA: 37335s - loss: 0.08223330000
3339968/5845908 [================>.............] - ETA: 37189s - loss: 0.08223340000
3349923/5845908 [================>.............] - ETA: 37042s - loss: 0.08213350000
3359983/5845908 [================>.............] - ETA: 36893s - loss: 0.08213360000
3369991/5845908 [================>.............] - ETA: 36745s - loss: 0.08203370000
3379909/5845908 [================>.............] - ETA: 36599s - loss: 0.08203380000
3389916/5845908 [================>.............] - ETA: 36451s - loss: 0.08193390000
3399933/5845908 [================>.............] - ETA: 36303s - loss: 0.08193400000
3409898/5845908 [================>.............] - ETA: 36155s - loss: 0.08183410000
3419924/5845908 [================>.............] - ETA: 36007s - loss: 0.08173420000
3429955/5845908 [================>.............] - ETA: 35859s - loss: 0.08173430000
3439946/5845908 [================>.............] - ETA: 35711s - loss: 0.08163440000
3449919/5845908 [================>.............] - ETA: 35565s - loss: 0.08163450000
3459970/5845908 [================>.............] - ETA: 35416s - loss: 0.08153460000
3469971/5845908 [================>.............] - ETA: 35268s - loss: 0.08153470000
3479963/5845908 [================>.............] - ETA: 35122s - loss: 0.08143480000
3489962/5845908 [================>.............] - ETA: 34975s - loss: 0.08143490000
3499911/5845908 [================>.............] - ETA: 34828s - loss: 0.08133500000
3509935/5845908 [=================>............] - ETA: 34681s - loss: 0.08133510000
3519993/5845908 [=================>............] - ETA: 34533s - loss: 0.08133520000
3529933/5845908 [=================>............] - ETA: 34387s - loss: 0.08123530000
3539955/5845908 [=================>............] - ETA: 34239s - loss: 0.08123540000
3549964/5845908 [=================>............] - ETA: 34092s - loss: 0.08113550000
3559961/5845908 [=================>............] - ETA: 33944s - loss: 0.08113560000
3569994/5845908 [=================>............] - ETA: 33795s - loss: 0.08103570000
3579928/5845908 [=================>............] - ETA: 33648s - loss: 0.08103580000
3589899/5845908 [=================>............] - ETA: 33500s - loss: 0.08093590000
3599921/5845908 [=================>............] - ETA: 33352s - loss: 0.08093600000
3609965/5845908 [=================>............] - ETA: 33204s - loss: 0.08083610000
3619934/5845908 [=================>............] - ETA: 33056s - loss: 0.08083620000
3629966/5845908 [=================>............] - ETA: 32908s - loss: 0.08073630000
3639992/5845908 [=================>............] - ETA: 32759s - loss: 0.08073640000
3649927/5845908 [=================>............] - ETA: 32612s - loss: 0.08073650000
3659979/5845908 [=================>............] - ETA: 32463s - loss: 0.08063660000
3669961/5845908 [=================>............] - ETA: 32314s - loss: 0.08063670000
3679995/5845908 [=================>............] - ETA: 32166s - loss: 0.08053680000
3689934/5845908 [=================>............] - ETA: 32019s - loss: 0.08053690000
3699980/5845908 [=================>............] - ETA: 31870s - loss: 0.08043700000
3709998/5845908 [==================>...........] - ETA: 31722s - loss: 0.08043710000
3719936/5845908 [==================>...........] - ETA: 31576s - loss: 0.08033720000
3729904/5845908 [==================>...........] - ETA: 31428s - loss: 0.08033730000
3739972/5845908 [==================>...........] - ETA: 31278s - loss: 0.08023740000
3749947/5845908 [==================>...........] - ETA: 31130s - loss: 0.08023750000
3759974/5845908 [==================>...........] - ETA: 30981s - loss: 0.08023760000
3769917/5845908 [==================>...........] - ETA: 30833s - loss: 0.08013770000
3779996/5845908 [==================>...........] - ETA: 30682s - loss: 0.08013780000
3789906/5845908 [==================>...........] - ETA: 30536s - loss: 0.08003790000
3799918/5845908 [==================>...........] - ETA: 30386s - loss: 0.08003800000
3809937/5845908 [==================>...........] - ETA: 30237s - loss: 0.07993810000
3819984/5845908 [==================>...........] - ETA: 30088s - loss: 0.07993820000
3829924/5845908 [==================>...........] - ETA: 29940s - loss: 0.07983830000
3839973/5845908 [==================>...........] - ETA: 29791s - loss: 0.07983840000
3849914/5845908 [==================>...........] - ETA: 29643s - loss: 0.07983850000
3859988/5845908 [==================>...........] - ETA: 29493s - loss: 0.07973860000
3869937/5845908 [==================>...........] - ETA: 29345s - loss: 0.07973870000
3879901/5845908 [==================>...........] - ETA: 29196s - loss: 0.07963880000
3889905/5845908 [==================>...........] - ETA: 29048s - loss: 0.07963890000
3899971/5845908 [===================>..........] - ETA: 28898s - loss: 0.07953900000
3909903/5845908 [===================>..........] - ETA: 28751s - loss: 0.07953910000
3919966/5845908 [===================>..........] - ETA: 28601s - loss: 0.07953920000
3929976/5845908 [===================>..........] - ETA: 28454s - loss: 0.07943930000
3939909/5845908 [===================>..........] - ETA: 28306s - loss: 0.07943940000
3949952/5845908 [===================>..........] - ETA: 28157s - loss: 0.07933950000
3959937/5845908 [===================>..........] - ETA: 28007s - loss: 0.07933960000
3969940/5845908 [===================>..........] - ETA: 27860s - loss: 0.07933970000
3979973/5845908 [===================>..........] - ETA: 27710s - loss: 0.07923980000
3989987/5845908 [===================>..........] - ETA: 27560s - loss: 0.07923990000
3999904/5845908 [===================>..........] - ETA: 27412s - loss: 0.07914000000
4009968/5845908 [===================>..........] - ETA: 27261s - loss: 0.07914010000
4019917/5845908 [===================>..........] - ETA: 27112s - loss: 0.07914020000
4029953/5845908 [===================>..........] - ETA: 26963s - loss: 0.07904030000
4039999/5845908 [===================>..........] - ETA: 26814s - loss: 0.07904040000
4049972/5845908 [===================>..........] - ETA: 26665s - loss: 0.07894050000
4059941/5845908 [===================>..........] - ETA: 26517s - loss: 0.07894060000
4069931/5845908 [===================>..........] - ETA: 26368s - loss: 0.07894070000
4079966/5845908 [===================>..........] - ETA: 26279s - loss: 0.07884080000
4089924/5845908 [===================>..........] - ETA: 26131s - loss: 0.07884090000
4099979/5845908 [====================>.........] - ETA: 25981s - loss: 0.07884100000
4109920/5845908 [====================>.........] - ETA: 25833s - loss: 0.07874110000
4119995/5845908 [====================>.........] - ETA: 25683s - loss: 0.07874120000
4129991/5845908 [====================>.........] - ETA: 25534s - loss: 0.07864130000
4139900/5845908 [====================>.........] - ETA: 25386s - loss: 0.07864140000
4149998/5845908 [====================>.........] - ETA: 25235s - loss: 0.07864150000
4159922/5845908 [====================>.........] - ETA: 25087s - loss: 0.07854160000
4169952/5845908 [====================>.........] - ETA: 24938s - loss: 0.07854170000
4179995/5845908 [====================>.........] - ETA: 24789s - loss: 0.07854180000
4189973/5845908 [====================>.........] - ETA: 24640s - loss: 0.07844190000
4199942/5845908 [====================>.........] - ETA: 24491s - loss: 0.07844200000
4209912/5845908 [====================>.........] - ETA: 24343s - loss: 0.07844210000
4219927/5845908 [====================>.........] - ETA: 24194s - loss: 0.07834220000
4229996/5845908 [====================>.........] - ETA: 24044s - loss: 0.07834230000
4239971/5845908 [====================>.........] - ETA: 23895s - loss: 0.07824240000
4249985/5845908 [====================>.........] - ETA: 23747s - loss: 0.07824250000
4259978/5845908 [====================>.........] - ETA: 23597s - loss: 0.07824260000
4269965/5845908 [====================>.........] - ETA: 23448s - loss: 0.07814270000
4279933/5845908 [====================>.........] - ETA: 23299s - loss: 0.07814280000
4289926/5845908 [=====================>........] - ETA: 23150s - loss: 0.07814290000
4299923/5845908 [=====================>........] - ETA: 23001s - loss: 0.07804300000
4309958/5845908 [=====================>........] - ETA: 22852s - loss: 0.07804310000
4319951/5845908 [=====================>........] - ETA: 22703s - loss: 0.07804320000
4329965/5845908 [=====================>........] - ETA: 22553s - loss: 0.07794330000
4339970/5845908 [=====================>........] - ETA: 22404s - loss: 0.07794340000
4349989/5845908 [=====================>........] - ETA: 22254s - loss: 0.07794350000
4359948/5845908 [=====================>........] - ETA: 22106s - loss: 0.07784360000
4369936/5845908 [=====================>........] - ETA: 21957s - loss: 0.07784370000
4379985/5845908 [=====================>........] - ETA: 21807s - loss: 0.07784380000
4389951/5845908 [=====================>........] - ETA: 21659s - loss: 0.07774390000
4399901/5845908 [=====================>........] - ETA: 21511s - loss: 0.07774400000
4409960/5845908 [=====================>........] - ETA: 21361s - loss: 0.07774410000
4419904/5845908 [=====================>........] - ETA: 21213s - loss: 0.07764420000
4429996/5845908 [=====================>........] - ETA: 21062s - loss: 0.07764430000
4439954/5845908 [=====================>........] - ETA: 20914s - loss: 0.07764440000
4449924/5845908 [=====================>........] - ETA: 20765s - loss: 0.07754450000
4459983/5845908 [=====================>........] - ETA: 20615s - loss: 0.07754460000
4469928/5845908 [=====================>........] - ETA: 20467s - loss: 0.07754470000
4479909/5845908 [=====================>........] - ETA: 20319s - loss: 0.07744480000
4489958/5845908 [======================>.......] - ETA: 20169s - loss: 0.07744490000
4499995/5845908 [======================>.......] - ETA: 20020s - loss: 0.07744500000
4509938/5845908 [======================>.......] - ETA: 19872s - loss: 0.07734510000
4519919/5845908 [======================>.......] - ETA: 19723s - loss: 0.07734520000
4529951/5845908 [======================>.......] - ETA: 19574s - loss: 0.07734530000
4539929/5845908 [======================>.......] - ETA: 19426s - loss: 0.07734540000
4549992/5845908 [======================>.......] - ETA: 19276s - loss: 0.07724550000
4559980/5845908 [======================>.......] - ETA: 19127s - loss: 0.07724560000
4569923/5845908 [======================>.......] - ETA: 18979s - loss: 0.07724570000
4579900/5845908 [======================>.......] - ETA: 18830s - loss: 0.07714580000
4589981/5845908 [======================>.......] - ETA: 18680s - loss: 0.07714590000
4599972/5845908 [======================>.......] - ETA: 18531s - loss: 0.07714600000
4609931/5845908 [======================>.......] - ETA: 18383s - loss: 0.07704610000
4619901/5845908 [======================>.......] - ETA: 18234s - loss: 0.07704620000
4629963/5845908 [======================>.......] - ETA: 18085s - loss: 0.07704630000
4639909/5845908 [======================>.......] - ETA: 17937s - loss: 0.07704640000
4649939/5845908 [======================>.......] - ETA: 17787s - loss: 0.07694650000
4659972/5845908 [======================>.......] - ETA: 17638s - loss: 0.07694660000
4669946/5845908 [======================>.......] - ETA: 17491s - loss: 0.07694670000
4679973/5845908 [=======================>......] - ETA: 17345s - loss: 0.07684680000
4689957/5845908 [=======================>......] - ETA: 17198s - loss: 0.07684690000
4699987/5845908 [=======================>......] - ETA: 17051s - loss: 0.07684700000
4709965/5845908 [=======================>......] - ETA: 16905s - loss: 0.07684710000
4719996/5845908 [=======================>......] - ETA: 16758s - loss: 0.07674720000
4729907/5845908 [=======================>......] - ETA: 16610s - loss: 0.07674730000
4739960/5845908 [=======================>......] - ETA: 16461s - loss: 0.07674740000
4749943/5845908 [=======================>......] - ETA: 16312s - loss: 0.07664750000
4759949/5845908 [=======================>......] - ETA: 16165s - loss: 0.07664760000
4769899/5845908 [=======================>......] - ETA: 16019s - loss: 0.07664770000
4779996/5845908 [=======================>......] - ETA: 15871s - loss: 0.07654780000
4789965/5845908 [=======================>......] - ETA: 15724s - loss: 0.07654790000
4799993/5845908 [=======================>......] - ETA: 15577s - loss: 0.07654800000
4809937/5845908 [=======================>......] - ETA: 15429s - loss: 0.07654810000
4819963/5845908 [=======================>......] - ETA: 15280s - loss: 0.07644820000
4829922/5845908 [=======================>......] - ETA: 15131s - loss: 0.07644830000
4839967/5845908 [=======================>......] - ETA: 14982s - loss: 0.07644840000
4849932/5845908 [=======================>......] - ETA: 14833s - loss: 0.07634850000
4859969/5845908 [=======================>......] - ETA: 14684s - loss: 0.07634860000
4869955/5845908 [=======================>......] - ETA: 14535s - loss: 0.07634870000
4879919/5845908 [========================>.....] - ETA: 14387s - loss: 0.07634880000
4889960/5845908 [========================>.....] - ETA: 14238s - loss: 0.07624890000
4899933/5845908 [========================>.....] - ETA: 14090s - loss: 0.07624900000
4909931/5845908 [========================>.....] - ETA: 13941s - loss: 0.07624910000
4919987/5845908 [========================>.....] - ETA: 13792s - loss: 0.07624920000
4929984/5845908 [========================>.....] - ETA: 13644s - loss: 0.07614930000
4939910/5845908 [========================>.....] - ETA: 13496s - loss: 0.07614940000
4949974/5845908 [========================>.....] - ETA: 13346s - loss: 0.07614950000
4959938/5845908 [========================>.....] - ETA: 13197s - loss: 0.07614960000
4969971/5845908 [========================>.....] - ETA: 13048s - loss: 0.07604970000
4979922/5845908 [========================>.....] - ETA: 12900s - loss: 0.07604980000
4989971/5845908 [========================>.....] - ETA: 12750s - loss: 0.07604990000
4999918/5845908 [========================>.....] - ETA: 12601s - loss: 0.07605000000
5009907/5845908 [========================>.....] - ETA: 12453s - loss: 0.07595010000
5019902/5845908 [========================>.....] - ETA: 12303s - loss: 0.07595020000
5029899/5845908 [========================>.....] - ETA: 12154s - loss: 0.07595030000
5039942/5845908 [========================>.....] - ETA: 12005s - loss: 0.07595040000
5049913/5845908 [========================>.....] - ETA: 11856s - loss: 0.07585050000
5059956/5845908 [========================>.....] - ETA: 11706s - loss: 0.07585060000
5069943/5845908 [=========================>....] - ETA: 11557s - loss: 0.07585070000
5079951/5845908 [=========================>....] - ETA: 11408s - loss: 0.07585080000
5089931/5845908 [=========================>....] - ETA: 11259s - loss: 0.07575090000
5099984/5845908 [=========================>....] - ETA: 11109s - loss: 0.07575100000
5109942/5845908 [=========================>....] - ETA: 10961s - loss: 0.07575110000
5119925/5845908 [=========================>....] - ETA: 10812s - loss: 0.07575120000
5129962/5845908 [=========================>....] - ETA: 10662s - loss: 0.07565130000
5139903/5845908 [=========================>....] - ETA: 10514s - loss: 0.07565140000
5149895/5845908 [=========================>....] - ETA: 10365s - loss: 0.07565150000
5159973/5845908 [=========================>....] - ETA: 10215s - loss: 0.07565160000
5169965/5845908 [=========================>....] - ETA: 10066s - loss: 0.07555170000
5179951/5845908 [=========================>....] - ETA: 9917s - loss: 0.07555180000
5189972/5845908 [=========================>....] - ETA: 9768s - loss: 0.07555190000
5199932/5845908 [=========================>....] - ETA: 9619s - loss: 0.07555200000
5209903/5845908 [=========================>....] - ETA: 9471s - loss: 0.07545210000
5219981/5845908 [=========================>....] - ETA: 9321s - loss: 0.07545220000
5229919/5845908 [=========================>....] - ETA: 9173s - loss: 0.07545230000
5239982/5845908 [=========================>....] - ETA: 9023s - loss: 0.07545240000
5249910/5845908 [=========================>....] - ETA: 8875s - loss: 0.07535250000
5259928/5845908 [=========================>....] - ETA: 8726s - loss: 0.07535260000
5269966/5845908 [==========================>...] - ETA: 8576s - loss: 0.07535270000
5279977/5845908 [==========================>...] - ETA: 8427s - loss: 0.07535280000
5289913/5845908 [==========================>...] - ETA: 8279s - loss: 0.07525290000
5299996/5845908 [==========================>...] - ETA: 8129s - loss: 0.07525300000
5309924/5845908 [==========================>...] - ETA: 7981s - loss: 0.07525310000
5319985/5845908 [==========================>...] - ETA: 7831s - loss: 0.07525320000
5329944/5845908 [==========================>...] - ETA: 7683s - loss: 0.07525330000
5339965/5845908 [==========================>...] - ETA: 7534s - loss: 0.07515340000
5349941/5845908 [==========================>...] - ETA: 7385s - loss: 0.07515350000
5359976/5845908 [==========================>...] - ETA: 7236s - loss: 0.07515360000
5369907/5845908 [==========================>...] - ETA: 7088s - loss: 0.07515370000
5379929/5845908 [==========================>...] - ETA: 6939s - loss: 0.07505380000
5389992/5845908 [==========================>...] - ETA: 6789s - loss: 0.07505390000
5399936/5845908 [==========================>...] - ETA: 6641s - loss: 0.07505400000
5409978/5845908 [==========================>...] - ETA: 6491s - loss: 0.07505410000
5419969/5845908 [==========================>...] - ETA: 6342s - loss: 0.07505420000
5429914/5845908 [==========================>...] - ETA: 6194s - loss: 0.07495430000
5439901/5845908 [==========================>...] - ETA: 6045s - loss: 0.07495440000
5449956/5845908 [==========================>...] - ETA: 5896s - loss: 0.07495450000
5459912/5845908 [===========================>..] - ETA: 5747s - loss: 0.07495460000
5469995/5845908 [===========================>..] - ETA: 5597s - loss: 0.07485470000
5479915/5845908 [===========================>..] - ETA: 5449s - loss: 0.07485480000
5489977/5845908 [===========================>..] - ETA: 5299s - loss: 0.07485490000
5499965/5845908 [===========================>..] - ETA: 5151s - loss: 0.07485500000
5509940/5845908 [===========================>..] - ETA: 5002s - loss: 0.07475510000
5519974/5845908 [===========================>..] - ETA: 4853s - loss: 0.07475520000
5529915/5845908 [===========================>..] - ETA: 4705s - loss: 0.07475530000
5539924/5845908 [===========================>..] - ETA: 4556s - loss: 0.07475540000
5549953/5845908 [===========================>..] - ETA: 4406s - loss: 0.07475550000
5559971/5845908 [===========================>..] - ETA: 4257s - loss: 0.07465560000
5569904/5845908 [===========================>..] - ETA: 4109s - loss: 0.07465570000
5579979/5845908 [===========================>..] - ETA: 3959s - loss: 0.07465580000
5589904/5845908 [===========================>..] - ETA: 3811s - loss: 0.07465590000
5599985/5845908 [===========================>..] - ETA: 3661s - loss: 0.07455600000
5609923/5845908 [===========================>..] - ETA: 3513s - loss: 0.07455610000
5619965/5845908 [===========================>..] - ETA: 3363s - loss: 0.07455620000
5629964/5845908 [===========================>..] - ETA: 3214s - loss: 0.07455630000
5639943/5845908 [===========================>..] - ETA: 3066s - loss: 0.07455640000
5649926/5845908 [===========================>..] - ETA: 2917s - loss: 0.07445650000
5659951/5845908 [============================>.] - ETA: 2769s - loss: 0.07445660000
5669915/5845908 [============================>.] - ETA: 2621s - loss: 0.07445670000
5679993/5845908 [============================>.] - ETA: 2472s - loss: 0.07445680000
5689949/5845908 [============================>.] - ETA: 2324s - loss: 0.07435690000
5699900/5845908 [============================>.] - ETA: 2176s - loss: 0.07435700000
5709954/5845908 [============================>.] - ETA: 2026s - loss: 0.07435710000
5719944/5845908 [============================>.] - ETA: 1877s - loss: 0.07435720000
5729899/5845908 [============================>.] - ETA: 1729s - loss: 0.07425730000
5739986/5845908 [============================>.] - ETA: 1578s - loss: 0.07425740000
5749898/5845908 [============================>.] - ETA: 1431s - loss: 0.07425750000
5759985/5845908 [============================>.] - ETA: 1280s - loss: 0.07425760000
5769988/5845908 [============================>.] - ETA: 1131s - loss: 0.07415770000
5779941/5845908 [============================>.] - ETA: 983s - loss: 0.07415780000
5789998/5845908 [============================>.] - ETA: 833s - loss: 0.07415790000
5799981/5845908 [============================>.] - ETA: 685s - loss: 0.07415800000
5809916/5845908 [============================>.] - ETA: 537s - loss: 0.07415810000
5819989/5845908 [============================>.] - ETA: 386s - loss: 0.07405820000
5829908/5845908 [============================>.] - ETA: 238s - loss: 0.07405830000
5839951/5845908 [============================>.] - ETA: 88s - loss: 0.07405840000
5845829/5845908 [============================>.] - ETA: 1s - loss: 0.0740('Samples seen:', 1061092376)
Training completed!
Saving model...
In [18]:
# takes 24 hours
In [19]:
print("It's test time!")
# Recover the embedding weights trained with skipgram: they are read from the
# model's first layer, as the first array returned by get_weights().
embedding_layer = model.layers[0]
weights = embedding_layer.get_weights()[0]
It's test time!
In [20]:
# Blank out the embedding rows of the first `skip_top` indices (the most common
# words, which the configuration marks as ignored). Assigning a scalar
# broadcasts over the slice, so this no longer depends on the rows being
# exactly `dim_proj` wide.
weights[:skip_top] = 0

# Normalised copy of the embedding matrix, used by the lookup helpers below.
# NOTE(review): presumably np_utils.normalize scales each row to unit L2 norm,
# which would make the dot products cosine similarities -- confirm.
norm_weights = np_utils.normalize(weights)

# word -> integer index, as held by the tokenizer.
word_index = tokenizer.word_index
# integer index -> word, to turn row numbers back into words.
reverse_word_index = {index: word for word, index in word_index.items()}
In [22]:
def embed_word(w):
    """Look up the normalised embedding row for the word `w`.

    Returns the row of `norm_weights` at the word's index, or None when the
    word has no (non-zero) entry in `word_index` or when its index lies
    outside the half-open range [skip_top, max_features).
    """
    index = word_index.get(w)
    # A missing word yields None and index 0 is falsy, so both are rejected
    # before the range comparison is evaluated.
    if index and skip_top <= index < max_features:
        return norm_weights[index]
    return None
def closest_to_point(point, nb_closest=10):
    """Rank the rows of `norm_weights` by their dot product with `point`.

    Returns a list of at most `nb_closest` (word, score) tuples ordered from
    the highest score to the lowest. Words come from `reverse_word_index`;
    a row index with no entry there is reported as None.
    """
    scores = np.dot(norm_weights, point)
    # sorted() is stable, so rows with equal scores keep ascending index order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [(reverse_word_index.get(row), score) for row, score in ranked[:nb_closest]]
def closest_to_word(w, nb_closest=10):
    """Return the `nb_closest` entries most similar to word `w`.

    The word is resolved through `embed_word`, so the same rules apply:
    an unknown word, one of the first `skip_top` indices, or an index at or
    beyond `max_features` yields an empty list. Otherwise the result is the
    list of (word, score) pairs produced by `closest_to_point`; the query
    word itself is part of the ranking.
    """
    # Delegate validation to embed_word instead of repeating its checks here,
    # so the two helpers cannot drift apart.
    vector = embed_word(w)
    if vector is None:
        return []
    # The row is 1-D, so the transpose the original applied was a no-op.
    return closest_to_point(vector, nb_closest)
In [23]:
''' the results in comments below were for:
5.8M HN comments
dim_proj = 256
nb_epoch = 2
optimizer = rmsprop
loss = mse
max_features = 50000
skip_top = 100
negative_samples = 1.
window_size = 4
and frequency subsampling of factor 10e-5.
'''
# Probe words for a qualitative look at the embeddings. The trailing comment
# on each entry lists neighbours noted for the run described in the string
# above.
words = [
    "article",     # post, story, hn, read, comments
    "3",           # 6, 4, 5, 2
    "two",         # three, few, several, each
    "great",       # love, nice, working, looking
    "data",        # information, memory, database
    "money",       # company, pay, customers, spend
    "years",       # ago, year, months, hours, week, days
    "android",     # ios, release, os, mobile, beta
    "javascript",  # js, css, compiler, library, jquery, ruby
    "look",        # looks, looking
    "business",    # industry, professional, customers
    "company",     # companies, startup, founders, startups
    "after",       # before, once, until
    "own",         # personal, our, having
    "us",          # united, country, american, tech, diversity, usa, china, sv
    "using",       # javascript, js, tools (lol)
    "here",        # hn, post, comments
]

# Print a header for each probe word followed by its neighbours, one per line.
for w in words:
    neighbours = closest_to_word(w)
    print('====', w)
    for pair in neighbours:
        print(pair)
('====', 'article')
('article', 1.0000002)
('post', 0.90891558)
('story', 0.89286608)
('posted', 0.89106327)
('here', 0.8900885)
('comments', 0.88936681)
('reddit', 0.88504016)
('pg', 0.88066208)
('posts', 0.87696922)
('thread', 0.87472731)
('====', '3')
('3', 0.99999988)
('6', 0.94339204)
('9', 0.94330382)
('2', 0.94284344)
('ff', 0.93928117)
('32', 0.93828988)
('24', 0.93781793)
('7', 0.93774015)
('36', 0.93521035)
('released', 0.93484235)
('====', 'two')
('two', 1.0)
('typically', 0.93890905)
('quantity', 0.93832433)
('defining', 0.93728578)
('sustain', 0.93695474)
('evolve', 0.93685579)
('letting', 0.93666261)
('dying', 0.93665802)
('generations', 0.93659782)
('avoiding', 0.93625355)
('====', 'great')
('great', 1.0)
('looking', 0.93422735)
('tell', 0.93256807)
('posting', 0.93174744)
('wish', 0.92921531)
('helpful', 0.92886698)
('informative', 0.92884183)
('cool', 0.92871445)
('idea', 0.92823637)
('fun', 0.92710769)
('====', 'data')
('data', 1.0000002)
('storage', 0.90516901)
('storing', 0.90049845)
('hashing', 0.89902455)
('implemented', 0.89745152)
('caching', 0.89728582)
('encrypted', 0.89696115)
('methods', 0.89646441)
('simple', 0.89602488)
('plaintext', 0.89597952)
('====', 'money')
('money', 1.0)
('companies', 0.93026137)
('company', 0.92494476)
('customers', 0.92021573)
('paying', 0.91961157)
('likely', 0.91767448)
('spending', 0.91637743)
('employees', 0.91532087)
('investment', 0.91492891)
('pay', 0.91452479)
('====', 'years')
('years', 1.0)
('months', 0.94392097)
('past', 0.93173051)
('week', 0.92992276)
('minutes', 0.92972291)
('spent', 0.92907798)
('hours', 0.92837012)
('couple', 0.9270919)
('consensus', 0.92708981)
('weeks', 0.92601526)
('====', 'android')
('android', 0.99999988)
('ios', 0.94578266)
('features', 0.93861037)
('tablet', 0.93838167)
('desktop', 0.93774199)
('ipad', 0.93726087)
('oses', 0.93691599)
('platforms', 0.93657035)
('ported', 0.9364593)
('apps', 0.93606949)
('====', 'javascript')
('javascript', 0.99999988)
('jquery', 0.93641472)
('using', 0.93390918)
('code', 0.93197763)
('browser', 0.92690337)
('default', 0.92589772)
('css', 0.92425084)
('server', 0.92361653)
('ajax', 0.92300272)
('plugins', 0.91980928)
('====', 'look')
('look', 0.99999982)
('wanted', 0.94779706)
('helpful', 0.94502401)
('excited', 0.94429147)
('write', 0.94348037)
('suggestions', 0.94347084)
('nicer', 0.9432714)
('option', 0.9431746)
('exact', 0.94313562)
('easiest', 0.94311738)
('====', 'business')
('business', 1.0)
('businesses', 0.93864095)
('company', 0.9364717)
('education', 0.93593216)
('entrepreneurs', 0.93444502)
('spending', 0.93438578)
('competitive', 0.9330337)
('startups', 0.93302143)
('technical', 0.93276441)
('companies', 0.93249726)
('====', 'company')
('company', 1.0)
('companies', 0.95230079)
('spending', 0.94772577)
('spend', 0.94633341)
('willing', 0.945072)
('employees', 0.94389528)
('situation', 0.94301093)
('businesses', 0.94213593)
('paying', 0.94121146)
('benefit', 0.94106424)
('====', 'after')
('after', 1.0000004)
('whatsapp', 0.9372161)
('shipped', 0.93622434)
('consoles', 0.93525684)
('invites', 0.93461001)
('mins', 0.93418413)
('hits', 0.93370008)
('showdead', 0.93312764)
('requested', 0.93279946)
('five', 0.93275297)
('====', 'own')
('own', 0.99999976)
('worthwhile', 0.95177341)
('reward', 0.95115095)
('contribution', 0.95105624)
('matters', 0.95075196)
('expertise', 0.94946122)
('complicated', 0.94895703)
('incentive', 0.94894111)
('fixing', 0.94872379)
('necessarily', 0.94861817)
('====', 'us')
('us', 0.99999982)
('eu', 0.92610723)
('usa', 0.92355561)
('berlin', 0.92285001)
('nations', 0.92176247)
('mexico', 0.91843802)
('guardian', 0.91689533)
('indian', 0.91606253)
('visited', 0.91603446)
('international', 0.9155103)
('====', 'using')
('using', 1.0)
('javascript', 0.93390918)
('css', 0.92669106)
('flash', 0.92400962)
('uses', 0.9220047)
('jquery', 0.92187738)
('ajax', 0.92156923)
('supports', 0.92156422)
('server', 0.92112899)
('browser', 0.92078733)
('====', 'here')
('here', 1.0000001)
('thread', 0.93336034)
('post', 0.93085027)
('posted', 0.92966461)
('posting', 0.92936492)
('pg', 0.92331135)
('hacker', 0.92293847)
('posts', 0.92182922)
('interesting', 0.92130566)
('hn', 0.92033947)
In [25]:
# Nearest neighbours of 'book' (default nb_closest=10); the value is the cell output.
closest_to_word('book')
Out[25]:
[('book', 0.99999988),
('books', 0.93772721),
('tutorial', 0.9375813),
('paywall', 0.93704069),
('intro', 0.93491739),
('screenshots', 0.93375456),
('redirects', 0.9337393),
('favorite', 0.93335485),
('repo', 0.93316227),
('ff', 0.9330492)]
In [27]:
# Nearest neighbours of 'paypal' (default nb_closest=10); the value is the cell output.
closest_to_word('paypal')
Out[27]:
[('paypal', 0.99999976),
('stripe', 0.94726515),
('listing', 0.94713485),
('doge', 0.94603837),
('belongs', 0.94497705),
('coinbase', 0.94476163),
('automating', 0.94357979),
('3gs', 0.94293594),
('heartbleed', 0.9426229),
('placement', 0.94246304)]
In [29]:
# Nearest neighbours of 'iphone' (default nb_closest=10); the value is the cell output.
closest_to_word('iphone')
Out[29]:
[('iphone', 1.0),
('ipad', 0.93695891),
('mac', 0.93089706),
('android', 0.92847413),
('osx', 0.92499757),
('mobile', 0.9189598),
('desktop', 0.9188664),
('kindle', 0.91885668),
('app', 0.9180249),
('ios', 0.91645032)]
In [30]:
# Nearest neighbours of 'samsung' (default nb_closest=10); the value is the cell output.
closest_to_word('samsung')
Out[30]:
[('samsung', 1.0000002),
('nexus', 0.94058442),
('htc', 0.93408191),
('motorola', 0.93351519),
('shipped', 0.93209493),
('droid', 0.92996943),
('shows', 0.92826527),
('contains', 0.9279933),
('salesforce', 0.92773867),
('gem', 0.92740226)]
In [31]:
# Nearest neighbours of 'obama' (default nb_closest=10); the value is the cell output.
closest_to_word('obama')
Out[31]:
[('obama', 0.99999988),
('clinton', 0.91198832),
('cue', 0.90583628),
('florida', 0.90488577),
('screencasts', 0.90177119),
('pending', 0.90155625),
('hits', 0.90109777),
('putin', 0.9001627),
('groupon', 0.90015745),
('esp', 0.90002215)]
Content source: dikien/personnel-study
Similar notebooks: