In [1]:
# As usual, a bit of setup

import time, os, json
import numpy as np
import matplotlib.pyplot as plt
import pickle

from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from layers import *
from approach2 import *
from solver import *

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
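For reference, rel_error is normally used together with the gradient-check helpers imported above. A minimal sketch, assuming the usual eval_numerical_gradient_array(f, x, df) signature and a hypothetical affine_forward/affine_backward pair in layers (both are assumptions about this repo):

# Hypothetical gradient check for an affine layer (layer names are assumptions);
# relies on the imports in the setup cell above.
x = np.random.randn(4, 5)
w = np.random.randn(5, 3)
b = np.random.randn(3)
dout = np.random.randn(4, 3)

out, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dout, cache)
dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout)
print 'dx error:', rel_error(dx, dx_num)   # roughly 1e-8 or smaller counts as a pass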

In [2]:
raw = pickle.load(open("stories.pck", "rb"))
print len(raw), "examples"
print max([len(x) for x in raw]), "supporting facts and questions at most"
print max([len(y.split(' ')) for x in raw for y in x]), "words per sentence at most"

_null, _start, _query, _end = "<NULL>", "<Start>", "<Query>", "<End>"

words = [_null, _start, _query, _end] + [q for ex in raw for sent in ex for w in sent.split(' ') for q in w.split(',')]
words = sorted(set(words))
word_to_idx = {w:i for i,w in enumerate(words)}

print len(words), "total words"

T = 70   # maximum story length in tokens (support sentences unrolled, T2 tokens each)
T2 = 7   # tokens per support sentence: <Start> + words + <End>, null-padded

data = []
for ex in raw:
    # the first sLen sentences are supporting facts; the rest are "question,answer" pairs
    sLen = 0
    while ex[sLen].find(',') == -1:
        sLen += 1
    supports = word_to_idx[_null] * np.ones(T, dtype=int)
    queries = word_to_idx[_null] * np.ones((len(ex)-sLen, 5), dtype=int)
    
    pos = 0
    for idx, sent in enumerate(ex):
        if idx < sLen:
            # supporting fact: <Start> words <End>, null-padded to T2 tokens
            sent = [word_to_idx[_start]] + [word_to_idx[x] for x in sent.split(' ')] + [word_to_idx[_end]]
            supports[pos:pos+T2] = sent + [word_to_idx[_null]]*(T2-len(sent))
            pos += T2
        else:
            # question: drop the ",answer" suffix, then encode as <Query> words <End>
            sent = sent.split(',')[0]
            sent = [word_to_idx[_query]] + [word_to_idx[x] for x in sent.split(' ')] + [word_to_idx[_end]]
            sent = sent + [word_to_idx[_null]]*(5-len(sent))   # null-pad to the fixed query length
            queries[idx-sLen, :] = sent
    
    answers = np.asarray([word_to_idx[x.split(',')[1]] for x in ex[sLen:]]).reshape(len(ex)-sLen, 1)
    # one data row per question: [T story tokens | 5 query tokens | answer]
    for i in xrange(queries.shape[0]):
        data.append(np.hstack((supports, queries[i,:], answers[i,:])))

data = np.asarray(data)
data_train = data[:-1000,:]
data_test = data[-1000:,:]
print data_train.shape


1000 examples
14 supporting facts and questions at most
5 words per sentence at most
159 total words
(3275, 76)
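Each row of data packs one question: the first T (=70) entries are the story tokens, the next 5 are the query tokens, and the last entry is the answer index, matching the printed shape (3275, 76). A small helper (not part of the original pipeline) makes this layout easy to inspect:

# Decode one row back into words; <NULL> padding is dropped.
def decode_row(row):
    to_words = lambda idxs: " ".join(words[i] for i in idxs if words[i] != _null)
    return to_words(row[:T]), to_words(row[T:T+5]), words[row[-1]]

story, query, answer = decode_row(data_train[0])
print query, "->", answer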

In [3]:
model = SeqNN(word_to_idx, cell_type='rnn', hidden_dim=256, wordvec_dim=512, sentlen=T2, storylen=T, qlen=5)
solver = SeqNNSolver(model, data_train[:50],
           update_rule='adam',
           num_epochs=50,
           batch_size=25,
           optim_config={
             'learning_rate': 1e-3,
           },
           lr_decay=.995,
           verbose=True, print_every=10,
         )
solver.train()

# Plot the training losses
plt.plot(solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()


(Iteration 1 / 100) loss: 126.987805
(Iteration 11 / 100) loss: 61.239371
(Iteration 21 / 100) loss: 54.027567
(Iteration 31 / 100) loss: 28.590979
(Iteration 41 / 100) loss: 13.895459
(Iteration 51 / 100) loss: 12.150488
(Iteration 61 / 100) loss: 1.495752
(Iteration 71 / 100) loss: 1.231247
(Iteration 81 / 100) loss: 0.596445
(Iteration 91 / 100) loss: 0.392539
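With only 50 training rows the loss falls by roughly two orders of magnitude, so this run mainly confirms the model can overfit a tiny subset. On that scale a log-axis plot can be easier to read; a one-line alternative to the plot above:

# Same loss history on a logarithmic y-axis
plt.semilogy(solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss (log scale)')
plt.show()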

In [4]:
minibatch = data_train[:50]
print "Train:",
answ = model.loss(minibatch, sample=True)   # sample=True returns predicted answer indices
print (answ == minibatch[:, -1]).mean()

# Reconstruct the first story, skipping the <NULL> padding
story = " ".join([words[x] for x in data_train[0, :T]]).split(_null)
for x in story:
    if len(x.split(' ')) > 2:
        print x.strip()
# Print every query (and its answer) that belongs to this story
i = 0
while np.all(data_train[0, :T] == data_train[i, :T]):
    print " ".join([words[x] for x in data_train[i, T:T+6]])
    i += 1

print
print "Other answers:"
# predicted answer followed by the ground-truth answer, one pair per line
print "\n".join([words[x]+" "+words[y] for x, y in zip(model.loss(minibatch, sample=True), minibatch[:, -1])])


Train: 1.0
<Start> hasan parka gitti . <End>
<Start> sinan eva gitti . <End>
<Start> hasan servisa gitti . <End>
<Start> emre tershanea gitti . <End>
<Start> ali boluma gitti . <End>
<Query> hasan nerede ? <End> servis
<Query> sinan nerede ? <End> ev
<Query> emre nerede ? <End> tershane
<Query> ali nerede ? <End> bolum

Other answers:
servis servis
ev ev
tershane tershane
bolum bolum
kantin kantin
kantin kantin
kamyon kamyon
ev ev
araba araba
labaratuvar labaratuvar
araba araba
kantin kantin
masa masa
sira sira
masa masa
lastikci lastikci
lastikci lastikci
otobus otobus
otobus otobus
okul okul
hastane hastane
tamirhane tamirhane
tamirhane tamirhane
sandalye sandalye
hastane hastane
servis servis
hastane hastane
tuvalet tuvalet
tuvalet tuvalet
tuvalet tuvalet
tuvalet tuvalet
masa masa
oda oda
servis servis
kamyon kamyon
dersane dersane
sinif sinif
sinif sinif
sinif sinif
dersane dersane
oda oda
banyo banyo
oda oda
oda oda
oda oda
duvar duvar
dersane dersane
ev ev
ev ev
bahce bahce
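The accuracy check above passes the whole 50-row minibatch to model.loss in one call. For larger slices, a small helper (hypothetical, not part of the solver) can evaluate in fixed-size chunks using the same sample=True pattern:

# Hypothetical helper: answer accuracy over `rows`, evaluated in minibatches.
def eval_accuracy(model, rows, batch_size=100):
    correct = 0
    for s in xrange(0, rows.shape[0], batch_size):
        batch = rows[s:s+batch_size]
        pred = model.loss(batch, sample=True)   # predicted answer indices
        correct += (pred == batch[:, -1]).sum()
    return float(correct) / rows.shape[0]

# usage: print eval_accuracy(model, data_train[:1000])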

In [5]:
model = SeqNN(word_to_idx, cell_type='rnn', hidden_dim=256, wordvec_dim=512, sentlen=T2, storylen=T, qlen=5)
solver = SeqNNSolver(model, data_train,
           update_rule='adam',
           num_epochs=40,
           batch_size=32,
           optim_config={
             'learning_rate': 3e-4,
           },
           lr_decay=.9,
           verbose=True, print_every=10,
         )
solver.train()

# Plot the training losses
plt.plot(solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()


(Iteration 1 / 4080) loss: 161.695129
(Iteration 11 / 4080) loss: 138.921518
(Iteration 21 / 4080) loss: 115.475427
(Iteration 31 / 4080) loss: 116.602076
(Iteration 41 / 4080) loss: 112.268232
(Iteration 51 / 4080) loss: 112.695253
(Iteration 61 / 4080) loss: 114.829091
(Iteration 71 / 4080) loss: 106.658962
(Iteration 81 / 4080) loss: 101.682110
(Iteration 91 / 4080) loss: 106.934325
(Iteration 101 / 4080) loss: 110.279170
Train Acc: 0.126 Epoch: 1
(Iteration 111 / 4080) loss: 107.975058
(Iteration 121 / 4080) loss: 107.554445
(Iteration 131 / 4080) loss: 99.529738
(Iteration 141 / 4080) loss: 105.435849
(Iteration 151 / 4080) loss: 100.349523
(Iteration 161 / 4080) loss: 101.244864
(Iteration 171 / 4080) loss: 97.478982
(Iteration 181 / 4080) loss: 93.964581
(Iteration 191 / 4080) loss: 96.423041
(Iteration 201 / 4080) loss: 99.906357
Train Acc: 0.184 Epoch: 2
(Iteration 211 / 4080) loss: 91.696695
(Iteration 221 / 4080) loss: 94.211940
(Iteration 231 / 4080) loss: 96.964097
(Iteration 241 / 4080) loss: 91.426848
(Iteration 251 / 4080) loss: 84.596758
(Iteration 261 / 4080) loss: 86.721969
(Iteration 271 / 4080) loss: 89.595383
(Iteration 281 / 4080) loss: 92.997322
(Iteration 291 / 4080) loss: 88.307787
(Iteration 301 / 4080) loss: 82.094064
Train Acc: 0.266 Epoch: 3
(Iteration 311 / 4080) loss: 88.677327
(Iteration 321 / 4080) loss: 79.357117
(Iteration 331 / 4080) loss: 83.941502
(Iteration 341 / 4080) loss: 75.314254
(Iteration 351 / 4080) loss: 78.693424
(Iteration 361 / 4080) loss: 70.898482
(Iteration 371 / 4080) loss: 81.447244
(Iteration 381 / 4080) loss: 70.066265
(Iteration 391 / 4080) loss: 61.058641
(Iteration 401 / 4080) loss: 72.449787
Train Acc: 0.356 Epoch: 4
(Iteration 411 / 4080) loss: 63.449963
(Iteration 421 / 4080) loss: 78.789085
(Iteration 431 / 4080) loss: 69.417627
(Iteration 441 / 4080) loss: 67.666669
(Iteration 451 / 4080) loss: 67.124619
(Iteration 461 / 4080) loss: 73.792971
(Iteration 471 / 4080) loss: 62.146597
(Iteration 481 / 4080) loss: 74.541381
(Iteration 491 / 4080) loss: 62.281561
(Iteration 501 / 4080) loss: 66.225705
Train Acc: 0.442 Epoch: 5
(Iteration 511 / 4080) loss: 65.534643
(Iteration 521 / 4080) loss: 70.657424
(Iteration 531 / 4080) loss: 65.749492
(Iteration 541 / 4080) loss: 73.867686
(Iteration 551 / 4080) loss: 63.895258
(Iteration 561 / 4080) loss: 59.221029
(Iteration 571 / 4080) loss: 61.699074
(Iteration 581 / 4080) loss: 71.171408
(Iteration 591 / 4080) loss: 62.512910
(Iteration 601 / 4080) loss: 63.608356
(Iteration 611 / 4080) loss: 54.974616
Train Acc: 0.416 Epoch: 6
(Iteration 621 / 4080) loss: 65.952624
(Iteration 631 / 4080) loss: 56.105604
(Iteration 641 / 4080) loss: 65.768186
(Iteration 651 / 4080) loss: 59.824124
(Iteration 661 / 4080) loss: 63.353164
(Iteration 671 / 4080) loss: 67.216333
(Iteration 681 / 4080) loss: 54.594717
(Iteration 691 / 4080) loss: 57.107135
(Iteration 701 / 4080) loss: 52.843627
(Iteration 711 / 4080) loss: 60.806480
Train Acc: 0.458 Epoch: 7
(Iteration 721 / 4080) loss: 57.178841
(Iteration 731 / 4080) loss: 47.685053
(Iteration 741 / 4080) loss: 57.150996
(Iteration 751 / 4080) loss: 57.551471
(Iteration 761 / 4080) loss: 55.325566
(Iteration 771 / 4080) loss: 65.866367
(Iteration 781 / 4080) loss: 62.961289
(Iteration 791 / 4080) loss: 56.190140
(Iteration 801 / 4080) loss: 57.656035
(Iteration 811 / 4080) loss: 58.465405
Train Acc: 0.432 Epoch: 8
(Iteration 821 / 4080) loss: 57.509390
(Iteration 831 / 4080) loss: 60.429577
(Iteration 841 / 4080) loss: 55.644376
(Iteration 851 / 4080) loss: 55.613299
(Iteration 861 / 4080) loss: 53.235929
(Iteration 871 / 4080) loss: 58.554495
(Iteration 881 / 4080) loss: 46.283648
(Iteration 891 / 4080) loss: 62.838796
(Iteration 901 / 4080) loss: 60.267118
(Iteration 911 / 4080) loss: 49.133445
Train Acc: 0.422 Epoch: 9
(Iteration 921 / 4080) loss: 51.398694
(Iteration 931 / 4080) loss: 50.188609
(Iteration 941 / 4080) loss: 54.567296
(Iteration 951 / 4080) loss: 44.651145
(Iteration 961 / 4080) loss: 50.356914
(Iteration 971 / 4080) loss: 45.628768
(Iteration 981 / 4080) loss: 60.043528
(Iteration 991 / 4080) loss: 57.222282
(Iteration 1001 / 4080) loss: 51.138769
(Iteration 1011 / 4080) loss: 62.579822
Train Acc: 0.472 Epoch: 10
(Iteration 1021 / 4080) loss: 63.258103
(Iteration 1031 / 4080) loss: 46.194608
(Iteration 1041 / 4080) loss: 58.263421
(Iteration 1051 / 4080) loss: 52.391129
(Iteration 1061 / 4080) loss: 55.050208
(Iteration 1071 / 4080) loss: 50.336007
(Iteration 1081 / 4080) loss: 47.066548
(Iteration 1091 / 4080) loss: 54.523618
(Iteration 1101 / 4080) loss: 42.445089
(Iteration 1111 / 4080) loss: 55.052287
(Iteration 1121 / 4080) loss: 55.923015
Train Acc: 0.496 Epoch: 11
(Iteration 1131 / 4080) loss: 50.049285
(Iteration 1141 / 4080) loss: 50.680935
(Iteration 1151 / 4080) loss: 53.382449
(Iteration 1161 / 4080) loss: 44.521598
(Iteration 1171 / 4080) loss: 47.295442
(Iteration 1181 / 4080) loss: 55.101853
(Iteration 1191 / 4080) loss: 40.179459
(Iteration 1201 / 4080) loss: 39.225067
(Iteration 1211 / 4080) loss: 49.846300
(Iteration 1221 / 4080) loss: 44.288436
Train Acc: 0.476 Epoch: 12
(Iteration 1231 / 4080) loss: 51.421439
(Iteration 1241 / 4080) loss: 43.515075
(Iteration 1251 / 4080) loss: 36.771503
(Iteration 1261 / 4080) loss: 53.552175
(Iteration 1271 / 4080) loss: 45.154264
(Iteration 1281 / 4080) loss: 46.575503
(Iteration 1291 / 4080) loss: 55.528734
(Iteration 1301 / 4080) loss: 50.224500
(Iteration 1311 / 4080) loss: 40.107412
(Iteration 1321 / 4080) loss: 48.540680
Train Acc: 0.516 Epoch: 13
(Iteration 1331 / 4080) loss: 53.127071
(Iteration 1341 / 4080) loss: 47.868240
(Iteration 1351 / 4080) loss: 54.191239
(Iteration 1361 / 4080) loss: 51.922258
(Iteration 1371 / 4080) loss: 48.556818
(Iteration 1381 / 4080) loss: 53.795165
(Iteration 1391 / 4080) loss: 42.483527
(Iteration 1401 / 4080) loss: 46.982328
(Iteration 1411 / 4080) loss: 54.231581
(Iteration 1421 / 4080) loss: 48.455691
Train Acc: 0.51 Epoch: 14
(Iteration 1431 / 4080) loss: 42.973114
(Iteration 1441 / 4080) loss: 41.023021
(Iteration 1451 / 4080) loss: 49.842482
(Iteration 1461 / 4080) loss: 46.087429
(Iteration 1471 / 4080) loss: 51.099092
(Iteration 1481 / 4080) loss: 50.624882
(Iteration 1491 / 4080) loss: 43.508224
(Iteration 1501 / 4080) loss: 51.936226
(Iteration 1511 / 4080) loss: 51.780903
(Iteration 1521 / 4080) loss: 42.414423
Train Acc: 0.476 Epoch: 15
(Iteration 1531 / 4080) loss: 36.125996
(Iteration 1541 / 4080) loss: 46.405975
(Iteration 1551 / 4080) loss: 54.782510
(Iteration 1561 / 4080) loss: 44.104148
(Iteration 1571 / 4080) loss: 48.669312
(Iteration 1581 / 4080) loss: 44.815947
(Iteration 1591 / 4080) loss: 41.449603
(Iteration 1601 / 4080) loss: 49.582011
(Iteration 1611 / 4080) loss: 40.451917
(Iteration 1621 / 4080) loss: 38.787184
(Iteration 1631 / 4080) loss: 44.929890
Train Acc: 0.506 Epoch: 16
(Iteration 1641 / 4080) loss: 41.978165
(Iteration 1651 / 4080) loss: 46.640636
(Iteration 1661 / 4080) loss: 42.146772
(Iteration 1671 / 4080) loss: 47.130496
(Iteration 1681 / 4080) loss: 54.343874
(Iteration 1691 / 4080) loss: 49.931606
(Iteration 1701 / 4080) loss: 46.326601
(Iteration 1711 / 4080) loss: 46.160907
(Iteration 1721 / 4080) loss: 45.826882
(Iteration 1731 / 4080) loss: 55.002513
Train Acc: 0.544 Epoch: 17
(Iteration 1741 / 4080) loss: 45.810722
(Iteration 1751 / 4080) loss: 41.513789
(Iteration 1761 / 4080) loss: 46.852449
(Iteration 1771 / 4080) loss: 49.449610
(Iteration 1781 / 4080) loss: 48.889068
(Iteration 1791 / 4080) loss: 38.777474
(Iteration 1801 / 4080) loss: 51.053886
(Iteration 1811 / 4080) loss: 43.805795
(Iteration 1821 / 4080) loss: 44.413690
(Iteration 1831 / 4080) loss: 53.150889
Train Acc: 0.514 Epoch: 18
(Iteration 1841 / 4080) loss: 40.814950
(Iteration 1851 / 4080) loss: 42.469215
(Iteration 1861 / 4080) loss: 48.334874
(Iteration 1871 / 4080) loss: 61.230337
(Iteration 1881 / 4080) loss: 47.792178
(Iteration 1891 / 4080) loss: 47.832029
(Iteration 1901 / 4080) loss: 42.649317
(Iteration 1911 / 4080) loss: 42.232023
(Iteration 1921 / 4080) loss: 45.164719
(Iteration 1931 / 4080) loss: 44.914171
Train Acc: 0.54 Epoch: 19
(Iteration 1941 / 4080) loss: 40.731393
(Iteration 1951 / 4080) loss: 41.204326
(Iteration 1961 / 4080) loss: 43.257137
(Iteration 1971 / 4080) loss: 42.692293
(Iteration 1981 / 4080) loss: 49.099687
(Iteration 1991 / 4080) loss: 48.663156
(Iteration 2001 / 4080) loss: 46.914944
(Iteration 2011 / 4080) loss: 46.128689
(Iteration 2021 / 4080) loss: 38.598232
(Iteration 2031 / 4080) loss: 45.304746
Train Acc: 0.512 Epoch: 20
(Iteration 2041 / 4080) loss: 49.974403
(Iteration 2051 / 4080) loss: 50.878532
(Iteration 2061 / 4080) loss: 48.229450
(Iteration 2071 / 4080) loss: 45.490259
(Iteration 2081 / 4080) loss: 53.035377
(Iteration 2091 / 4080) loss: 50.336294
(Iteration 2101 / 4080) loss: 38.175802
(Iteration 2111 / 4080) loss: 36.953676
(Iteration 2121 / 4080) loss: 45.560814
(Iteration 2131 / 4080) loss: 34.756708
(Iteration 2141 / 4080) loss: 47.339128
Train Acc: 0.56 Epoch: 21
(Iteration 2151 / 4080) loss: 45.982063
(Iteration 2161 / 4080) loss: 43.864747
(Iteration 2171 / 4080) loss: 48.631712
(Iteration 2181 / 4080) loss: 38.631537
(Iteration 2191 / 4080) loss: 45.347233
(Iteration 2201 / 4080) loss: 50.893203
(Iteration 2211 / 4080) loss: 40.140906
(Iteration 2221 / 4080) loss: 46.071360
(Iteration 2231 / 4080) loss: 38.677479
(Iteration 2241 / 4080) loss: 45.069474
Train Acc: 0.542 Epoch: 22
(Iteration 2251 / 4080) loss: 38.827192
(Iteration 2261 / 4080) loss: 43.612626
(Iteration 2271 / 4080) loss: 42.965681
(Iteration 2281 / 4080) loss: 39.128026
(Iteration 2291 / 4080) loss: 41.621681
(Iteration 2301 / 4080) loss: 41.575151
(Iteration 2311 / 4080) loss: 48.706680
(Iteration 2321 / 4080) loss: 49.105836
(Iteration 2331 / 4080) loss: 37.725393
(Iteration 2341 / 4080) loss: 38.033405
Train Acc: 0.536 Epoch: 23
(Iteration 2351 / 4080) loss: 45.275303
(Iteration 2361 / 4080) loss: 50.460097
(Iteration 2371 / 4080) loss: 52.657414
(Iteration 2381 / 4080) loss: 40.823732
(Iteration 2391 / 4080) loss: 47.430555
(Iteration 2401 / 4080) loss: 40.829576
(Iteration 2411 / 4080) loss: 37.538313
(Iteration 2421 / 4080) loss: 38.605652
(Iteration 2431 / 4080) loss: 42.490537
(Iteration 2441 / 4080) loss: 44.921769
Train Acc: 0.544 Epoch: 24
(Iteration 2451 / 4080) loss: 42.642865
(Iteration 2461 / 4080) loss: 48.676434
(Iteration 2471 / 4080) loss: 40.508007
(Iteration 2481 / 4080) loss: 36.970902
(Iteration 2491 / 4080) loss: 38.170538
(Iteration 2501 / 4080) loss: 37.159150
(Iteration 2511 / 4080) loss: 39.059636
(Iteration 2521 / 4080) loss: 47.198203
(Iteration 2531 / 4080) loss: 40.874609
(Iteration 2541 / 4080) loss: 46.869405
Train Acc: 0.504 Epoch: 25
(Iteration 2551 / 4080) loss: 48.751745
(Iteration 2561 / 4080) loss: 42.058236
(Iteration 2571 / 4080) loss: 35.672237
(Iteration 2581 / 4080) loss: 39.293172
(Iteration 2591 / 4080) loss: 42.934318
(Iteration 2601 / 4080) loss: 45.063533
(Iteration 2611 / 4080) loss: 48.630337
(Iteration 2621 / 4080) loss: 45.254764
(Iteration 2631 / 4080) loss: 43.264797
(Iteration 2641 / 4080) loss: 35.923091
(Iteration 2651 / 4080) loss: 46.020328
Train Acc: 0.494 Epoch: 26
(Iteration 2661 / 4080) loss: 43.200047
(Iteration 2671 / 4080) loss: 46.852637
(Iteration 2681 / 4080) loss: 42.352880
(Iteration 2691 / 4080) loss: 36.517732
(Iteration 2701 / 4080) loss: 45.952340
(Iteration 2711 / 4080) loss: 40.098523
(Iteration 2721 / 4080) loss: 36.956289
(Iteration 2731 / 4080) loss: 39.268037
(Iteration 2741 / 4080) loss: 44.824148
(Iteration 2751 / 4080) loss: 37.665359
Train Acc: 0.506 Epoch: 27
(Iteration 2761 / 4080) loss: 42.183991
(Iteration 2771 / 4080) loss: 36.211356
(Iteration 2781 / 4080) loss: 35.869246
(Iteration 2791 / 4080) loss: 37.913146
(Iteration 2801 / 4080) loss: 42.624271
(Iteration 2811 / 4080) loss: 43.081140
(Iteration 2821 / 4080) loss: 44.523792
(Iteration 2831 / 4080) loss: 51.711548
(Iteration 2841 / 4080) loss: 45.407006
(Iteration 2851 / 4080) loss: 42.003007
Train Acc: 0.562 Epoch: 28
(Iteration 2861 / 4080) loss: 35.971011
(Iteration 2871 / 4080) loss: 33.789998
(Iteration 2881 / 4080) loss: 39.896176
(Iteration 2891 / 4080) loss: 35.223816
(Iteration 2901 / 4080) loss: 40.571639
(Iteration 2911 / 4080) loss: 41.239206
(Iteration 2921 / 4080) loss: 41.916159
(Iteration 2931 / 4080) loss: 41.109327
(Iteration 2941 / 4080) loss: 40.771408
(Iteration 2951 / 4080) loss: 44.183128
Train Acc: 0.548 Epoch: 29
(Iteration 2961 / 4080) loss: 35.269639
(Iteration 2971 / 4080) loss: 46.748559
(Iteration 2981 / 4080) loss: 40.466658
(Iteration 2991 / 4080) loss: 38.609403
(Iteration 3001 / 4080) loss: 41.444077
(Iteration 3011 / 4080) loss: 51.176928
(Iteration 3021 / 4080) loss: 39.096420
(Iteration 3031 / 4080) loss: 48.257370
(Iteration 3041 / 4080) loss: 45.518322
(Iteration 3051 / 4080) loss: 42.258331
Train Acc: 0.534 Epoch: 30
(Iteration 3061 / 4080) loss: 37.893242
(Iteration 3071 / 4080) loss: 39.023874
(Iteration 3081 / 4080) loss: 45.851955
(Iteration 3091 / 4080) loss: 40.325831
(Iteration 3101 / 4080) loss: 41.732582
(Iteration 3111 / 4080) loss: 42.097861
(Iteration 3121 / 4080) loss: 45.853049
(Iteration 3131 / 4080) loss: 35.933043
(Iteration 3141 / 4080) loss: 35.275474
(Iteration 3151 / 4080) loss: 42.046741
(Iteration 3161 / 4080) loss: 41.490822
Train Acc: 0.546 Epoch: 31
(Iteration 3171 / 4080) loss: 36.990687
(Iteration 3181 / 4080) loss: 41.310781
(Iteration 3191 / 4080) loss: 36.079200
(Iteration 3201 / 4080) loss: 42.199874
(Iteration 3211 / 4080) loss: 48.319597
(Iteration 3221 / 4080) loss: 38.446933
(Iteration 3231 / 4080) loss: 40.848476
(Iteration 3241 / 4080) loss: 46.829986
(Iteration 3251 / 4080) loss: 44.090225
(Iteration 3261 / 4080) loss: 47.223777
Train Acc: 0.532 Epoch: 32
(Iteration 3271 / 4080) loss: 43.381118
(Iteration 3281 / 4080) loss: 44.727956
(Iteration 3291 / 4080) loss: 51.140299
(Iteration 3301 / 4080) loss: 36.291588
(Iteration 3311 / 4080) loss: 38.110810
(Iteration 3321 / 4080) loss: 48.224419
(Iteration 3331 / 4080) loss: 52.299024
(Iteration 3341 / 4080) loss: 38.991137
(Iteration 3351 / 4080) loss: 37.155829
(Iteration 3361 / 4080) loss: 42.350827
Train Acc: 0.538 Epoch: 33
(Iteration 3371 / 4080) loss: 30.895462
(Iteration 3381 / 4080) loss: 39.665787
(Iteration 3391 / 4080) loss: 47.073375
(Iteration 3401 / 4080) loss: 36.192631
(Iteration 3411 / 4080) loss: 45.307365
(Iteration 3421 / 4080) loss: 36.450481
(Iteration 3431 / 4080) loss: 42.779223
(Iteration 3441 / 4080) loss: 39.469109
(Iteration 3451 / 4080) loss: 43.744565
(Iteration 3461 / 4080) loss: 46.363520
Train Acc: 0.542 Epoch: 34
(Iteration 3471 / 4080) loss: 43.874470
(Iteration 3481 / 4080) loss: 41.213327
(Iteration 3491 / 4080) loss: 40.051140
(Iteration 3501 / 4080) loss: 36.932749
(Iteration 3511 / 4080) loss: 43.427336
(Iteration 3521 / 4080) loss: 43.739905
(Iteration 3531 / 4080) loss: 33.756894
(Iteration 3541 / 4080) loss: 39.966801
(Iteration 3551 / 4080) loss: 36.677966
(Iteration 3561 / 4080) loss: 44.092002
Train Acc: 0.552 Epoch: 35
(Iteration 3571 / 4080) loss: 35.779247
(Iteration 3581 / 4080) loss: 44.286837
(Iteration 3591 / 4080) loss: 34.729587
(Iteration 3601 / 4080) loss: 44.296701
(Iteration 3611 / 4080) loss: 39.306213
(Iteration 3621 / 4080) loss: 47.671384
(Iteration 3631 / 4080) loss: 51.577474
(Iteration 3641 / 4080) loss: 45.347489
(Iteration 3651 / 4080) loss: 46.347040
(Iteration 3661 / 4080) loss: 47.445011
(Iteration 3671 / 4080) loss: 44.839620
Train Acc: 0.56 Epoch: 36
(Iteration 3681 / 4080) loss: 42.111231
(Iteration 3691 / 4080) loss: 37.543445
(Iteration 3701 / 4080) loss: 49.152558
(Iteration 3711 / 4080) loss: 44.743960
(Iteration 3721 / 4080) loss: 45.847970
(Iteration 3731 / 4080) loss: 40.793035
(Iteration 3741 / 4080) loss: 51.562598
(Iteration 3751 / 4080) loss: 42.240236
(Iteration 3761 / 4080) loss: 38.769117
(Iteration 3771 / 4080) loss: 46.702121
Train Acc: 0.566 Epoch: 37
(Iteration 3781 / 4080) loss: 41.946575
(Iteration 3791 / 4080) loss: 39.831859
(Iteration 3801 / 4080) loss: 36.310599
(Iteration 3811 / 4080) loss: 39.743589
(Iteration 3821 / 4080) loss: 46.937713
(Iteration 3831 / 4080) loss: 34.719565
(Iteration 3841 / 4080) loss: 39.394083
(Iteration 3851 / 4080) loss: 37.567446
(Iteration 3861 / 4080) loss: 40.383151
(Iteration 3871 / 4080) loss: 44.926255
Train Acc: 0.524 Epoch: 38
(Iteration 3881 / 4080) loss: 46.987871
(Iteration 3891 / 4080) loss: 36.621261
(Iteration 3901 / 4080) loss: 41.663433
(Iteration 3911 / 4080) loss: 41.082816
(Iteration 3921 / 4080) loss: 43.363036
(Iteration 3931 / 4080) loss: 52.477655
(Iteration 3941 / 4080) loss: 48.082679
(Iteration 3951 / 4080) loss: 47.773147
(Iteration 3961 / 4080) loss: 34.960797
(Iteration 3971 / 4080) loss: 41.142013
Train Acc: 0.518 Epoch: 39
(Iteration 3981 / 4080) loss: 35.700299
(Iteration 3991 / 4080) loss: 46.554514
(Iteration 4001 / 4080) loss: 38.239814
(Iteration 4011 / 4080) loss: 36.694141
(Iteration 4021 / 4080) loss: 38.043694
(Iteration 4031 / 4080) loss: 35.220001
(Iteration 4041 / 4080) loss: 36.142707
(Iteration 4051 / 4080) loss: 50.304452
(Iteration 4061 / 4080) loss: 42.267336
(Iteration 4071 / 4080) loss: 49.949133
Train Acc: 0.562 Epoch: 40
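At batch size 32 the raw 4080-point loss curve is noisy; a running-mean view, using only the recorded solver.loss_history, is often easier to read alongside the plot above:

# Smoothed view of the training loss (simple running mean)
window = 20
smooth = np.convolve(solver.loss_history, np.ones(window) / window, mode='valid')
plt.plot(smooth)
plt.xlabel('Iteration')
plt.ylabel('Loss (running mean)')
plt.title('Smoothed training loss')
plt.show()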

In [6]:
print "Train:",
answ=model.loss(data_train[:1000,:], sample=True)
print (answ==data_train[:1000,-1]).mean()
minibatch = data_train[:10]
print "\n".join([words[x]+" "+words[y] for x,y in zip(model.loss(minibatch, sample=True), minibatch[:,-1])])

print
print "Test:",
answ=model.loss(data_test, sample=True)
print (answ==data_test[:,-1]).mean()
minibatch = data_test[:10]
print "\n".join([words[x]+" "+words[y] for x,y in zip(model.loss(minibatch, sample=True), minibatch[:,-1])])


Train: 0.542
servis servis
servis ev
servis tershane
servis bolum
kantin kantin
kantin kantin
araba kamyon
araba ev
araba araba
araba labaratuvar

Test: 0.281
tamirhane bolum
tamirhane tamirhane
tamirhane tamirhane
banyo tuvalet
servis servis
banyo banyo
servis labaratuvar
banyo araba
banyo araba
banyo servis
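The gap between train (0.542) and test (0.281) accuracy points to heavy memorization by the plain RNN. One quick diagnostic, sketched here and not part of the original notebook, is to check how concentrated the test-set predictions are:

# Count how often each answer word is predicted on the test set;
# a collapse onto a few frequent locations would explain part of the gap.
pred = np.asarray(model.loss(data_test, sample=True))
counts = np.bincount(pred, minlength=len(words))
for v in np.argsort(counts)[::-1][:10]:
    if counts[v] > 0:
        print words[v], counts[v]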
