In [12]:
# As usual, a bit of setup
import time, os, json
import numpy as np
import matplotlib.pyplot as plt
import pickle
from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from layers import *
from approach1 import *
from solver import *
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
%load_ext autoreload
%autoreload 2
def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
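rel_error reports the largest elementwise relative discrepancy between two arrays; the 1e-8 floor in the denominator keeps the ratio finite when both entries are near zero. A quick illustration with made-up values:
In [ ]:
# Identical arrays give exactly 0; a small perturbation gives an error
# on the order of perturbation / magnitude (about 5e-7 here).
a = np.linspace(1, 2, 5)
print rel_error(a, a)
print rel_error(a, a + 1e-6)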
In [9]:
raw = pickle.load(open("stories.pck", "rb"))
print len(raw), "examples"
print max([len(x) for x in raw]), "supporting facts and questions at most"
print max([len(y.split(' ')) for x in raw for y in x]), "words per sentence at most"
_null, _start, _query, _end = "<NULL>", "<Start>", "<Query>", "<End>"
words = [_null, _start, _query, _end] + [q for ex in raw for sent in ex for w in sent.split(' ') for q in w.split(',')]
words = sorted(set(words))
word_to_idx = {w:i for i,w in enumerate(words)}
print len(words), "total words"
T = 70   # max story length in tokens: 10 supporting sentences of T2 tokens each
T2 = 7   # tokens per supporting sentence, including the <Start>/<End> markers
data = []
for ex in raw:
    # Supporting sentences come first; each question line has the form
    # "question,answer", so the first comma marks the end of the supports.
    sLen = 0
    while ex[sLen].find(',') == -1:
        sLen += 1
    supports = np.full(T, word_to_idx[_null], dtype=int)
    queries = np.full((len(ex) - sLen, 5), word_to_idx[_null], dtype=int)
    pos = 0
    for idx, sent in enumerate(ex):
        if idx < sLen:
            # Wrap each supporting sentence in <Start>/<End> and null-pad to T2 tokens.
            sent = [word_to_idx[_start]] + [word_to_idx[x] for x in sent.split(' ')] + [word_to_idx[_end]]
            supports[pos:pos+T2] = sent + [word_to_idx[_null]] * (T2 - len(sent))
            pos += T2
        else:
            # Wrap each question in <Query>/<End> and null-pad to 5 tokens.
            sent = sent.split(',')[0]
            sent = [word_to_idx[_query]] + [word_to_idx[x] for x in sent.split(' ')] + [word_to_idx[_end]]
            sent = sent + [word_to_idx[_null]] * (5 - len(sent))
            queries[idx - sLen, :] = sent
    # The answer is the word after the comma on each question line.
    answers = np.asarray([word_to_idx[x.split(',')[1]] for x in ex[sLen:]]).reshape(len(ex) - sLen, 1)
    # One training row per question: [T support tokens | 5 query tokens | answer].
    for i in xrange(queries.shape[0]):
        data.append(np.hstack((supports, queries[i, :], answers[i, :])))
data = np.asarray(data)
data_train = data[:-1000,:]
data_test = data[-1000:,:]
print data_train.shape
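Each row of data packs one question: the first T entries are the null-padded support tokens, the next 5 the query tokens, and the final entry the index of the answer word. A quick sanity-check sketch, using only names defined above, decodes a row back to text:
In [ ]:
# Decode one packed training row back into readable text.
row = data_train[0]
print "support:", " ".join(words[t] for t in row[:T] if words[t] != _null)
print "query:  ", " ".join(words[t] for t in row[T:T+5] if words[t] != _null)
print "answer: ", words[row[-1]]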
In [21]:
model = SeqNN(word_to_idx, cell_type='rnn', hidden_dim=256, wordvec_dim=512, sentlen=T2, storylen=T, qlen=5)
solver = SeqNNSolver(model, data_train[:50],
                     update_rule='adam',
                     num_epochs=200,
                     batch_size=25,
                     optim_config={
                         'learning_rate': 1e-3,
                     },
                     lr_decay=.995,
                     verbose=True, print_every=10,
                     )
solver.train()
# Plot the training losses
plt.plot(solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()
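Training on just the first 50 rows is a capacity sanity check: a correct model should drive the loss on such a tiny subset close to zero, and failure to do so usually indicates a bug rather than a hard learning problem. A quick look at the tail of the loss history:
In [ ]:
# Average of the last few iterations; should be near zero when overfitting 50 rows.
print "final loss: %.4f" % np.mean(solver.loss_history[-10:])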
In [36]:
minibatch = data_train[:50]
print "Train:",
answ = model.loss(minibatch, sample=True)
print (answ == minibatch[:, -1]).mean()
# The supporting sentences of the first story:
story = " ".join([words[x] for x in data_train[0, :T]]).split(_null)
for x in story:
    if len(x.split(' ')) > 2:
        print x.strip()
# Every question (and its answer) attached to that same story:
i = 0
while np.all(data_train[0, :T] == data_train[i, :T]):
    print " ".join([words[x] for x in data_train[i, T:T+6]])
    i = i + 1
print
print "Predicted vs. ground-truth answers:"
print "\n".join([words[x] + " " + words[y] for x, y in zip(model.loss(minibatch, sample=True), minibatch[:, -1])])
In [39]:
model = SeqNN(word_to_idx, cell_type='rnn', hidden_dim=256, wordvec_dim=512, sentlen=T2, storylen=T, qlen=5)
solver = SeqNNSolver(model, data_train,
                     update_rule='adam',
                     num_epochs=200,
                     batch_size=25,
                     optim_config={
                         'learning_rate': 5e-4,
                     },
                     lr_decay=.995,
                     verbose=True, print_every=10,
                     )
solver.train()
# Plot the training losses
plt.plot(solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()
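With batch_size=25 the per-iteration loss is noisy; a running mean makes the trend easier to read. A convenience sketch, not part of the solver (the window of 51 iterations is an arbitrary choice):
In [ ]:
# Smooth the per-iteration loss with a simple moving average.
window = 51
smoothed = np.convolve(solver.loss_history, np.ones(window) / window, mode='valid')
plt.plot(smoothed)
plt.xlabel('Iteration')
plt.ylabel('Loss (moving average)')
plt.title('Smoothed training loss')
plt.show()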
In [40]:
print "Train:",
answ=model.loss(data_train[:1000,:], sample=True)
print (answ==data_train[:1000,-1]).mean()
minibatch = data_train[:10]
print "\n".join([words[x]+" "+words[y] for x,y in zip(model.loss(minibatch, sample=True), minibatch[:,-1])])
print
print "Test:",
answ=model.loss(data_test, sample=True)
print (answ==data_test[:,-1]).mean()
minibatch = data_test[:10]
print "\n".join([words[x]+" "+words[y] for x,y in zip(model.loss(minibatch, sample=True), minibatch[:,-1])])