In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# load the dataset
import os
with open('sharespeare-kaparthy.txt') as f:
    raw = f.read()
print raw[:1000] # print the first 1000 characters


First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [3]:
# convert raw data into indices
import numpy as np
from pystacks.utils.text.vocab import Vocab

vocab = Vocab(unk=False)
data = np.array([vocab.add(c) for c in raw])
print data[:100] # print the first 100 character indices


[ 0  1  2  3  4  5  6  1  4  1  7  8  9 10 11 12  8 13 14  2  8  5 15  8  5
 16  2 14 17  8  8 18  5 19  9 20  5 13 21  2  4 22  8  2 23  5 22  8 19  2
  5 24  8  5  3 16  8 19 25 26 11 11 27 28 28 10 11 29 16  8 19 25 23  5  3
 16  8 19 25 26 11 11  0  1  2  3  4  5  6  1  4  1  7  8  9 10 11 30 14 21]
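
Vocab here is pystacks' vocabulary class; it is assumed to behave roughly like the minimal sketch below: add interns a character and returns its integer index, indexing looks an existing character up, and index2word maps indices back. The names mirror the calls used in this notebook; the real implementation may differ.

In [ ]:
class MinimalVocab(object):
    """Hypothetical stand-in for pystacks.utils.text.vocab.Vocab with unk=False."""
    def __init__(self):
        self.word2index = {}
        self.index2word = []

    def add(self, word):
        # intern the symbol, assigning the next free index on first sight
        if word not in self.word2index:
            self.word2index[word] = len(self.index2word)
            self.index2word.append(word)
        return self.word2index[word]

    def __getitem__(self, word):
        return self.word2index[word]

    def __len__(self):
        return len(self.index2word)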

In [4]:
def get_batch(ngrams=10, batch_size=100):
    # sample batch_size random windows; the target is the input shifted by one character
    X, Y = [], []
    for i in np.random.randint(len(data)-ngrams-1, size=batch_size):
        x = data[i:i+ngrams]       # input characters
        y = data[i+1:i+ngrams+1]   # next character at each position
        X.append(x)
        Y.append(y)
    X = np.array(X, dtype='int32').reshape(batch_size, ngrams, 1)
    Y = np.array(Y, dtype='int32')
    return X, Y

X, Y = get_batch()
print X.shape # dim0 is batch size, dim1 is time steps, dim2 is feature size
print Y.shape # dim0 is batch size, dim1 is time steps

def one_hot(y):
    # expand integer labels (batch, time) into one-hot vectors (batch, time, vocab)
    Y = np.zeros(list(y.shape) + [len(vocab)])
    for batch in xrange(Y.shape[0]):
        for time in xrange(Y.shape[1]):
            Y[batch, time, y[batch, time]] = 1
    return Y.astype('float32')

print one_hot(Y).shape


(100, 10, 1)
(100, 10)
(100, 10, 65)
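
The double loop in one_hot is easy to read but slow for large batches; the same encoding can be written as a single fancy-indexing assignment. A pure-numpy sketch with the same (batch, time, vocab) layout:

In [ ]:
def one_hot_fast(y, vocab_size):
    # y: int array of shape (batch, time) -> one-hot array (batch, time, vocab_size)
    out = np.zeros(list(y.shape) + [vocab_size], dtype='float32')
    b, t = np.indices(y.shape)
    out[b, t, y] = 1.
    return out

assert (one_hot_fast(Y, len(vocab)) == one_hot(Y)).all()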

In [5]:
# build a character-level language model
from theano import tensor as T, function
from pystacks.layers.container import Recurrent
from pystacks.layers.memory import GRUMemoryLayer, LSTMMemoryLayer
from pystacks.layers.lookup import LookupTable
from pystacks.layers.common import LinearLayer, Tanh, Softmax, Dropout
from pystacks.criteria import cross_entropy_loss
from pystacks.transformer import UnitNorm

emb_size = 20
h1_size = 500
h2_size = 500

net = Recurrent([
        LookupTable(len(vocab), emb_size, E_transform=UnitNorm()), 
        LSTMMemoryLayer(emb_size, h1_size), 
        LSTMMemoryLayer(h1_size, h2_size), 
        LinearLayer(h2_size, len(vocab)), 
        Softmax()])

sym_X = T.itensor3()
sym_prob = net.forward(sym_X, return_sequence=True, truncate_grad=50)
sym_pred = sym_prob.argmax(axis=-1)

f_pred = function([sym_X], [sym_prob, sym_pred])


Using gpu device 0: GeForce GTX 760
/usr/local/lib/python2.7/site-packages/theano/scan_module/scan_perform_ext.py:133: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  from scan_perform.scan_perform import *
Couldn't import dot_parser, loading of dot files will not be possible.
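
For reference, one LSTM step in the textbook formulation that LSTMMemoryLayer presumably implements; the gate packing, parameterisation and initialisation in pystacks may differ. A pure-numpy sketch, not the library's code:

In [ ]:
def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def lstm_step(x, h_prev, c_prev, W, U, b):
    # W: (in_size, 4n), U: (n, 4n), b: (4n,); gates packed as [i, f, o, g]
    z = x.dot(W) + h_prev.dot(U) + b
    n = h_prev.shape[-1]
    i = sigmoid(z[..., :n])        # input gate
    f = sigmoid(z[..., n:2*n])     # forget gate
    o = sigmoid(z[..., 2*n:3*n])   # output gate
    g = np.tanh(z[..., 3*n:])      # candidate cell update
    c = f * c_prev + i * g         # new cell state
    h = o * np.tanh(c)             # new hidden state
    return h, c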

In [6]:
# snapshot the freshly initialised weights so later runs can restart from scratch
original_weights = {name: param.var.get_value() for name, param in net.params.items()}

def reset_weights():
    for name, param in net.params.items():
        param.var.set_value(original_weights[name])
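
The snapshot above lives only in memory; to survive a kernel restart, the same dict can be written to disk with numpy. A sketch, assuming the parameter names are usable as npz keys:

In [ ]:
def save_weights(path='char_model_weights.npz'):
    np.savez(path, **{name: param.var.get_value() for name, param in net.params.items()})

def load_weights(path='char_model_weights.npz'):
    snapshot = np.load(path)
    for name, param in net.params.items():
        param.var.set_value(snapshot[name])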

In [ ]:
prob, pred = f_pred(X)
print prob.shape # (batch_size, time_steps, vocab_size) probabilities
print pred.shape # (batch_size, time_steps) predicted class label at each step


(100, 10, 65)
(100, 10)

In [ ]:
from pystacks.optimizer import RMSProp
from pystacks.gradient_transformer import ClipGradientNorm

optimizer = RMSProp()

sym_Y = T.ftensor3()

sym_loss = cross_entropy_loss(sym_prob, sym_Y)
sym_acc = T.mean(T.eq(sym_pred, sym_Y.argmax(-1)))
sym_lr = T.fscalar()

updates = net.grad_updates(sym_loss, lr=sym_lr, optimizer=optimizer, default_grad_transformer=ClipGradientNorm(20.))

train = function([sym_X, sym_Y, sym_lr], [sym_loss, sym_acc], updates=updates)
test = function([sym_X, sym_Y], [sym_loss, sym_acc])
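
cross_entropy_loss is assumed to average the per-character negative log-likelihood over both batch and time, i.e. roughly the numpy computation below; the library may reduce or mask differently:

In [ ]:
def np_cross_entropy(prob, Y_onehot, eps=1e-8):
    # prob, Y_onehot: (batch, time, vocab); mean of -log p(correct char)
    return -np.mean(np.log((prob * Y_onehot).sum(axis=-1) + eps))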

In [ ]:
ngrams = 100
batch_size = 64
num_batches = 5000
print_every = 100
decay_rate = 1e-3
lr = 1e-2

from time import time

reset_weights()

start = time()
for i in xrange(num_batches):
    X, Y = get_batch(ngrams, batch_size)
    loss, acc = train(X, one_hot(Y), lr)
    lr *= 1. / (1. + decay_rate)  # anneal the learning rate every batch
    
    if i % print_every == 0:
        print 'iteration', i, 'loss', loss, 'acc', acc, 'elapsed', time() - start
        start = time()
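
Multiplying by 1/(1 + decay_rate) every batch gives lr_n = lr_0 / (1 + decay_rate)^n, roughly lr_0 * exp(-n * decay_rate), so over the full run the learning rate falls from 1e-2 to about 6.7e-5. A quick check:

In [ ]:
lr0 = 1e-2
print lr0 / (1. + decay_rate) ** num_batches  # ~6.7e-05 after 5000 batches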

In [ ]:
chars = ['I']  # seed character
for i in xrange(1000):
    in_ind = [vocab[c] for c in chars]
    # feed the last ngrams characters and take the distribution at the final step
    prob, pred = f_pred(np.array(in_ind[-ngrams:], dtype='int32').reshape(1, -1, 1))
    # sample the next character from the predicted distribution
    char = np.random.choice(vocab.index2word, p=prob[0, -1].flatten())
    chars.append(char)
print ''.join(chars)
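
Sampling straight from the softmax can look noisy early in training; a common variant, not part of this notebook, rescales the distribution with a temperature before sampling. Temperatures below 1 make the samples more conservative, above 1 more adventurous. A sketch reusing prob from f_pred:

In [ ]:
def sample_with_temperature(p, temperature=0.8):
    # sharpen (temperature < 1) or flatten (temperature > 1) the next-char distribution
    logp = np.log(p + 1e-8) / temperature
    p = np.exp(logp - logp.max())
    return np.random.choice(len(p), p=p / p.sum())

char = vocab.index2word[sample_with_temperature(prob[0, -1].flatten())]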

In [ ]: