In [1]:
# Imports and data path
import numpy as np
#path = '/home/ubuntu/data/training/keras/'
path = '/Users/jorge/data/training/keras/'
In [2]:
#Read book
text = open(path + "pg2000.txt").read().lower()
print('corpus length:', len(text))
chars = sorted(list(set(text)))
print('Chars list: ', chars)
print('total chars:', len(chars))
#Dictionaries to convert char to num & num to char
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))
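A quick sanity check (not part of the original run): the two dictionaries should invert each other.
In [ ]:
# Round-trip a few characters through char_indices and indices_char (illustrative only).
sample_chars = list('quijote')
encoded = [char_indices[c] for c in sample_chars]
decoded = ''.join(indices_char[i] for i in encoded)
print(encoded)
print(decoded)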
In [3]:
# Cut the text into semi-redundant sequences of maxlen characters:
# one sequence of length maxlen starting every `step` characters.
maxlen = 20
step = 3
sentences = []
next_chars = []
for i in range(300, len(text) - maxlen, step):  # start at character 300 to skip the Gutenberg header
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))
print(sentences[4996], '-', next_chars[4996])
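To make the windowing concrete, a small illustration on a toy string (not the corpus); `toy_text`, `toy_maxlen` and `toy_step` are made-up values.
In [ ]:
# Toy illustration of the windowing above: one window of toy_maxlen chars every toy_step chars,
# paired with the character that follows it.
toy_text = 'en un lugar de la mancha'
toy_maxlen, toy_step = 10, 3
for j in range(0, len(toy_text) - toy_maxlen, toy_step):
    print(repr(toy_text[j: j + toy_maxlen]), '->', repr(toy_text[j + toy_maxlen]))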
In [4]:
'''
X: one row per sentence;
   each row is a 0/1 matrix of shape maxlen x num_chars encoding the sentence (dummy variables).
y: one row per sentence;
   each row is a 0/1 vector of length num_chars with a 1 at the position of the next character.
'''
print('Vectorization...')
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.float32)
y = np.zeros((len(sentences), len(chars)), dtype=np.int64)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
print('X shape: ',X.shape)
print('y shape: ',y.shape)
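A quick check of the encoding described above (not in the original notebook): decoding row 4996 of X and y should reproduce the pair printed in cell 3.
In [ ]:
# Decode one one-hot row of X and y back to characters.
row = 4996
decoded_sentence = ''.join(indices_char[int(np.argmax(X[row, t]))] for t in range(maxlen))
decoded_next = indices_char[int(np.argmax(y[row]))]
print(decoded_sentence, '-', decoded_next)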
In [5]:
# Define the TensorFlow graph
import tensorflow as tf
batch_size = 512
dim_lstm = 512
sess = tf.InteractiveSession()
x_input = tf.placeholder(tf.float32, shape=[batch_size, maxlen, len(chars)])
y_input = tf.placeholder(tf.int64, shape=[batch_size, len(chars)])
#word_lens = tf.placeholder(tf.int64, shape=[batch_size])
keep_prob = tf.placeholder(tf.float32)
learning_rate = tf.placeholder(tf.float32, name='learning_rate')
cell_1 = tf.nn.rnn_cell.LSTMCell(dim_lstm, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
cell_1 = tf.nn.rnn_cell.DropoutWrapper(cell_1, output_keep_prob=keep_prob)
#x_seq = [tf.squeeze(xi) for xi in tf.split(1, 20, x_input)]
lstm_outputs_1, _ = tf.nn.dynamic_rnn(cell_1, x_input, dtype=tf.float32, scope='rnn1')
cell_2 = tf.nn.rnn_cell.LSTMCell(dim_lstm, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
cell_2 = tf.nn.rnn_cell.DropoutWrapper(cell_2, output_keep_prob=keep_prob)
lstm_outputs_2, _ = tf.nn.dynamic_rnn(cell_2, lstm_outputs_1, dtype=tf.float32, scope='rnn2')
'''
x_seq = [tf.squeeze(xi) for xi in tf.split(1, 20, x_input)]
cell_fw = tf.nn.rnn_cell.LSTMCell(dim_lstm, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
cell_bw = tf.nn.rnn_cell.LSTMCell(dim_lstm, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113))
outputs, output_state_fw, output_state_bw = tf.nn.bidirectional_rnn(cell_fw, cell_bw, x_seq,
dtype=tf.float32, sequence_length=word_lens)
print(outputs)
'''
W_dense = tf.Variable(tf.truncated_normal([dim_lstm, len(chars)], stddev=0.1), name='W_dense')
b_dense = tf.Variable(tf.constant(0.1, shape=[len(chars)]), name='b_dense')
dense_output = tf.nn.relu(tf.matmul(lstm_outputs_2[:,-1,:], W_dense) + b_dense)
print(dense_output)
#Prediction
y_pred = tf.nn.softmax(dense_output)
# Loss function: softmax cross-entropy averaged over the batch (labels cast to float for the op)
global_step = tf.Variable(0, trainable=False)
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(dense_output, tf.cast(y_input, tf.float32)),
    name='cross_entropy')
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,momentum=0.9)
train_op = optimizer.minimize(cross_entropy, global_step=global_step, name='trainer')
#Accuracy
correct_prediction = tf.equal(tf.argmax(dense_output,1), tf.argmax(y_input,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
In [6]:
print(lstm_outputs_2)
In [18]:
# Batch generator: shuffle once, then yield consecutive full mini-batches
def batch_generator(x, y, batch_size=32):
    from sklearn.utils import shuffle
    x_shuffle, y_shuffle = shuffle(x, y, random_state=0)
    for i in range(0, x.shape[0] - batch_size, batch_size):
        x_batch = x_shuffle[i:i + batch_size, :]
        y_batch = y_shuffle[i:i + batch_size]
        yield x_batch, y_batch
seq = batch_generator(X, y, batch_size=20)
print(next(seq))
In [19]:
# Count how many cases one pass of the generator covers over the training slice
num_cases = 0
batch_list = batch_generator(X[:600000], y[:600000], batch_size=batch_size)
for x_batch, y_batch in batch_list:
    num_cases += x_batch.shape[0]
print(num_cases)
In [8]:
# Initialization
print('Initializing')
sess.run(tf.initialize_all_variables())
In [20]:
# Execute the graph to train a network
nEpochs = 50
for epoch in range(nEpochs):
    ce_c = []
    acc_c = []
    ce_c_tst = []
    acc_c_tst = []
    # Train on the first 600000 sequences
    batch_list = batch_generator(X[:600000], y[:600000], batch_size=batch_size)
    for i, batch in enumerate(batch_list):
        feedDict = {x_input: batch[0], y_input: batch[1],
                    keep_prob: 0.3, learning_rate: 0.001}
        _, ce, acc = sess.run([train_op, cross_entropy, accuracy], feed_dict=feedDict)
        ce_c += [ce]
        acc_c += [acc]
        if i % 10 == 0:
            print(epoch, np.mean(ce_c), np.mean(acc_c))
    # Evaluate on the remaining sequences (no dropout)
    batch_list_tst = batch_generator(X[600000:], y[600000:], batch_size=batch_size)
    for x_batch, y_batch in batch_list_tst:
        feedDict = {x_input: x_batch, y_input: y_batch,
                    keep_prob: 1}
        ce_tst, acc_tst = sess.run([cross_entropy, accuracy], feed_dict=feedDict)
        ce_c_tst += [ce_tst]
        acc_c_tst += [acc_tst]
    print(epoch, np.mean(ce_c), np.mean(acc_c), np.mean(ce_c_tst), np.mean(acc_c_tst))
In [15]:
batch_list_tst = batch_generator(X[600000:], y[600000:], batch_size=batch_size)
x_batch, y_batch = next(batch_list_tst)
feedDict = {x_input: x_batch, y_input: y_batch, keep_prob: 1}
pred = y_pred.eval(feed_dict=feedDict)
print(pred)
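To make the raw probabilities readable, a small sketch (not in the original notebook) that maps each row of `pred` to its most likely character and compares it with the true next character from `y_batch`.
In [ ]:
# Decode predicted vs. true next characters for the batch evaluated above.
pred_chars = [indices_char[int(i)] for i in np.argmax(pred, axis=1)]
true_chars = [indices_char[int(i)] for i in np.argmax(y_batch, axis=1)]
print(list(zip(pred_chars, true_chars))[:20])
print('batch accuracy:', np.mean(np.argmax(pred, axis=1) == np.argmax(y_batch, axis=1)))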
In [10]:
maxlen = 20

def sample(a, diversity=1.0):
    '''
    Helper function to sample an index from a probability array.
    - diversity controls the level of randomness
    '''
    a = np.log(a) / diversity
    a = np.exp(a) / np.sum(np.exp(a), axis=0)
    a /= np.sum(a + 0.0000001)  # guard against precision errors
    return np.argmax(np.random.multinomial(1, a, 1))

def generate_text(sentence, diversity, current_model, num_char=400):
    sentence_init = sentence
    generated = ''
    for i in range(num_char):
        x = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x[0, t, char_indices[char]] = 1.
        preds = current_model.predict(x, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]
        generated += next_char
        sentence = sentence[1:] + next_char
    print()
    print('DIVERSITY: ', diversity)
    print(sentence_init + generated)
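A small experiment with a made-up toy distribution (not from the model) showing how the diversity parameter reshapes what `sample` returns: low diversity concentrates on the most likely index, higher diversity flattens the choice.
In [ ]:
# Count which index sample() picks for a toy probability vector at different diversities.
toy_probs = np.array([0.6, 0.25, 0.1, 0.05])
for div in [0.2, 0.5, 1.0, 1.2]:
    draws = [int(sample(toy_probs, div)) for _ in range(1000)]
    counts = [draws.count(k) for k in range(len(toy_probs))]
    print('diversity', div, '->', counts)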
In [11]:
sentence = 'mire vuestra merced '
generate_text(sentence, 0.2, model1)
generate_text(sentence, 0.5, model1)
generate_text(sentence, 1, model1)
generate_text(sentence, 1.2, model1)