In [1]:
# Import TensorFlow >= 1.9 and enable eager execution
import tensorflow as tf

# Note: Once you enable eager execution, it cannot be disabled. 
tf.enable_eager_execution()

import numpy as np
import re
import random
import unidecode
import time


C:\ProgramData\Anaconda3\lib\site-packages\h5py\__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/yashkatariya/shakespeare.txt')


Downloading data from https://storage.googleapis.com/yashkatariya/shakespeare.txt
1122304/1115394 [==============================] - 1s 1us/step

In [3]:
text = unidecode.unidecode(open(path_to_file).read())
# length of text is the number of characters in it
print (len(text))
# unique contains all the unique characters in the file
unique = sorted(set(text))

# creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(unique)}
idx2char = {i:u for i, u in enumerate(unique)}
# setting the maximum length sentence we want for a single input in characters
max_length = 100

# length of the vocabulary in chars
vocab_size = len(unique)

# the embedding dimension 
embedding_dim = 256

# number of RNN (here GRU) units
units = 1024

# batch size 
BATCH_SIZE = 64

# buffer size to shuffle our dataset
BUFFER_SIZE = 10000


1115394

In [4]:
input_text = []
target_text = []

for f in range(0, len(text)-max_length, max_length):
    inps = text[f:f+max_length]
    targ = text[f+1:f+1+max_length]

    input_text.append([char2idx[i] for i in inps])
    target_text.append([char2idx[t] for t in targ])
    
print (np.array(input_text).shape)
print (np.array(target_text).shape)
dataset = tf.data.Dataset.from_tensor_slices((input_text, target_text)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(batch_size=BATCH_SIZE, drop_remainder=True)


(11153, 100)
(11153, 100)
WARNING:tensorflow:From <ipython-input-4-efa229c483f8>:14: batch_and_drop_remainder (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.batch(..., drop_remainder=True)`.

In [5]:
class Model(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, units, batch_size):
    super(Model, self).__init__()
    self.units = units
    self.batch_sz = batch_size

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    if tf.test.is_gpu_available():
      self.gru = tf.keras.layers.CuDNNGRU(self.units, 
                                          return_sequences=True, 
                                          return_state=True, 
                                          recurrent_initializer='glorot_uniform')
    else:
      self.gru = tf.keras.layers.GRU(self.units, 
                                     return_sequences=True, 
                                     return_state=True, 
                                     recurrent_activation='sigmoid', 
                                     recurrent_initializer='glorot_uniform')

    self.fc = tf.keras.layers.Dense(vocab_size)
        
  def call(self, x, hidden):
    x = self.embedding(x)

    # output shape == (batch_size, max_length, hidden_size) 
    # states shape == (batch_size, hidden_size)

    # states variable to preserve the state of the model
    # this will be used to pass at every step to the model while training
    output, states = self.gru(x, initial_state=hidden)


    # reshaping the output so that we can pass it to the Dense layer
    # after reshaping the shape is (batch_size * max_length, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # The dense layer will output predictions for every time_steps(max_length)
    # output shape after the dense layer == (max_length * batch_size, vocab_size)
    x = self.fc(output)

    return x, states

In [6]:
model = Model(vocab_size, embedding_dim, units, BATCH_SIZE)
optimizer = tf.train.AdamOptimizer()

# using sparse_softmax_cross_entropy so that we don't have to create one-hot vectors
def loss_function(real, preds):
    return tf.losses.sparse_softmax_cross_entropy(labels=real, logits=preds)

In [7]:
# Training step

EPOCHS = 30

for epoch in range(EPOCHS):
    start = time.time()
    
    # initializing the hidden state at the start of every epoch
    hidden = model.reset_states()
    
    for (batch, (inp, target)) in enumerate(dataset):
          with tf.GradientTape() as tape:
              # feeding the hidden state back into the model
              # This is the interesting step
              predictions, hidden = model(inp, hidden)
              
              # reshaping the target because that's how the 
              # loss function expects it
              target = tf.reshape(target, (-1,))
              loss = loss_function(target, predictions)
              
          grads = tape.gradient(loss, model.variables)
          optimizer.apply_gradients(zip(grads, model.variables), global_step=tf.train.get_or_create_global_step())

          if batch % 100 == 0:
              print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch+1,
                                                            batch,
                                                            loss))
    
    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


Epoch 1 Batch 0 Loss 4.1695
Epoch 1 Batch 100 Loss 2.1673
Epoch 1 Loss 1.9951
Time taken for 1 epoch 1174.355381011963 sec

Epoch 2 Batch 0 Loss 1.9240
Epoch 2 Batch 100 Loss 1.7606
Epoch 2 Loss 1.6903
Time taken for 1 epoch 1032.972647190094 sec

Epoch 3 Batch 0 Loss 1.6533
Epoch 3 Batch 100 Loss 1.5444
Epoch 3 Loss 1.5352
Time taken for 1 epoch 1030.8399131298065 sec

Epoch 4 Batch 0 Loss 1.4558
Epoch 4 Batch 100 Loss 1.4525
Epoch 4 Loss 1.4637
Time taken for 1 epoch 1019.3054502010345 sec

Epoch 5 Batch 0 Loss 1.3907
Epoch 5 Batch 100 Loss 1.3789
Epoch 5 Loss 1.3920
Time taken for 1 epoch 1037.3343195915222 sec

Epoch 6 Batch 0 Loss 1.3339
Epoch 6 Batch 100 Loss 1.3505
Epoch 6 Loss 1.3729
Time taken for 1 epoch 1035.5254275798798 sec

Epoch 7 Batch 0 Loss 1.2685
Epoch 7 Batch 100 Loss 1.3192
Epoch 7 Loss 1.3207
Time taken for 1 epoch 1038.3602838516235 sec

Epoch 8 Batch 0 Loss 1.2238
Epoch 8 Batch 100 Loss 1.2492
Epoch 8 Loss 1.2666
Time taken for 1 epoch 1065.1876459121704 sec

Epoch 9 Batch 0 Loss 1.1235
Epoch 9 Batch 100 Loss 1.2190
Epoch 9 Loss 1.2607
Time taken for 1 epoch 1441.918731212616 sec

Epoch 10 Batch 0 Loss 1.1330
Epoch 10 Batch 100 Loss 1.1619
Epoch 10 Loss 1.2168
Time taken for 1 epoch 1624.5265452861786 sec

Epoch 11 Batch 0 Loss 1.0685
Epoch 11 Batch 100 Loss 1.1521
Epoch 11 Loss 1.1479
Time taken for 1 epoch 1628.5719792842865 sec

Epoch 12 Batch 0 Loss 1.0658
Epoch 12 Batch 100 Loss 1.0685
Epoch 12 Loss 1.1177
Time taken for 1 epoch 1644.5875489711761 sec

Epoch 13 Batch 0 Loss 0.9818
Epoch 13 Batch 100 Loss 1.0577
Epoch 13 Loss 1.0414
Time taken for 1 epoch 1648.5339035987854 sec

Epoch 14 Batch 0 Loss 0.9207
Epoch 14 Batch 100 Loss 1.0688
Epoch 14 Loss 1.0654
Time taken for 1 epoch 1660.587124824524 sec

Epoch 15 Batch 0 Loss 0.9090
Epoch 15 Batch 100 Loss 1.0236
Epoch 15 Loss 1.0472
Time taken for 1 epoch 1726.7953419685364 sec

Epoch 16 Batch 0 Loss 0.8591
Epoch 16 Batch 100 Loss 0.9623
Epoch 16 Loss 0.9828
Time taken for 1 epoch 1669.5266633033752 sec

Epoch 17 Batch 0 Loss 0.8414
Epoch 17 Batch 100 Loss 0.9366
Epoch 17 Loss 0.9782
Time taken for 1 epoch 1713.5808050632477 sec

Epoch 18 Batch 0 Loss 0.7830
Epoch 18 Batch 100 Loss 0.8776
Epoch 18 Loss 0.9236
Time taken for 1 epoch 1678.7940528392792 sec

Epoch 19 Batch 0 Loss 0.7340
Epoch 19 Batch 100 Loss 0.8681
Epoch 19 Loss 0.9012
Time taken for 1 epoch 1574.3701367378235 sec

Epoch 20 Batch 0 Loss 0.7416
Epoch 20 Batch 100 Loss 0.8410
Epoch 20 Loss 0.8708
Time taken for 1 epoch 1015.0595569610596 sec

Epoch 21 Batch 0 Loss 0.6894
Epoch 21 Batch 100 Loss 0.8359
Epoch 21 Loss 0.8697
Time taken for 1 epoch 1014.5920464992523 sec

Epoch 22 Batch 0 Loss 0.6894
Epoch 22 Batch 100 Loss 0.8011
Epoch 22 Loss 0.8434
Time taken for 1 epoch 1014.4523389339447 sec

Epoch 23 Batch 0 Loss 0.6948
Epoch 23 Batch 100 Loss 0.7862
Epoch 23 Loss 0.8617
Time taken for 1 epoch 1014.7266185283661 sec

Epoch 24 Batch 0 Loss 0.6528
Epoch 24 Batch 100 Loss 0.7845
Epoch 24 Loss 0.8415
Time taken for 1 epoch 1017.875617980957 sec

Epoch 25 Batch 0 Loss 0.6274
Epoch 25 Batch 100 Loss 0.7903
Epoch 25 Loss 0.7860
Time taken for 1 epoch 1014.7340230941772 sec

Epoch 26 Batch 0 Loss 0.6317
Epoch 26 Batch 100 Loss 0.7692
Epoch 26 Loss 0.8090
Time taken for 1 epoch 1013.918208360672 sec

Epoch 27 Batch 0 Loss 0.5804
Epoch 27 Batch 100 Loss 0.7597
Epoch 27 Loss 0.7767
Time taken for 1 epoch 1014.0555167198181 sec

Epoch 28 Batch 0 Loss 0.6114
Epoch 28 Batch 100 Loss 0.7109
Epoch 28 Loss 0.7425
Time taken for 1 epoch 1012.9030754566193 sec

Epoch 29 Batch 0 Loss 0.5900
Epoch 29 Batch 100 Loss 0.7467
Epoch 29 Loss 0.7646
Time taken for 1 epoch 1014.1063311100006 sec

Epoch 30 Batch 0 Loss 0.5894
Epoch 30 Batch 100 Loss 0.6999
Epoch 30 Loss 0.7616
Time taken for 1 epoch 1014.307460308075 sec


In [8]:
# Evaluation step(generating text using the model learned)

# number of characters to generate
num_generate = 1000

# You can change the start string to experiment
start_string = 'T'
# converting our start string to numbers(vectorizing!) 
input_eval = [char2idx[s] for s in start_string]
input_eval = tf.expand_dims(input_eval, 0)

# empty string to store our results
text_generated = ''

# low temperatures results in more predictable text.
# higher temperatures results in more surprising text
# experiment to find the best setting
temperature = 2.0

# hidden state shape == (batch_size, number of rnn units); here batch size == 1
hidden = [tf.zeros((1, units))]
for i in range(num_generate):
    predictions, hidden = model(input_eval, hidden)

    # using a multinomial distribution to predict the word returned by the model
    predictions = predictions / temperature
    predicted_id = tf.multinomial(tf.exp(predictions), num_samples=1)[0][0].numpy()
    
    # We pass the predicted word as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    
    text_generated += idx2char[predicted_id]

print (start_string + text_generated)


Thus to make looks,
When they do hate the daughter of a fair uncle's son,
Being put to pray for you to the world nor heaven,
That seeming would be talk'd with peace,
And over-ship more wondering in his breast.
What shall we do?

CLARENCE:

WARWICK:
I have some foot my gracious lord, I tell you, fellow, you may say.

HORTENSIO:
The master, my lord,
I have seen them in the house of a man
Your part in him: wherefore do I stay here be claim by on my knee,
And that the entertainment was the duke made bold words in prodigy,
He should take life to be the fault that thou hast been,
And threatened weeds at home to endure
My dearth to use my part from the heart to go.

BAPTISTA:
I must confess your grace to come, my gracious lord.

QUEEN MARGARET:
Ay, now be sent to them.

BIONDELLO:
Sir, my mistress.

KING RICHARD III:
Here pity that thou hast said enough commonwealth.
What say you, Signior Gremio?

GREMIO:
And may not young men die.

First Servingman:
By my house we would have said for this mil

In [ ]: