In [1]:
'''
This notebook is adapted from the original code at the URL below.
It is used here to model a sequence made up of only three integers (1, 2, 3).
The model reads 12 consecutive inputs and predicts the next value.
Author: Rowel Atienza
Project: https://github.com/roatienza/Deep-Learning-Experiments
'''

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time

In [2]:
# Wall-clock reference point; the training cell reports time elapsed since here.
start_time = time.time()
def elapsed(sec):
    """Render a duration given in seconds as a short human-readable string.

    Durations under a minute are reported in seconds, under an hour in
    minutes, and anything longer in hours. The numeric part is the plain
    ``str()`` of the (possibly fractional) value, followed by the unit.

    Args:
        sec: Duration in seconds.

    Returns:
        A string such as ``"42 sec"``, ``"1.5 min"`` or ``"2.0 hr"``.
    """
    if sec<60:
        amount, unit = sec, " sec"
    elif sec<(60*60):
        amount, unit = sec/60, " min"
    else:
        amount, unit = sec/(60*60), " hr"
    return str(amount) + unit

In [3]:
# Target log path
# Directory that receives the TensorBoard event files; the training cell later
# prints it as the value to pass to `tensorboard --logdir`.
logs_path = 'rnn_code'
# The writer is created here, at configuration time, and used by the training
# cell to record the graph.
writer = tf.summary.FileWriter(logs_path)

# Text file holding the training sequence as comma-separated values
# (it is parsed by read_data, which splits on ',').
training_file = 'input_code2.txt'

In [4]:
def read_data(fname):
    """Read a comma-separated sequence file and return its tokens as strings.

    The file is read in one go and split on ``','``. Each token is stripped of
    surrounding whitespace (including newlines) and empty tokens are dropped.
    A file therefore yields the same tokens whether or not it ends with a
    trailing comma and/or newline; the previous implementation always
    discarded the last token, which lost a real value when the trailing comma
    was absent.

    Args:
        fname: Path of the text file to read.

    Returns:
        A list of non-empty token strings in file order (empty list for an
        empty file).
    """
    with open(fname) as f:
        content = f.read()
    tokens = (token.strip() for token in content.split(','))
    return [token for token in tokens if token]

# Scratch: reads a file directly and experiments with splitting its content.
# The statements had been collapsed onto a single line; they are restored to
# one statement per line so the snippet is valid Python again.
with open('inputcode.txt',encoding="utf8") as f:
    content = f.read()
data = content.split(',')
print(data)

# Turns the raw text into a list of single characters.
content = list(content)

# Splits every single-character entry on ',' (each entry becomes a short list).
content = [content[i].split(',') for i in range(len(content))]

# NOTE(review): `split` is not defined anywhere in the visible notebook, so this
# call raises NameError as written. Kept verbatim until the intent is confirmed.
print(split(content))


In [5]:
# Load the raw tokens (strings) from the training file and show them.
training_data = read_data(training_file)
print("Loaded training data...")
print(training_data)

# Convert every token to an integer and show the converted sequence.
# NOTE(review): the captured output contains a 21, which is outside the 1-3
# alphabet described in the notebook header; this looks like a missing comma
# in the input file. Confirm and fix the data if so.
training_data = [int(token) for token in training_data]
print(training_data)


Loaded training data...
['1', '2', '1', '2', '1', '2', '1', '21', '2', '1', '2', '1', '3', '3', '3', '1', '2', '3', '2', '1', '2', '3', '1', '2', '3', '2', '1', '2', '3', '2', '1', '2', '3', '2', '1', '2', '3', '2', '1', '2', '3', '2', '1', '2', '3', '2', '1', '2', '2', '1', '2', '3', '2', '2', '2', '2', '1', '1', '1', '1', '2', '3', '2', '2', '3', '2']
[1, 2, 1, 2, 1, 2, 1, 21, 2, 1, 2, 1, 3, 3, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 2, 1, 2, 3, 2, 2, 2, 2, 1, 1, 1, 1, 2, 3, 2, 2, 3, 2]

In [6]:
# Sanity check: show the first ten values and the total number of samples.
print(training_data[:10])
print(len(training_data))


[1, 2, 1, 2, 1, 2, 1, 21, 2, 1]
66

def build_dataset(words):
    """Build symbol-to-index and index-to-symbol lookup tables.

    Symbols are numbered in order of decreasing frequency: the most common
    symbol gets index 0, the next one index 1, and so on.

    Args:
        words: Iterable of hashable symbols (the training sequence).

    Returns:
        A ``(dictionary, reverse_dictionary)`` tuple where ``dictionary`` maps
        each symbol to its index and ``reverse_dictionary`` maps each index
        back to its symbol.
    """
    # most_common() yields (symbol, count) pairs; only the symbol is needed.
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary

# Scratch: derive the vocabulary from the training data and inspect it.
# Statements that had been collapsed onto shared lines are restored to one
# statement per line so the snippet is valid Python again.
dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)

print(vocab_size)
print(dictionary)

print(training_data)
# Distinct values present in the data (set order is not guaranteed).
vocab = list(set(training_data))
print(vocab)
# Hand-written vocabulary, as strings.
vocab1 = ['1','2','3']
print(vocab1)


In [7]:
# Parameters
learning_rate = 0.001  # learning rate handed to the RMSProp optimizer
training_iters = 50000  # number of training steps (one input window per step)
display_step = 1000  # report averaged loss/accuracy every this many steps
n_input = 12  # length of the input window; the value right after it is the target
vocab = [1,2,3]  # symbols the sequence is expected to contain
vocab_size = 3  # number of output classes, i.e. width of the one-hot target
# number of units in RNN cell
n_hidden = 512
py_flag = False  # NOTE(review): not referenced by any visible cell; confirm it can be removed
# tf Graph input
# x carries a single window of n_input scalar values; y carries the matching
# one-hot target of width vocab_size. Both have a fixed batch dimension of 1.
x = tf.placeholder("float", [1, n_input, 1])
y = tf.placeholder("float", [1, vocab_size])

In [8]:
# Trainable parameters of the output layer that maps the last LSTM output
# (n_hidden values) to one score per class (vocab_size values). Both are
# initialised from a normal distribution and stored under the key 'out'.
weights = dict(out=tf.Variable(tf.random_normal([n_hidden, vocab_size])))
biases = dict(out=tf.Variable(tf.random_normal([vocab_size])))

In [9]:
def RNN(x, weights, biases):
    """Build the single-layer LSTM classifier and return its output scores.

    The input is flattened to ``[-1, n_input]``, cut into ``n_input`` pieces
    along axis 1 (one per time step), and fed through a ``BasicLSTMCell`` with
    ``n_hidden`` units via ``static_rnn``. Only the output of the final time
    step is projected through the output layer.

    Args:
        x: Input tensor that can be reshaped to ``[-1, n_input]``.
        weights: Dict whose ``'out'`` entry is the output projection matrix.
        biases: Dict whose ``'out'`` entry is the output bias.

    Returns:
        The tensor ``last_output @ weights['out'] + biases['out']``
        (unnormalised class scores).
    """
    # One row per example, one column per time step.
    flat_inputs = tf.reshape(x, [-1, n_input])

    # A Python list of n_input tensors, one per time step.
    step_inputs = tf.split(flat_inputs, n_input, 1)

    lstm_cell = rnn.BasicLSTMCell(n_hidden)
    step_outputs, _final_state = rnn.static_rnn(lstm_cell, step_inputs, dtype=tf.float32)

    # static_rnn returns one output per time step; only the last one is used.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

In [10]:
# Build the model graph; pred holds the unnormalised class scores returned by RNN.
pred = RNN(x, weights, biases)

# Loss and optimizer
# Softmax cross-entropy between pred (as logits) and the one-hot target y,
# averaged and minimised with RMSProp at the configured learning rate.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Model evaluation
# A prediction counts as correct when the arg-max class of pred equals that of y;
# accuracy is the mean of those 0/1 outcomes.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
# Defined last, after the model and the optimizer have been added to the graph.
init = tf.global_variables_initializer()
# testing
# Scratch: build one (input window, one-hot target) pair by hand at a fixed
# offset. The statements were indented under a column-0 comment and partly
# collapsed onto shared lines; they are normalised to valid top-level Python.

#symbols_in_keys = [ [dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]
#symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
#input_set = np.reshape(training_data, [-1, n_input, 1])
offset = 8
symbols_out_onehot = np.zeros([vocab_size], dtype=float)
#symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
# The value following the window selects the hot position (value v -> index v-1).
symbols_out_onehot[int(training_data[offset+n_input]) - 1] = 1.0
print(symbols_out_onehot)
#symbols_out_onehot[training_data[offset+n_input]] = 1.0
#symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

# NOTE(review): `dictionary` is only produced by the build_dataset scratch. If it
# was built from integer training data its keys are ints, and these str(...)
# lookups would raise KeyError. Confirm whether these two lines are still wanted.
symbols_in_keys = [ [dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]

symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

# Same window expressed with the raw integer values instead of dictionary keys.
input_data = [training_data[i] for i in range(offset, offset+n_input)]
input_set = np.reshape(input_data, [-1, n_input, 1])
print(input_set)


In [11]:
# Launch the graph
# Trains the model one window at a time: each step feeds n_input consecutive
# values as input and the value that follows them as a one-hot target, then
# reports the averaged loss/accuracy every display_step steps.
with tf.Session() as session:
    session.run(init)
    step = 0
    offset = random.randint(0,n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    writer.add_graph(session.graph)
    # Push the graph event to disk now instead of waiting for the writer's
    # periodic flush.
    writer.flush()

    while step < training_iters: #training_iters=50000
        # Generate a minibatch. Add some randomness on selection process.
        # Once the window plus its target would run past the end of the data,
        # restart from a random position near the beginning.
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)

        input_data = training_data[offset:offset+n_input]
        input_set = np.reshape(input_data, [-1, n_input, 1])

        # The target is the value right after the window. Labels are mapped to
        # one-hot positions as value v -> index v-1, so they must lie in
        # 1..vocab_size; anything else would either overflow the vector or
        # (for 0 / negatives) silently wrap around to the wrong class.
        target = int(training_data[offset+n_input])
        if not 1 <= target <= vocab_size:
            raise ValueError(
                "training_data[%d] = %r is outside the expected label range 1..%d"
                % (offset+n_input, training_data[offset+n_input], vocab_size))
        symbols_out_onehot = np.zeros([1, vocab_size], dtype=float)
        symbols_out_onehot[0, target - 1] = 1.0

        _, acc, loss, onehot_pred = session.run(
            [optimizer, accuracy, cost, pred],
            feed_dict={x: input_set, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc
        if (step+1) % display_step == 0:
            print("Iter= " + str(step+1) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = input_data
            symbols_out = training_data[offset + n_input]
            # onehot_pred is already a NumPy array, so take the arg-max with
            # NumPy. Calling tf.argmax(...).eval() here would add a new op to
            # the graph on every report.
            symbols_out_pred = int(np.argmax(onehot_pred, axis=1)[0]) + 1
            print("%s - [%s] vs [%s]" % (symbols_in,symbols_out,symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")
    # Measured from start_time, i.e. from when the timing cell was executed.
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")
    print("\ttensorboard --logdir=%s" % (logs_path))
    print("Point your web browser to: http://localhost:6006/")
    # Disabled interactive generation loop, kept as an inert string literal.
    '''
    while True:
        prompt = "%s words: " % n_input
        sentence = input(prompt)
        sentence = sentence.strip()
        words = sentence.split(' ')
        if len(words) != n_input:
            continue
        try:
            symbols_in_keys = [dictionary[str(words[i])] for i in range(len(words))]
            for i in range(32):
                keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
                onehot_pred = session.run(pred, feed_dict={x: keys})
                onehot_pred_index = int(tf.argmax(onehot_pred, 1).eval())
                sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
                symbols_in_keys = symbols_in_keys[1:]
                symbols_in_keys.append(onehot_pred_index)
            print(sentence)
        except:
            print("Word not in dictionary")'''


Iter= 1000, Average Loss= 1.011215, Average Accuracy= 60.80%
[3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 2] - [1] vs [1]
Iter= 2000, Average Loss= 0.414976, Average Accuracy= 85.10%
[3, 2, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3] - [2] vs [2]
Iter= 3000, Average Loss= 0.263194, Average Accuracy= 91.20%
[3, 3, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2] - [3] vs [3]
Iter= 4000, Average Loss= 0.170070, Average Accuracy= 95.00%
[3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 2] - [1] vs [1]
Iter= 5000, Average Loss= 0.166822, Average Accuracy= 94.80%
[1, 2, 1, 21, 2, 1, 2, 1, 3, 3, 3, 1] - [2] vs [2]
Iter= 6000, Average Loss= 0.124020, Average Accuracy= 96.50%
[1, 2, 3, 2, 2, 2, 2, 1, 1, 1, 1, 2] - [3] vs [3]
Iter= 7000, Average Loss= 0.127741, Average Accuracy= 96.50%
[2, 1, 3, 3, 3, 1, 2, 3, 2, 1, 2, 3] - [1] vs [1]
Iter= 8000, Average Loss= 0.109479, Average Accuracy= 97.40%
[2, 3, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2] - [3] vs [3]
Iter= 9000, Average Loss= 0.094220, Average Accuracy= 97.80%
[3, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3] - [2] vs [2]
Iter= 10000, Average Loss= 0.116833, Average Accuracy= 97.50%
[2, 1, 2, 1, 21, 2, 1, 2, 1, 3, 3, 3] - [1] vs [1]
Iter= 11000, Average Loss= 0.095238, Average Accuracy= 97.10%
[1, 2, 3, 2, 2, 2, 2, 1, 1, 1, 1, 2] - [3] vs [3]
Iter= 12000, Average Loss= 0.131454, Average Accuracy= 95.80%
[1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 2, 1] - [2] vs [2]
Iter= 13000, Average Loss= 0.113738, Average Accuracy= 97.40%
[3, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3] - [2] vs [2]
Iter= 14000, Average Loss= 0.083460, Average Accuracy= 97.80%
[1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2] - [1] vs [1]
Iter= 15000, Average Loss= 0.078838, Average Accuracy= 98.00%
[1, 2, 1, 2, 1, 21, 2, 1, 2, 1, 3, 3] - [3] vs [3]
Iter= 16000, Average Loss= 0.079949, Average Accuracy= 97.80%
[2, 1, 2, 1, 3, 3, 3, 1, 2, 3, 2, 1] - [2] vs [2]
Iter= 17000, Average Loss= 0.088632, Average Accuracy= 97.20%
[3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2] - [3] vs [3]
Iter= 18000, Average Loss= 0.076816, Average Accuracy= 98.40%
[2, 1, 2, 1, 2, 1, 21, 2, 1, 2, 1, 3] - [3] vs [3]
Iter= 19000, Average Loss= 0.077276, Average Accuracy= 98.10%
[3, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3] - [2] vs [2]
Iter= 20000, Average Loss= 0.112349, Average Accuracy= 96.70%
[2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1] - [2] vs [2]
Iter= 21000, Average Loss= 0.092480, Average Accuracy= 98.00%
[3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2] - [3] vs [3]
Iter= 22000, Average Loss= 0.068808, Average Accuracy= 98.60%
[2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1] - [2] vs [2]
Iter= 23000, Average Loss= 0.085760, Average Accuracy= 97.80%
[2, 3, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2] - [3] vs [3]
Iter= 24000, Average Loss= 0.076180, Average Accuracy= 98.00%
[3, 2, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3] - [2] vs [2]
Iter= 25000, Average Loss= 0.075972, Average Accuracy= 98.10%
[3, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3] - [2] vs [2]
Iter= 26000, Average Loss= 0.089572, Average Accuracy= 98.10%
[1, 2, 1, 2, 1, 2, 1, 21, 2, 1, 2, 1] - [3] vs [3]
Iter= 27000, Average Loss= 0.088323, Average Accuracy= 97.90%
[1, 2, 3, 2, 2, 2, 2, 1, 1, 1, 1, 2] - [3] vs [3]
Iter= 28000, Average Loss= 0.090500, Average Accuracy= 97.60%
[2, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 2] - [1] vs [1]
Iter= 29000, Average Loss= 0.081351, Average Accuracy= 97.20%
[2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3] - [2] vs [2]
Iter= 30000, Average Loss= 0.122264, Average Accuracy= 97.40%
[2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3] - [2] vs [2]
Iter= 31000, Average Loss= 0.111887, Average Accuracy= 97.60%
[1, 2, 1, 21, 2, 1, 2, 1, 3, 3, 3, 1] - [2] vs [2]
Iter= 32000, Average Loss= 0.085379, Average Accuracy= 97.40%
[21, 2, 1, 2, 1, 3, 3, 3, 1, 2, 3, 2] - [1] vs [1]
Iter= 33000, Average Loss= 0.063291, Average Accuracy= 99.00%
[2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1] - [2] vs [2]
Iter= 34000, Average Loss= 0.228956, Average Accuracy= 96.10%
[2, 1, 2, 2, 1, 2, 3, 2, 2, 2, 2, 1] - [1] vs [1]
Iter= 35000, Average Loss= 0.083058, Average Accuracy= 98.20%
[1, 2, 2, 1, 2, 3, 2, 2, 2, 2, 1, 1] - [1] vs [1]
Iter= 36000, Average Loss= 0.074868, Average Accuracy= 97.80%
[1, 3, 3, 3, 1, 2, 3, 2, 1, 2, 3, 1] - [2] vs [2]
Iter= 37000, Average Loss= 0.101523, Average Accuracy= 97.20%
[1, 21, 2, 1, 2, 1, 3, 3, 3, 1, 2, 3] - [2] vs [2]
Iter= 38000, Average Loss= 0.124341, Average Accuracy= 97.30%
[2, 3, 2, 1, 2, 3, 2, 1, 2, 2, 1, 2] - [3] vs [3]
Iter= 39000, Average Loss= 0.093165, Average Accuracy= 96.90%
[1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 2, 1] - [2] vs [2]
Iter= 40000, Average Loss= 0.131280, Average Accuracy= 98.00%
[2, 1, 3, 3, 3, 1, 2, 3, 2, 1, 2, 3] - [1] vs [1]
Iter= 41000, Average Loss= 0.173837, Average Accuracy= 96.70%
[3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2] - [3] vs [3]
Iter= 42000, Average Loss= 0.067694, Average Accuracy= 98.70%
[1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2] - [1] vs [1]
Iter= 43000, Average Loss= 0.103916, Average Accuracy= 98.00%
[3, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3] - [2] vs [2]
Iter= 44000, Average Loss= 0.151070, Average Accuracy= 97.30%
[1, 2, 1, 2, 1, 2, 1, 21, 2, 1, 2, 1] - [3] vs [3]
Iter= 45000, Average Loss= 0.076150, Average Accuracy= 98.50%
[2, 2, 2, 2, 1, 1, 1, 1, 2, 3, 2, 2] - [3] vs [3]
Iter= 46000, Average Loss= 0.079045, Average Accuracy= 98.00%
[3, 2, 1, 2, 3, 2, 1, 2, 2, 1, 2, 3] - [2] vs [2]
Iter= 47000, Average Loss= 0.093110, Average Accuracy= 96.80%
[21, 2, 1, 2, 1, 3, 3, 3, 1, 2, 3, 2] - [1] vs [1]
Iter= 48000, Average Loss= 0.093426, Average Accuracy= 97.70%
[2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1] - [2] vs [2]
Iter= 49000, Average Loss= 0.072693, Average Accuracy= 98.00%
[2, 1, 2, 3, 2, 2, 2, 2, 1, 1, 1, 1] - [2] vs [2]
Iter= 50000, Average Loss= 0.075903, Average Accuracy= 98.40%
[1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 2, 1] - [2] vs [2]
Optimization Finished!
Elapsed time:  8.83382101919916 hr
Run on command line.
	tensorboard --logdir=rnn_code
Point your web browser to: http://localhost:6006/

In [ ]: