In [497]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
In [511]:
# Ascending sine: a noisy parametric curve (x(t), y(t)) built from phase-shifted sinusoids plus a linear upward trend
NSAMPLE = 1000
#x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
sample_range = np.float32(np.arange(-15.0,15.0,30.0/NSAMPLE)).T
r_data = np.float32(np.random.normal(0,0.05, size=(NSAMPLE)))
#y_data = 10.0*np.exp(-x_data*x_data/0.1)
x_data = np.float32(np.cos(0.55*sample_range)*7.0+sample_range*0.5 + r_data*0.1)
y_data = np.float32(np.sin(0.45*sample_range)*7.0+sample_range*0.5 + r_data*0.1)
#y_data_2 = np.float32(np.sin(0.5*x_data)*3.0-x_data*0.5+r_data*1.0)
#y_data = np.hstack((y_data_1, y_data_2))
plt.figure(figsize=(8, 8))
#plt.plot(x_data,y_data[:,0],'ro',x_data, y_data[:,1],'bo',alpha=0.3)
#plt.plot(x_data,'b-',alpha=0.3)
#plt.plot(y_data,'r-',alpha=0.3)
plt.plot(x_data,y_data,'r--',alpha=0.3)
#plt.plot(x_data, predictions[:],'r-',alpha=0.3)
plt.show()
In [475]:
# Upside down parabola
NSAMPLE = 1000
x_data = np.float32(np.arange(0,1,1.0/NSAMPLE))
y_data = np.float32(x_data-x_data*x_data)
plt.figure(figsize=(8, 8))
plt.plot(x_data, y_data,'r-',alpha=0.3)
plt.show()
In [512]:
batch_size=40
num_unrollings=5
"""
suppose there is a dataset d of the format:
x11, x21, x31, ..., xK1, y11, y21, ..., yM1
x12, x22, x32, ..., xK2, y12, y22, ..., yM2
...
x1N, x2N, x3N, ..., xKN, y1N, y2N, ..., yMN
The batch generator return batches as tuples of two components: inputs and outputs
inputs batches (batch_size = 3, num_unrollings = 4)
[[x11,...,xK1], [x1i,...,xKi], [x1j,...,xKj]], <- the first batch
[[x12,...,xK2], [x1i+1,...,xKi+1], [x1j+1,...,xKj+1]], <- the second batch
[[x13,...,xK3], [x1i+2,...,xKi+2], [x1j+2,...,xKj+2]] <- the third batch
[[x14,...,xK4], [x1i+3,...,xKi+3], [x1j+3,...,xKj+3]] <- the fourth batch
indices i and j are calculated based on cursors
"""
class BatchGenerator(object):
    def __init__(self, data, outs_index, batch_size, num_unrollings):
        """
        Creates a batch generator.
        data -- the dataset
        outs_index -- index of the first outputs component
        batch_size -- how many samples in each batch. Note the samples are NOT sequential in time!
        num_unrollings -- how many batches to return. The batches are sequential in time.
        """
        self._data = data  # the complete dataset
        self._data_size = data.shape[0]  # how many samples in the dataset
        self._data_width = data.shape[1]  # how many components in both inputs and outputs
        self._outs_index = outs_index  # where the outputs start
        self._batch_size = batch_size
        self._num_unrollings = num_unrollings
        segment = self._data_size // self._batch_size
        self._cursor = [offset * segment for offset in range(self._batch_size)]  # starting points for each batch
        self._last_batch = self._next_batch()  # generate and save the first batch
    def _next_batch(self):
        """
        Generate a single batch from the current cursor position in the data.
        Returns a tuple (inputs_batch, outputs_batch).
        """
        batch = np.zeros(shape=(self._batch_size, self._data_width), dtype=np.float32)  # prepare the batch array
        for b in range(self._batch_size):  # cursors are indices where each data sample in the batch starts
            batch[b] = self._data[self._cursor[b], :]  # copy the data
            self._cursor[b] = (self._cursor[b] + 1) % self._data_size
        return (batch[:, :self._outs_index], batch[:, self._outs_index:])
    def next(self):
        """
        Generate the next array of batches from the data. The array consists of
        the last batch of the previous array, followed by num_unrollings-1 new ones.
        """
        # make sure that the cursors stay within range
        self._cursor = [c % (self._data_size - self._num_unrollings) for c in self._cursor]
        batches = [self._last_batch]  # use the last batch as the first in the list
        for step in range(self._num_unrollings - 1):  # we only need num_unrollings-1 new batches
            batches.append(self._next_batch())
        self._last_batch = batches[-1]  # save the last batch to be reused next time
        return batches
d = np.column_stack((x_data,y_data))
#print(d.shape)
train_batches = BatchGenerator(data=d, outs_index=1, batch_size=batch_size, num_unrollings=num_unrollings)
#print(train_batches.next())
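As a quick sanity check (a minimal sketch, not part of the original pipeline), each call to next() should yield num_unrollings time-consecutive batches, with inputs and outputs of shape (batch_size, 1) for this two-column dataset. Note that running it advances the generator's cursors.
In [ ]:
demo = train_batches.next()
print(len(demo))                           # 5 batches, one per unrolling step
print(demo[0][0].shape, demo[0][1].shape)  # (40, 1) (40, 1)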
In [513]:
num_nodes = 16
num_steps = 15001
start_learning_rate = 0.006
rate_coeff = 0.05
input_size = 1
output_size = 1
graph = tf.Graph()
with graph.as_default():
    # Parameters:
    # Input gate: input, previous output, and bias.
    ix = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    im = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    ib = tf.Variable(tf.zeros([1, num_nodes]))
    # Forget gate: input, previous output, and bias.
    fx = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    fm = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    fb = tf.Variable(tf.zeros([1, num_nodes]))
    # Memory cell: input, state and bias.
    cx = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    cm = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    cb = tf.Variable(tf.zeros([1, num_nodes]))
    # Output gate: input, previous output, and bias.
    ox = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    om = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    ob = tf.Variable(tf.zeros([1, num_nodes]))
    # Variables saving state across unrollings.
    saved_output = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
    saved_state = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
    # Regression weights and biases.
    w = tf.Variable(tf.truncated_normal([num_nodes, output_size], -0.1, 0.1))
    b = tf.Variable(tf.zeros([output_size]))
    # Definition of the cell computation.
    def lstm_cell(i, o, state):
        """Create an LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
        Note that in this formulation, we omit the various connections between the
        previous state and the gates."""
        input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)
        forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)
        update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
        state = forget_gate * state + input_gate * tf.tanh(update)
        output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)
        return output_gate * tf.tanh(state), state
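    # In equation form (matching lstm_cell above; no peephole connections):
    #   i_t = sigmoid(x_t·ix + h_{t-1}·im + ib)                      input gate
    #   f_t = sigmoid(x_t·fx + h_{t-1}·fm + fb)                      forget gate
    #   c_t = f_t * c_{t-1} + i_t * tanh(x_t·cx + h_{t-1}·cm + cb)   cell state
    #   o_t = sigmoid(x_t·ox + h_{t-1}·om + ob)                      output gate
    #   h_t = o_t * tanh(c_t)                                        hidden output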
    # Prepare placeholders for inputs and outputs.
    # In total 2*num_unrollings placeholders have to be fed into the network;
    # they are collected in the train_inputs and train_outputs lists.
    train_inputs = list()
    train_outputs = list()
    for _ in range(num_unrollings):
        train_inputs.append(
            tf.placeholder(tf.float32, shape=[batch_size, input_size]))
        train_outputs.append(
            tf.placeholder(tf.float32, shape=[batch_size, output_size]))
    # Unrolled LSTM loop.
    outputs = list()  # list of outputs
    output = saved_output  # recall the last saved output
    state = saved_state  # recall the last saved state
    for i in train_inputs:
        output, state = lstm_cell(i, output, state)
        outputs.append(output)
    # State saving across unrollings.
    with tf.control_dependencies([saved_output.assign(output),
                                  saved_state.assign(state)]):
        y = tf.matmul(tf.concat(0, outputs), w) + b
        loss = tf.reduce_mean(tf.square(y - tf.concat(0, train_outputs)))
    # Optimizer.
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, num_steps, rate_coeff, staircase=False)
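    # With staircase=False the decay is continuous; exponential_decay computes
    # learning_rate = start_learning_rate * rate_coeff**(global_step/num_steps),
    # so the rate anneals from 0.006 toward 0.006*0.05 = 0.0003 over the run.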
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients, v = zip(*optimizer.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=global_step)
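    # Clipping rescales the whole gradient list whenever its joint L2 norm
    # exceeds 1.25, which guards the unrolled LSTM against exploding gradients.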
    # Sampling and validation eval: batch 1, no unrolling.
    sample_input = tf.placeholder(tf.float32, shape=[1, input_size])
    saved_sample_output = tf.Variable(tf.zeros([1, num_nodes]), trainable=False)
    saved_sample_state = tf.Variable(tf.zeros([1, num_nodes]), trainable=False)
    reset_sample_state = tf.group(
        saved_sample_output.assign(tf.zeros([1, num_nodes])),
        saved_sample_state.assign(tf.zeros([1, num_nodes])))
    sample_output, sample_state = lstm_cell(
        sample_input, saved_sample_output, saved_sample_state)
    with tf.control_dependencies([saved_sample_output.assign(sample_output),
                                  saved_sample_state.assign(sample_state)]):
        sample_prediction = tf.nn.xw_plus_b(sample_output, w, b)
    #run_metadata = tf.RunMetadata()
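For reference, here is a minimal NumPy mirror of the lstm_cell equations above (a sketch with hypothetical toy weights, not part of the trained graph), useful for checking the shapes and the math by hand:
In [ ]:
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_cell_np(i, o, state, W):
    # NumPy mirror of lstm_cell above; W holds the twelve weight arrays.
    input_gate = sigmoid(np.dot(i, W['ix']) + np.dot(o, W['im']) + W['ib'])
    forget_gate = sigmoid(np.dot(i, W['fx']) + np.dot(o, W['fm']) + W['fb'])
    update = np.dot(i, W['cx']) + np.dot(o, W['cm']) + W['cb']
    state = forget_gate * state + input_gate * np.tanh(update)
    output_gate = sigmoid(np.dot(i, W['ox']) + np.dot(o, W['om']) + W['ob'])
    return output_gate * np.tanh(state), state

# Toy check with input_size = 1 and num_nodes = 16, as in the graph.
rng = np.random.RandomState(0)
W = {}
for g in 'ifco':
    W[g + 'x'] = rng.uniform(-0.1, 0.1, (1, 16))
    W[g + 'm'] = rng.uniform(-0.1, 0.1, (16, 16))
    W[g + 'b'] = np.zeros((1, 16))
out, st = lstm_cell_np(np.zeros((1, 1)), np.zeros((1, 16)), np.zeros((1, 16)), W)
print(out.shape, st.shape)  # (1, 16) (1, 16)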
In [517]:
summary_frequency = 400
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    mean_loss = 0
    for step in range(num_steps):
        batches = train_batches.next()
        feed_dict = dict()
        for i in range(num_unrollings):
            #print(batches[i][0])
            #print(batches[i][1])
            feed_dict[train_inputs[i]] = np.reshape(batches[i][0], (batch_size, 1))
            feed_dict[train_outputs[i]] = np.reshape(batches[i][1], (batch_size, 1))
        _, l, lr = session.run([optimizer, loss, learning_rate], feed_dict=feed_dict)
        mean_loss += l
        if step % summary_frequency == 0:
            if step > 0:
                mean_loss = mean_loss / summary_frequency
            print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
    print('=' * 80)
    reset_sample_state.run()
    # Feed the ground-truth x values one at a time; the cell state carries over between steps.
    predictions = np.zeros(shape=[len(sample_range)])
    for i in range(x_data.shape[0]):
        predictions[i] = sample_prediction.eval({sample_input: np.reshape(x_data[i], (1, 1))})
In [254]:
print(y_data[:10])
print(predictions[:10])
In [518]:
cc = np.corrcoef(y_data, predictions)[0, 1]
print(cc)
plt.figure(figsize=(8, 8))
#plt.plot(x_data,y_data[:,0],'ro',x_data, y_data[:,1],'bo',alpha=0.3)
#plt.plot(x_data,'b-',alpha=0.3)
#plt.plot(y_data,'r--',alpha=0.3)
#plt.plot(predictions[:],'r-',alpha=0.3)
plt.plot(x_data,y_data,'r--',alpha=0.3)
plt.plot(x_data, predictions[:],'r-',alpha=0.3)
plt.show()
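Alongside the correlation coefficient, a root-mean-square error gives a scale-aware measure of the fit (a small sketch using the predictions and y_data computed above):
In [ ]:
rmse = np.sqrt(np.mean((predictions - y_data) ** 2))
print('RMSE: %f' % rmse)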
In [ ]: