In [497]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math

In [511]:
# Ascending sinusoid: x and y are noisy, phase-shifted sinusoids riding on a linear trend
NSAMPLE = 1000
#x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
sample_range = np.float32(np.arange(-15.0,15.0,30.0/NSAMPLE)).T
r_data = np.float32(np.random.normal(0,0.05, size=(NSAMPLE)))

#y_data = 10.0*np.exp(-x_data*x_data/0.1)
x_data = np.float32(np.cos(0.55*sample_range)*7.0+sample_range*0.5 + r_data*0.1)
y_data = np.float32(np.sin(0.45*sample_range)*7.0+sample_range*0.5 + r_data*0.1)
#y_data_2 = np.float32(np.sin(0.5*x_data)*3.0-x_data*0.5+r_data*1.0)
#y_data = np.hstack((y_data_1, y_data_2))

plt.figure(figsize=(8, 8))
#plt.plot(x_data,y_data[:,0],'ro',x_data, y_data[:,1],'bo',alpha=0.3)
#plt.plot(x_data,'b-',alpha=0.3)
#plt.plot(y_data,'r-',alpha=0.3)
plt.plot(x_data,y_data,'r--',alpha=0.3)
#plt.plot(x_data, predictions[:],'r-',alpha=0.3)
plt.show()

In [475]:
# Upside-down parabola
NSAMPLE = 1000
x_data = np.float32(np.arange(0,1,1.0/NSAMPLE))
y_data = np.float32(x_data-x_data*x_data)

plt.figure(figsize=(8, 8))

plt.plot(x_data, y_data,'r-',alpha=0.3)
plt.show()

In [512]:
batch_size=40
num_unrollings=5
"""
suppose there is a dataset d of the format:
x11, x21, x31, ..., xK1, y11, y21, ..., yM1
x12, x22, x32, ..., xK2, y12, y22, ..., yM2
...
x1N, x2N, x3N, ..., xKN, y1N, y2N, ..., yMN

The batch generator return batches as tuples of two components: inputs and outputs

inputs batches (batch_size = 3, num_unrollings = 4)
[[x11,...,xK1], [x1i,...,xKi], [x1j,...,xKj]], <- the first batch
[[x12,...,xK2], [x1i+1,...,xKi+1], [x1j+1,...,xKj+1]], <- the second batch
[[x13,...,xK3], [x1i+2,...,xKi+2], [x1j+2,...,xKj+2]] <- the third batch
[[x14,...,xK4], [x1i+3,...,xKi+3], [x1j+3,...,xKj+3]] <- the fourth batch

indices i and j are calculated based on cursors
"""

class BatchGenerator(object):
    def __init__(self, data, outs_index, batch_size, num_unrollings):
        """
        Creates a batch generator
        data -- the dataset
        outs_index -- index of the first outputs component
        batch_size -- how many samples in each batch. Note the samples are NOT sequential in time!
        num_unrollings -- how many batches to return. The batches are sequential in time
        """
        self._data = data # the complete dataset
        self._data_size = data.shape[0] # how many samples in the dataset
        self._data_width = data.shape[1] # how many components in both inputs and outputs
        self._outs_index = outs_index # where the outputs start
        self._batch_size = batch_size
        self._num_unrollings = num_unrollings
        segment = self._data_size // self._batch_size 
        self._cursor = [offset * segment for offset in range(self._batch_size)] # starting points for each batch
        self._last_batch = self._next_batch() # generate and save the first batch
  
    def _next_batch(self):
        """
        Generate a single batch from the current cursor position in the data.
        Returns a tuple (inputs_batch,outputs_batch)
        """
        batch = np.zeros(shape=(self._batch_size, self._data_width), dtype=np.float32) # prepare the batch array
        for b in range(self._batch_size): # cursors are indices where each data sample in the batch starts
            batch[b] = self._data[self._cursor[b],:] # copy the data
            self._cursor[b] = (self._cursor[b] + 1) % (self._data_size)
        return (batch[:,:self._outs_index],batch[:,self._outs_index:])
  
    def next(self):
        """
        Generate the next array of batches from the data. The array consists of
        the last batch of the previous array, followed by num_unrollings-1 new ones.
        """
        # make sure that the cursors stay within range
        self._cursor = [c%(self._data_size-self._num_unrollings) for c in self._cursor] 
        
        batches = [self._last_batch] # use the last batch as the first in the list
        for step in range(self._num_unrollings-1): # we only need _num_unrollings-1 new batches
            batches.append(self._next_batch())
        self._last_batch = batches[-1] # save the last batch to be reused next time
        return batches
    
d = np.column_stack((x_data,y_data))
#print(d.shape)
train_batches = BatchGenerator(data = d, outs_index = 1, batch_size = 40, num_unrollings = 5)
#print(train_batches.next())
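# Optional sanity check (kept commented out, like the print above, so no cursors are
# advanced). check_batches is just an illustrative name: a throwaway generator with
# batch_size=3, num_unrollings=4 should yield 4 (inputs, outputs) tuples,
# each of shape (3, 1) and (3, 1) for this dataset.
#check_batches = BatchGenerator(data = d, outs_index = 1, batch_size = 3, num_unrollings = 4)
#for inp, outp in check_batches.next():
#    print(inp.shape, outp.shape)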

In [513]:
num_nodes = 16
num_steps = 15001
start_learning_rate = 0.006
rate_coeff = 0.05

input_size = 1
output_size = 1

graph = tf.Graph()
with graph.as_default():
  
    # Parameters:
    # Input gate: input, previous output, and bias.
    ix = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    im = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    ib = tf.Variable(tf.zeros([1, num_nodes]))
    # Forget gate: input, previous output, and bias.
    fx = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    fm = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    fb = tf.Variable(tf.zeros([1, num_nodes]))
    # Memory cell: input, state and bias.                             
    cx = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    cm = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    cb = tf.Variable(tf.zeros([1, num_nodes]))
    # Output gate: input, previous output, and bias.
    ox = tf.Variable(tf.truncated_normal([input_size, num_nodes], -0.1, 0.1))
    om = tf.Variable(tf.truncated_normal([num_nodes, num_nodes], -0.1, 0.1))
    ob = tf.Variable(tf.zeros([1, num_nodes]))
    # Variables saving state across unrollings.
    saved_output = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
    saved_state = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
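    # (trainable=False: these only carry the LSTM state between unrolled windows; they are not learned parameters.)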
    # Regression weights and biases.
    w = tf.Variable(tf.truncated_normal([num_nodes, output_size], -0.1, 0.1))
    b = tf.Variable(tf.zeros([output_size]))
  
    # Definition of the cell computation.
    def lstm_cell(i, o, state):
        """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
        Note that in this formulation, we omit the various connections between the
        previous state and the gates."""
        input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)
        forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)
        update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
        state = forget_gate * state + input_gate * tf.tanh(update)
        output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)
        return output_gate * tf.tanh(state), state
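    # For reference, lstm_cell implements the standard (no-peephole) LSTM update,
    # with i = x_t, o = h_{t-1} and state = c_{t-1}:
    #   input gate   i_t = sigmoid(x_t @ ix + h_{t-1} @ im + ib)
    #   forget gate  f_t = sigmoid(x_t @ fx + h_{t-1} @ fm + fb)
    #   cell state   c_t = f_t * c_{t-1} + i_t * tanh(x_t @ cx + h_{t-1} @ cm + cb)
    #   output gate  o_t = sigmoid(x_t @ ox + h_{t-1} @ om + ob)
    #   new output   h_t = o_t * tanh(c_t)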

    # Prepare placeholders for inputs and outputs.
    # A total of 2*num_unrollings placeholders must be fed into the network;
    # they are kept in the train_inputs and train_outputs lists.
    train_inputs = list()
    train_outputs = list()
    for _ in range(num_unrollings):
        train_inputs.append(
          tf.placeholder(tf.float32, shape=[batch_size, input_size])) #shape=[batch_size,1]))
        train_outputs.append(
          tf.placeholder(tf.float32, shape=[batch_size, output_size])) #shape=[batch_size,1]))
    
    # Unrolled LSTM loop.
    outputs = list() # list of outputs
    output = saved_output # recall the last saved output
    state = saved_state # recall the last saved state
    for i in train_inputs:
        output, state = lstm_cell(i, output, state)
        outputs.append(output)
    
    # State saving across unrollings.
    with tf.control_dependencies([saved_output.assign(output), 
                                  saved_state.assign(state)]):
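        # The two assignments above are forced to run before the loss is computed,
        # so the final output/state of this window carries over to the next training step.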
        y = tf.matmul(tf.concat(0,outputs), w)+b
        loss = tf.reduce_mean(tf.square(y - tf.concat(0,train_outputs)))
          
    # Optimizer.
    global_step = tf.Variable(0)
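    # The learning rate decays smoothly as start_learning_rate * rate_coeff**(global_step/num_steps),
    # i.e. from 0.006 down towards 0.006*0.05 = 0.0003 over the whole run.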
    learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, num_steps, rate_coeff, staircase=False)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients, v = zip(*optimizer.compute_gradients(loss))
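    # Clip the gradients to a global norm of 1.25 to keep backprop through the unrolled LSTM stable.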
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=global_step)
    
    # Sampling and validation eval: batch 1, no unrolling.
    sample_input = tf.placeholder(tf.float32, shape=[1,input_size])
    saved_sample_output = tf.Variable(tf.zeros([1, num_nodes]))
    saved_sample_state = tf.Variable(tf.zeros([1, num_nodes]))
    
    reset_sample_state = tf.group(
        saved_sample_output.assign(tf.zeros([1, num_nodes])),
        saved_sample_state.assign(tf.zeros([1, num_nodes])))
    
    sample_output, sample_state = lstm_cell(
        sample_input, saved_sample_output, saved_sample_state)
    
    with tf.control_dependencies([saved_sample_output.assign(sample_output),
                                saved_sample_state.assign(sample_state)]):
        sample_prediction = tf.nn.xw_plus_b(sample_output, w, b)
    #run_metadata = tf.RunMetadata()

In [517]:
summary_frequency = 400

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    mean_loss = 0
    for step in range(num_steps):
        batches = train_batches.next()
        feed_dict = dict()
        for i in range(num_unrollings):
            #print(batches[i][0])
            #print(batches[i][1])
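            # batches[i] is the (inputs, outputs) pair for unrolling step i;
            # reshape both to (batch_size, 1) to match the placeholder shapes.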
            feed_dict[train_inputs[i]] = np.reshape(batches[i][0],(batch_size,1))
            feed_dict[train_outputs[i]] = np.reshape(batches[i][1],(batch_size,1))
        _, l, lr = session.run([optimizer, loss, learning_rate], feed_dict=feed_dict)
        mean_loss += l
        if step % summary_frequency == 0:
            if step > 0:
                mean_loss = mean_loss / summary_frequency
            print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
                 
    print('=' * 80)
    
    reset_sample_state.run()
    predictions = np.zeros(shape=[len(sample_range)])
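    # Feed x_data one sample at a time; each sample_prediction.eval() also updates the
    # saved sampler state, so the LSTM sees the inputs as an ordered sequence.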
    for i in range(x_data.shape[0]):
        predictions[i] = sample_prediction.eval({sample_input: np.reshape(x_data[i],(1,1))})


Initialized
Average loss at step 0: 27.389429 learning rate: 0.006000
Average loss at step 400: 11.542659 learning rate: 0.005539
Average loss at step 800: 7.833378 learning rate: 0.005114
Average loss at step 1200: 8.040382 learning rate: 0.004721
Average loss at step 1600: 4.948184 learning rate: 0.004359
Average loss at step 2000: 3.353402 learning rate: 0.004024
Average loss at step 2400: 2.424041 learning rate: 0.003715
Average loss at step 2800: 3.436874 learning rate: 0.003430
Average loss at step 3200: 4.970384 learning rate: 0.003167
Average loss at step 3600: 6.602529 learning rate: 0.002924
Average loss at step 4000: 5.575613 learning rate: 0.002699
Average loss at step 4400: 2.781743 learning rate: 0.002492
Average loss at step 4800: 3.053677 learning rate: 0.002301
Average loss at step 5200: 3.571565 learning rate: 0.002124
Average loss at step 5600: 4.022698 learning rate: 0.001961
Average loss at step 6000: 3.551814 learning rate: 0.001810
Average loss at step 6400: 1.776132 learning rate: 0.001671
Average loss at step 6800: 0.954403 learning rate: 0.001543
Average loss at step 7200: 0.820235 learning rate: 0.001425
Average loss at step 7600: 0.688725 learning rate: 0.001315
Average loss at step 8000: 1.666410 learning rate: 0.001214
Average loss at step 8400: 2.505206 learning rate: 0.001121
Average loss at step 8800: 2.630811 learning rate: 0.001035
Average loss at step 9200: 2.153508 learning rate: 0.000956
Average loss at step 9600: 1.982549 learning rate: 0.000882
Average loss at step 10000: 2.207116 learning rate: 0.000814
Average loss at step 10400: 2.214724 learning rate: 0.000752
Average loss at step 10800: 2.030500 learning rate: 0.000694
Average loss at step 11200: 1.348160 learning rate: 0.000641
Average loss at step 11600: 0.752187 learning rate: 0.000592
Average loss at step 12000: 2.323905 learning rate: 0.000546
Average loss at step 12400: 3.823528 learning rate: 0.000504
Average loss at step 12800: 0.863258 learning rate: 0.000466
Average loss at step 13200: 0.864824 learning rate: 0.000430
Average loss at step 13600: 0.531147 learning rate: 0.000397
Average loss at step 14000: 0.416434 learning rate: 0.000366
Average loss at step 14400: 0.484760 learning rate: 0.000338
Average loss at step 14800: 0.313632 learning rate: 0.000312
================================================================================

In [254]:
print(y_data[:10])
print(predictions[:10])


[-3.51762533 -3.50756526 -3.56526613 -3.56607175 -3.61000276 -3.60337782
 -3.67348242 -3.7375021  -3.77941155 -3.78560805]
[-4.9043951  -6.09775257 -6.84452009 -7.40919733 -7.87189388 -8.27728748
 -8.62388802 -8.9228363  -9.17885303 -9.39753723]

In [518]:
cc = np.corrcoef(y_data[:],predictions[:])[0,1]
print(cc)

plt.figure(figsize=(8, 8))
#plt.plot(x_data,y_data[:,0],'ro',x_data, y_data[:,1],'bo',alpha=0.3)
#plt.plot(x_data,'b-',alpha=0.3)
#plt.plot(y_data,'r--',alpha=0.3)
#plt.plot(predictions[:],'r-',alpha=0.3)
plt.plot(x_data,y_data,'r--',alpha=0.3)
plt.plot(x_data, predictions[:],'r-',alpha=0.3)

plt.show()


0.91529302849

In [ ]: