In [2]:
from __future__ import print_function

import datetime as dt
import math
import os
import sys
import tarfile
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from IPython.display import display, Image
from scipy import ndimage
from six.moves import cPickle as pickle
from six.moves.urllib.request import urlretrieve
from sklearn.linear_model import LogisticRegression

flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_float('learning_rate', 0.05, 'Initial learning rate')
flags.DEFINE_float('learning_rate_decay', 0.1, 'Learning rate decay, i.e. the fraction of the initial learning rate at the end of training')

flags.DEFINE_integer('max_steps', 1000, 'Number of steps to run trainer')
flags.DEFINE_float('max_loss', 0.01, 'Maximum acceptable validation MSE')
flags.DEFINE_integer('batch_size', 64*193, 'Batch size; intended to divide evenly into the dataset size')
flags.DEFINE_integer('hidden1', 35, 'Size of the first hidden layer')
flags.DEFINE_integer('hidden2', 10, 'Size of the second hidden layer')
flags.DEFINE_integer('output_vars', 1, 'Size of the output layer')
flags.DEFINE_integer('input_vars', 6, 'Size of the input layer')
#flags.DEFINE_string('train_dir', './data/', 'Directory to put the training data') # not currently used
flags.DEFINE_string('checkpoints_dir', './checkpoints/two-layer/'+dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'Directory to store checkpoints')
flags.DEFINE_string('summaries_dir','./logs/two-layer/'+dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'Summaries directory')
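
The learning-rate flags above feed tf.train.exponential_decay in run_training below, which computes learning_rate * decay_rate ** (global_step / decay_steps) with decay_steps = max_steps, so learning_rate_decay is the fraction of the initial rate remaining at the end of training. A minimal NumPy sketch of that schedule (illustrative only, not part of the notebook):

import numpy as np
init_lr, decay, max_steps = 0.05, 0.1, 1000             # the flag defaults above
steps = np.arange(0, max_steps + 1, 250)
lr = init_lr * decay ** (steps / float(max_steps))      # smooth decay (staircase=False)
print(list(zip(steps.tolist(), np.round(lr, 4).tolist())))
# roughly [(0, 0.05), (250, 0.0281), (500, 0.0158), (750, 0.0089), (1000, 0.005)]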

In [3]:
NSAMPLE = 10000
#x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
x_data = np.float32(np.arange(-15.0,15.0,30.0/NSAMPLE)).T
r_data = np.float32(np.random.normal(0,1.0, size=(NSAMPLE)))

#y_data = 10.0*np.exp(-x_data*x_data/0.1)
y_data = np.float32(np.sin(0.75*x_data)*7.0+x_data*0.5 + r_data*0.02)
#y_data_2 = np.float32(np.sin(0.5*x_data)*3.0-x_data*0.5+r_data*1.0)
#y_data = np.hstack((y_data_1, y_data_2))

plt.figure(figsize=(8, 8))
#plt.plot(x_data,y_data[:,0],'ro',x_data, y_data[:,1],'bo',alpha=0.3)
plt.plot(x_data, y_data,'r-',alpha=0.3)
plt.show()

In [34]:
NSAMPLE = 1000
x_data = np.float32(np.arange(0,1,1.0/NSAMPLE))
y_data = np.float32(x_data-x_data*x_data)

plt.figure(figsize=(8, 8))

plt.plot(x_data, y_data,'r-',alpha=0.3)
plt.show()

In [5]:
x = y_data
#x = np.asarray(range(100))
input_size = 7
#data = np.zeros(shape=(x.shape[0]-size,input_size))
# Build a sliding window of 7 consecutive samples per row:
# the first 6 columns are the network input, the 7th is the target.
data = np.zeros(shape=(x.shape[0]-input_size, input_size), dtype=np.float32)
for i in range(x.shape[0]-input_size):
    for j in range(input_size):
        data[i,j] = x[i+j]
inputs = np.hsplit(data, np.array([6, 7]))[0]   # columns 0-5
outputs = np.hsplit(data, np.array([6, 7]))[1]  # column 6
print(inputs.shape, outputs.shape)


(9993, 6) (9993, 1)
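
The same windows can be built without the double loop by indexing with a 2-D offset array; this equivalent sketch (using the x and input_size defined above, with hypothetical names idx/data_fast) is only an aside, not part of the original cell:

idx = np.arange(input_size) + np.arange(x.shape[0] - input_size)[:, None]
data_fast = x[idx].astype(np.float32)             # shape (N - input_size, input_size)
inputs_fast, outputs_fast = data_fast[:, :6], data_fast[:, 6:]
assert np.allclose(data_fast, data)               # identical to the loop-built array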

In [18]:
def weight_variable(shape):
    """
    Returns TF weight variable with given shape. The weights are normally distributed with mean = 0, stddev = 0.1
    shape -- shape of the variable, i.e. [4,5] matrix of 4x5
    """
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """
    Returns TF bias variable with given shape. The biases are initially at 0.1
    shape -- shape of the variable, i.e. [4] -- vector of length 4
    """
    initial = tf.constant(0.1, shape = shape)
    return tf.Variable(initial)

def variable_summaries(var, name):
    """
    Adds multiple summaries (statistics) for a TF variable
    var -- TF variable
    name -- variable name
    """
    mean = tf.reduce_mean(var)
    tf.scalar_summary(name+'/mean', mean)
    # standard deviation, not the sum of squared deviations
    stddev = tf.sqrt(tf.reduce_mean(tf.square(var-mean)))
    tf.scalar_summary(name+'/stddev', stddev)
    _min = tf.reduce_min(var)
    #tf.scalar_summary(name+'/min', _min)
    _max = tf.reduce_max(var)
    #tf.scalar_summary(name+'/max', _max)
    tf.histogram_summary(name, var)

def nn_layer(input_tensor, input_dim, output_dim, layer_name, act = tf.tanh):
    """
    Creates and returns NN layer
    input_tensor -- TF tensor at layer input
    input_dim -- size of layer input
    output_dim -- size of layer output
    layer_name -- name of the layer for summaries (statistics)
    act -- nonlinear activation function
    """
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            weights = weight_variable([input_dim, output_dim])
            variable_summaries(weights, layer_name+'/weights')
        with tf.name_scope('biases'):
            biases = bias_variable([output_dim])
            variable_summaries(biases, layer_name+'/biases')
        with tf.name_scope('WX_plus_b'):
            preactivate = tf.matmul(input_tensor, weights)+biases
            tf.histogram_summary(layer_name+'/pre_activations', preactivate)
        if act is not None:
            activations = act(preactivate, 'activation')
        else:
            activations = preactivate
        tf.histogram_summary(layer_name+'/activations', activations)
    return activations

def run_training():
    """
    Creates a NN and runs its training/running
    """
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            x = tf.placeholder(tf.float32, [None, FLAGS.input_vars], name='x-input')
            y_ = tf.placeholder(tf.float32, [None, FLAGS.output_vars], name = 'y-input')
  
        hidden_1 = nn_layer(x, FLAGS.input_vars, FLAGS.hidden1, 'layer1')
        hidden_2 = nn_layer(hidden_1, FLAGS.hidden1, FLAGS.hidden2, 'layer2')
        train_prediction = nn_layer(hidden_2, FLAGS.hidden2, FLAGS.output_vars, 'output', act = None)      
        
        with tf.name_scope('MSE'):
            prediction_diff = train_prediction-y_
            MSE = tf.reduce_mean(tf.square(prediction_diff))
            tf.scalar_summary('MSE', MSE)

        with tf.name_scope('train'):
            global_step = tf.Variable(0.00, trainable=False)
            learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, 
                                                       global_step, FLAGS.max_steps, 
                                                       FLAGS.learning_rate_decay, staircase=False)        
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
            #optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss, global_step=global_step)
            optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
                MSE, global_step=global_step)
                  
        merged = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        sess = tf.Session()
        train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir+'/train')
        test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir+'/validation')
        sess.run(init)
        
        train_loss = 1
        valid_loss = 1
        for step in xrange(10):#xrange(FLAGS.max_steps):
            start_time = time.time()
            if step%100 != 0:
                # regular training
                #feed_dict = fill_feed_dict(train_dataset, x, y_, train = True)
                feed_dict = {x:inputs, y_:outputs}
                _, train_loss, lr, summary = sess.run([optimizer, MSE, learning_rate, merged], feed_dict=feed_dict)
                train_writer.add_summary(summary,step)
            else:
                # check model fit
                feed_dict = {x:inputs, y_:outputs}
                valid_loss, summary = sess.run([MSE, merged], feed_dict = feed_dict)
                test_writer.add_summary(summary,step)
                duration = time.time()-start_time
                print('Step %d (%d op/sec): Training MSE: %.5f, Validation MSE: %.5f' % (step, 1/duration, train_loss, valid_loss))
            
        step_by_step = False
        if step_by_step:
            predictions = np.zeros(shape=[NSAMPLE, 1])
            start_point = np.reshape(inputs[0,:],(1,6))
            print(start_point)
            for i in xrange(1,data.shape[0]):
                prediction = sample_prediction.eval({sample_input: start_point})
                #start_point = np.reshape(data[i,0],prediction[0,1],(1,2))
                start_point[:,0] = data[i,0]
                start_point[:,1] = prediction[0,1]
                predictions[i] = prediction[0,1]
        else:
            predictions = np.zeros(shape=[NSAMPLE, ])
            start_point = np.reshape(data[0,:],(1,2))
            for step in range(NSAMPLE):
                prediction = sample_prediction.eval({sample_input: start_point})
                start_point = np.reshape(prediction[0,:],(1,2))
                predictions[i] = prediction[0,1]
            print('=' * 80)
        
        #feed_dict = fill_feed_dict(test_dataset, x, y_, train = False)
        #test_loss, summary = sess.run([MSE, merged], feed_dict = feed_dict)
        #print('Test MSE: %.5f' % (test_loss))
        
        #predicted_vs_actual = np.hstack((test_prediction.eval(session = sess), test_dataset.outputs))
        #print("correlation coefficients: ")
        #print(np.corrcoef(predicted_vs_actual[:,0],predicted_vs_actual[:,2]))
        #print(np.corrcoef(predicted_vs_actual[:,1],predicted_vs_actual[:,3]))
        sess.close()

run_training()


Step 0 (49 op/sec): Training MSE: 1.00000, Validation MSE: 42.18028
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-18-669140e2fe5b> in <module>()
    142         sess.close()
    143 
--> 144 run_training()

<ipython-input-18-669140e2fe5b> in run_training()
    125         else:
    126             predictions = np.zeros(shape=[NSAMPLE, ])
--> 127             start_point = np.reshape(data[0,:],(1,2))
    128             for step in range(NSAMPLE):
    129                 prediction = sample_prediction.eval({sample_input: start_point})

/home/bezuglov@ad.renci.org/anaconda2/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in reshape(a, newshape, order)
    223     except AttributeError:
    224         return _wrapit(a, 'reshape', newshape, order=order)
--> 225     return reshape(newshape, order=order)
    226 
    227 

ValueError: total size of new array must be unchanged
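
The ValueError above comes from the prediction block at the end of run_training: it still refers to sample_prediction, sample_input, and 2-wide windows from an earlier two-column experiment, while this graph only exposes the x placeholder and train_prediction, and its input windows hold 6 values. Below is a minimal sketch of a closed-loop (free-running) forecast using this cell's own tensors; it assumes the objects defined above (sess, x, train_prediction, inputs, FLAGS), and the window-shifting scheme and the names window/pred/n_steps are illustrative guesses at the intent rather than the notebook's original code:

# inside run_training(), before sess.close()
n_steps = inputs.shape[0]
predictions = np.zeros(shape=[n_steps, 1])
window = np.reshape(inputs[0, :], (1, FLAGS.input_vars))        # seed with the first real window
for step in range(n_steps):
    pred = sess.run(train_prediction, feed_dict={x: window})    # shape (1, 1)
    predictions[step] = pred[0, 0]
    # slide the window: drop the oldest value, append the new prediction
    window = np.hstack([window[:, 1:], pred.reshape(1, 1)])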

In [267]:
summary_frequency = 100

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    mean_loss = 0
    for step in range(num_steps):
        batches = train_batches.next()
        feed_dict = dict()
        for i in range(num_unrollings+1):
            #this_x = np.reshape(this_x,(1, FIN_SIZE))
            #print("i: ",i)
            #print(batches[i].shape)
            feed_dict[train_data[i]] = np.reshape(batches[i],(batch_size,2))
        _, l, predictions, lr = session.run(
            [optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
        mean_loss += l
        if step % summary_frequency == 0:
            if step > 0:
                mean_loss = mean_loss / summary_frequency
                # The mean loss is an estimate of the loss over the last few batches.
            print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
            #if step % (summary_frequency * 10) == 0:
                #print('=' * 80)
                #prediction_list = []
                #for _ in range(1000):
                #    start_point = np.reshape(y_data[0],(1,1))
                #    prediction = sample_prediction.eval({sample_input: start_point})
                #    prediction_list.append(prediction[0][0])
                #print('=' * 80)
                
    print('=' * 80)
    
    step_by_step = False
    if step_by_step:
        predictions = np.zeros(shape=[NSAMPLE, 1])
        start_point = np.reshape(data[0,:],(1,2))
        for i in xrange(1,data.shape[0]):
            prediction = sample_prediction.eval({sample_input: start_point})
            #start_point = np.reshape(data[i,0],prediction[0,1],(1,2))
            start_point[:,0] = data[i,0]
            start_point[:,1] = prediction[0,1]
            predictions[i] = prediction[0,1]
    else:
        predictions = np.zeros(shape=[NSAMPLE, ])
        start_point = np.reshape(data[0,:],(1,2))
        for step in range(NSAMPLE):
            prediction = sample_prediction.eval({sample_input: start_point})
            start_point = np.reshape(prediction[0,:],(1,2))
            predictions[step] = prediction[0,1]
        print('=' * 80)


Initialized
Average loss at step 0: 46721.972656 learning rate: 0.020000
Average loss at step 100: 29237.300283 learning rate: 0.015890
Average loss at step 200: 8925.407236 learning rate: 0.012625
Average loss at step 300: 5307.461790 learning rate: 0.010031
Average loss at step 400: 4273.199182 learning rate: 0.007969
Average loss at step 500: 3532.226587 learning rate: 0.006332
Average loss at step 600: 3256.917402 learning rate: 0.005031
Average loss at step 700: 3097.492437 learning rate: 0.003997
Average loss at step 800: 2923.008850 learning rate: 0.003176
Average loss at step 900: 2659.730233 learning rate: 0.002523
Average loss at step 1000: 2644.401714 learning rate: 0.002005
================================================================================
================================================================================

In [19]:
print(predictions.shape)
#print(y_test)
#print(prediction_list)
plt.figure(figsize=(8, 8))
#plt.plot(x_data,y_data,'ro', x_test,y_test[:,0],'bo',alpha=0.3)
#plt.plot(x_data,y_data,'ro', x_test,y_test[:,0],'bo', x_test, y_test[:,1], 'b-', alpha=0.3)
plt.plot(x_data[:],y_data[:], 'r-', x_data[:],predictions[:],'b-')#,x_test, y_test,'bo',alpha=0.3)
plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-72206fc53351> in <module>()
----> 1 print(predictions.shape)
      2 #print(y_test)
      3 #print(prediction_list)
      4 plt.figure(figsize=(8, 8))
      5 #plt.plot(x_data,y_data,'ro', x_test,y_test[:,0],'bo',alpha=0.3)

NameError: name 'predictions' is not defined