In [2]:
# All notebook imports, consolidated into one block: __future__ first (it must
# be the first statement of the compiled cell, otherwise the cell is a
# SyntaxError), then stdlib, then third-party.  The original cell imported
# matplotlib, numpy and tensorflow twice; duplicates removed.
from __future__ import print_function

import datetime as dt
import math
import os
import sys
import tarfile
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from IPython.display import display, Image
from scipy import ndimage
from six.moves import cPickle as pickle
from six.moves.urllib.request import urlretrieve
from sklearn.linear_model import LogisticRegression

# Hyperparameters and run paths, exposed as TF command-line flags.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.05, 'Initial learning rate')
flags.DEFINE_float('learning_rate_decay', 0.1, 'Learning rate decay, i.e. the fraction of the initial learning rate at the end of training')
flags.DEFINE_integer('max_steps', 1000, 'Number of steps to run trainer')
flags.DEFINE_float('max_loss', 0.01, 'Maximally acceptable validation MSE')
# NOTE(review): 64*193 = 12352, which does not obviously agree with the help
# text about a dataset of size 193 -- confirm the intended batch size.
flags.DEFINE_integer('batch_size', 64*193, 'Batch size. Divides evenly into the dataset size of 193')
flags.DEFINE_integer('hidden1', 35, 'Size of the first hidden layer')
flags.DEFINE_integer('hidden2', 10, 'Size of the second hidden layer')
flags.DEFINE_integer('output_vars', 1, 'Size of the output layer')
flags.DEFINE_integer('input_vars', 6, 'Size of the input layer')
#flags.DEFINE_string('train_dir', './data/', 'Directory to put the training data') # not currently used
# Timestamped per-run directories so repeated runs do not overwrite each other.
flags.DEFINE_string('checkpoints_dir', './checkpoints/two-layer/'+dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'Directory to store checkpoints')
flags.DEFINE_string('summaries_dir','./logs/two-layer/'+dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),'Summaries directory')
In [3]:
# Toy regression data: a noisy sine on a linear trend,
# y = 7*sin(0.75*x) + 0.5*x + 0.02*N(0,1),
# sampled at NSAMPLE evenly spaced points on [-15, 15).
NSAMPLE = 10000
x_data = np.float32(np.arange(-15.0, 15.0, 30.0 / NSAMPLE)).T
r_data = np.float32(np.random.normal(0, 1.0, size=(NSAMPLE)))
y_data = np.float32(np.sin(0.75 * x_data) * 7.0 + x_data * 0.5 + r_data * 0.02)

# Quick visual sanity check of the generated curve.
plt.figure(figsize=(8, 8))
plt.plot(x_data, y_data, 'r-', alpha=0.3)
plt.show()
In [34]:
# Simpler toy data: the parabola y = x - x^2 on NSAMPLE evenly spaced
# points in [0, 1).  Overwrites x_data/y_data from the previous cell.
NSAMPLE = 1000
x_data = np.float32(np.arange(0, 1, 1.0 / NSAMPLE))
y_data = np.float32(x_data - x_data * x_data)

# Visual check of the curve.
plt.figure(figsize=(8, 8))
plt.plot(x_data, y_data, 'r-', alpha=0.3)
plt.show()
In [5]:
# Build a sliding-window dataset from the 1-D series y_data: row i of `data`
# holds the window [x[i], x[i+1], ..., x[i+input_size-1]].  The first 6
# columns become the model inputs and the 7th column the prediction target.
x = y_data
#x = np.asarray(range(100))
input_size = 7
n_windows = x.shape[0] - input_size
# FIX: np.float was a deprecated alias for the builtin float and has been
# removed from modern NumPy; use the builtin directly (same float64 dtype).
data = np.zeros(shape=(n_windows, input_size), dtype=float)
for i in range(n_windows):
    # Slice assignment copies the whole window at once (the original copied
    # element by element in an inner loop).
    data[i, :] = x[i:i + input_size]
# Split once instead of twice: columns [0:6] -> inputs, column [6:7] -> target.
split = np.hsplit(data, np.array([6, 7]))
inputs = split[0]
outputs = split[1]
print(inputs.shape, outputs.shape)
In [18]:
def weight_variable(shape):
    """
    Create a TF weight Variable of the given shape, initialized from a
    truncated normal distribution with mean 0 and stddev 0.1.

    shape -- shape of the variable, e.g. [4, 5] for a 4x5 matrix
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """
    Create a TF bias Variable of the given shape, with every element
    initialized to the constant 0.1.

    shape -- shape of the variable, e.g. [4] for a vector of length 4
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
def variable_summaries(var, name):
    """
    Attach TensorBoard summaries (mean, stddev, histogram) to a TF variable.

    var  -- TF tensor/variable to summarize
    name -- name prefix used for the summary tags
    """
    mean = tf.reduce_mean(var)
    tf.scalar_summary(name+'/mean', mean)
    # FIX: the original logged sum((var - mean)^2) under the tag 'stddev'
    # (reduce_mean of an already-scalar reduce_sum is a no-op).  The standard
    # deviation is sqrt(mean((var - mean)^2)).
    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.scalar_summary(name+'/stddev', stddev)
    _min = tf.reduce_min(var)
    #tf.scalar_summary(name+'/min', _min)
    _max = tf.reduce_max(var)
    #tf.scalar_summary(name+'/max', _max)
    tf.histogram_summary(name, var)
def nn_layer(input_tensor, input_dim, output_dim, layer_name, act = tf.tanh):
    """
    Build a fully connected layer computing act(input_tensor @ W + b),
    with weight/bias/pre-activation/activation summaries attached.

    input_tensor -- tensor feeding the layer
    input_dim    -- number of input units
    output_dim   -- number of output units
    layer_name   -- name scope and summary-tag prefix for this layer
    act          -- activation function; None gives a purely linear layer
    """
    with tf.name_scope(layer_name):
        # Trainable parameters, each in its own sub-scope with summaries.
        with tf.name_scope('weights'):
            weights = weight_variable([input_dim, output_dim])
            variable_summaries(weights, layer_name + '/weights')
        with tf.name_scope('biases'):
            biases = bias_variable([output_dim])
            variable_summaries(biases, layer_name + '/biases')
        # Affine transform, recorded before the nonlinearity is applied.
        with tf.name_scope('WX_plus_b'):
            preactivate = tf.matmul(input_tensor, weights) + biases
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)
        if act is None:
            activations = preactivate
        else:
            activations = act(preactivate, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)
        return activations
def run_training():
    """
    Build the two-hidden-layer feed-forward network described by FLAGS and
    train it with Adam on the module-level `inputs`/`outputs` arrays,
    logging TensorBoard summaries for train and validation passes.

    NOTE(review): also reads the globals `inputs`, `outputs`, `data` and
    `NSAMPLE`; the prediction section at the bottom references
    `sample_prediction`/`sample_input`, which are not defined anywhere in
    this notebook, so that section raises NameError if reached.
    """
    with tf.Graph().as_default():
        # Placeholders for a batch of input windows and their targets.
        with tf.name_scope('input'):
            x = tf.placeholder(tf.float32, [None, FLAGS.input_vars], name='x-input')
            y_ = tf.placeholder(tf.float32, [None, FLAGS.output_vars], name = 'y-input')
        # Two tanh hidden layers followed by a linear output layer.
        hidden_1 = nn_layer(x, FLAGS.input_vars, FLAGS.hidden1, 'layer1')
        hidden_2 = nn_layer(hidden_1, FLAGS.hidden1, FLAGS.hidden2, 'layer2')
        train_prediction = nn_layer(hidden_2, FLAGS.hidden2, FLAGS.output_vars, 'output', act = None)
        # Mean-squared-error loss.
        with tf.name_scope('MSE'):
            prediction_diff = train_prediction-y_
            # NOTE(review): the outer reduce_mean is redundant -- the inner
            # call already yields a scalar -- and the cast is a no-op on a
            # float32 graph; harmless, left as-is.
            MSE = tf.cast(tf.reduce_mean(tf.reduce_mean(tf.square(prediction_diff))),tf.float32)
            tf.scalar_summary('MSE', MSE)
        # Adam with an exponentially decaying learning rate over max_steps.
        with tf.name_scope('train'):
            global_step = tf.Variable(0.00, trainable=False)
            learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                global_step, FLAGS.max_steps,
                FLAGS.learning_rate_decay, staircase=False)
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
            #optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss, global_step=global_step)
            optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
                MSE, global_step=global_step)
        merged = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        # NOTE(review): saver is created but never used to write checkpoints.
        saver = tf.train.Saver()
        sess = tf.Session()
        # Separate writers so train/validation curves overlay in TensorBoard.
        train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir+'/train')
        test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir+'/validation')
        sess.run(init)
        train_loss = 1
        valid_loss = 1
        # Training loop.  NOTE(review): hard-coded to 10 steps, presumably for
        # debugging; the commented-out xrange(FLAGS.max_steps) looks like the
        # intended run length -- confirm before a real run.
        for step in xrange(10):#xrange(FLAGS.max_steps):
            start_time = time.time()
            if step%100 != 0:
                # regular training
                #feed_dict = fill_feed_dict(train_dataset, x, y_, train = True)
                feed_dict = {x:inputs, y_:outputs}
                _, train_loss, lr, summary = sess.run([optimizer, MSE, learning_rate, merged], feed_dict=feed_dict)
                train_writer.add_summary(summary,step)
            else:
                # check model fit
                # NOTE(review): duplicated `feed_dict =` assignment, and the
                # "validation" pass evaluates on the training data -- there is
                # no separate validation set here.
                feed_dict = feed_dict = {x:inputs, y_:outputs}
                valid_loss, summary = sess.run([MSE, merged], feed_dict = feed_dict)
                test_writer.add_summary(summary,step)
            duration = time.time()-start_time
            print('Step %d (%d op/sec): Training MSE: %.5f, Validation MSE: %.5f' % (step, 1/duration, train_loss, valid_loss))
        # Post-training rollout of predictions.
        # NOTE(review): both branches below reference `sample_prediction` and
        # `sample_input`, which are never defined in this notebook, so this
        # section raises NameError; it appears pasted from another model.
        step_by_step = False
        if step_by_step:
            predictions = np.zeros(shape=[NSAMPLE, 1])
            start_point = np.reshape(inputs[0,:],(1,6))
            print(start_point)
            for i in xrange(1,data.shape[0]):
                prediction = sample_prediction.eval({sample_input: start_point})
                #start_point = np.reshape(data[i,0],prediction[0,1],(1,2))
                start_point[:,0] = data[i,0]
                start_point[:,1] = prediction[0,1]
                predictions[i] = prediction[0,1]
        else:
            predictions = np.zeros(shape=[NSAMPLE, ])
            start_point = np.reshape(data[0,:],(1,2))
            for step in range(NSAMPLE):
                prediction = sample_prediction.eval({sample_input: start_point})
                start_point = np.reshape(prediction[0,:],(1,2))
                # NOTE(review): `predictions[i]` looks like a bug -- `i` is a
                # stale index from elsewhere; the loop variable is `step`.
                predictions[i] = prediction[0,1]
        print('=' * 80)
        #feed_dict = fill_feed_dict(test_dataset, x, y_, train = False)
        #test_loss, summary = sess.run([MSE, merged], feed_dict = feed_dict)
        #print('Test MSE: %.5f' % (test_loss))
        #predicted_vs_actual = np.hstack((test_prediction.eval(session = sess), test_dataset.outputs))
        #print("correlation coefficients: ")
        #print(np.corrcoef(predicted_vs_actual[:,0],predicted_vs_actual[:,2]))
        #print(np.corrcoef(predicted_vs_actual[:,1],predicted_vs_actual[:,3]))
        sess.close()
run_training()
In [267]:
# NOTE(review): this cell references `graph`, `num_steps`, `train_batches`,
# `num_unrollings`, `train_data`, `batch_size`, `optimizer`, `loss`,
# `train_prediction`, `learning_rate`, `sample_prediction` and
# `sample_input`, none of which are defined anywhere in this notebook.  It
# appears to be pasted from a different (sequence-model) notebook and cannot
# run as-is; kept for reference only.
summary_frequency = 100
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    mean_loss = 0
    for step in range(num_steps):
        batches = train_batches.next()
        feed_dict = dict()
        # Feed one unrolled sequence of batches into the graph's placeholders.
        for i in range(num_unrollings+1):
            #this_x = np.reshape(this_x,(1, FIN_SIZE))
            #print("i: ",i)
            #print(batches[i].shape)
            feed_dict[train_data[i]] = np.reshape(batches[i],(batch_size,2))
        _, l, predictions, lr = session.run(
            [optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
        mean_loss += l
        # Periodic progress report.
        if step % summary_frequency == 0:
            if step > 0:
                mean_loss = mean_loss / summary_frequency
            # The mean loss is an estimate of the loss over the last few batches.
            print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
            #if step % (summary_frequency * 10) == 0:
            #print('=' * 80)
            #prediction_list = []
            #for _ in range(1000):
            #  start_point = np.reshape(y_data[0],(1,1))
            #  prediction = sample_prediction.eval({sample_input: start_point})
            #  prediction_list.append(prediction[0][0])
            #print('=' * 80)
    print('=' * 80)
    # Post-training rollout.  NOTE(review): same undefined
    # `sample_prediction`/`sample_input` problem as in run_training above.
    step_by_step = False
    if step_by_step:
        predictions = np.zeros(shape=[NSAMPLE, 1])
        start_point = np.reshape(data[0,:],(1,2))
        for i in xrange(1,data.shape[0]):
            prediction = sample_prediction.eval({sample_input: start_point})
            #start_point = np.reshape(data[i,0],prediction[0,1],(1,2))
            start_point[:,0] = data[i,0]
            start_point[:,1] = prediction[0,1]
            predictions[i] = prediction[0,1]
    else:
        predictions = np.zeros(shape=[NSAMPLE, ])
        start_point = np.reshape(data[0,:],(1,2))
        for step in range(NSAMPLE):
            prediction = sample_prediction.eval({sample_input: start_point})
            start_point = np.reshape(prediction[0,:],(1,2))
            # NOTE(review): `predictions[i]` uses a stale `i`; the loop
            # variable is `step` -- looks like a bug.
            predictions[i] = prediction[0,1]
    print('=' * 80)
In [19]:
# Overlay the ground-truth series (red) with the model's rollout
# predictions (blue) on the same x axis.
print(predictions.shape)
plt.figure(figsize=(8, 8))
plt.plot(x_data[:], y_data[:], 'r-', x_data[:], predictions[:], 'b-')
plt.show()