CS 20SI: TensorFlow for Deep Learning Research
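Note: the so-called "stochastic" gradient descent used in this notebook visits the samples in the same fixed order on every pass, because the data is never shuffled between iterations. Strictly speaking it is therefore per-sample (online) gradient descent, not true stochastic gradient descent.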
In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import xlrd
%matplotlib inline
In [2]:
# Load the fire/theft data set from the first sheet of the Excel workbook.
# The path uses forward slashes so it resolves on Windows as well as on
# POSIX systems (a backslash-separated literal only resolves on Windows).
FILE_NAME = "./data/slr05.xls"
book = xlrd.open_workbook(filename=FILE_NAME, encoding_override="UTF-8")
sheet = book.sheet_by_index(0)
# Row 0 is skipped (presumably a header row -- TODO confirm); every
# remaining spreadsheet row becomes one row of the array.
data = np.asarray([sheet.row_values(i) for i in range(1, sheet.nrows)])
In [3]:
# Graph for the linear model Y_pred = w * X + b, trained by gradient descent
# on a mean-squared-error loss.
with tf.name_scope("FNT"):
    with tf.name_scope("Input"):
        X = tf.placeholder(name="X", dtype=tf.float32)
        Y = tf.placeholder(name="Y", dtype=tf.float32)

    # Slope and intercept both start at zero.
    w = tf.Variable(0.0, name="w")
    b = tf.Variable(0.0, name="b")
    Y_pred = w * X + b

    with tf.name_scope("Loss"):
        # A plain tf.square(Y - Y_pred) is only a scalar when a single sample
        # is fed; taking the mean keeps the loss scalar for a whole batch too.
        residual = Y - Y_pred
        loss = tf.reduce_mean(tf.square(residual), name="loss")

    with tf.name_scope("Train"):
        gradient_descent = tf.train.GradientDescentOptimizer(learning_rate=0.00001)
        optimizer = gradient_descent.minimize(loss)

    with tf.name_scope(name="Summary") as scope:
        tf.summary.scalar(name="loss", tensor=loss)
        # Merge only the summaries registered under this name scope.
        scoped_summaries = tf.get_collection(key=tf.GraphKeys.SUMMARIES, scope=scope)
        summary_op = tf.summary.merge(inputs=scoped_summaries)
In [4]:
# Per-sample gradient descent on the linear model: every pass feeds the
# samples one at a time, in the order they appear in `data`.
with tf.Session() as sess:
    writer = tf.summary.FileWriter(r"./graphs", sess.graph)
    try:
        sess.run(tf.global_variables_initializer())
        step = 0  # one monotonically increasing step per logged summary
        for epoch in range(500):  # Number of iterations over the data set
            for x, y in data:
                _, summary = sess.run([optimizer, summary_op], feed_dict={X: x, Y: y})
                # Using the epoch index here would file every sample of a
                # pass under the same step, so a running counter is used.
                writer.add_summary(summary, global_step=step)
                step += 1
        w_val, b_val = sess.run([w, b])
    finally:
        # Close the event file even when training raises.
        writer.close()
print("w:", w_val, "b:", b_val)
In [5]:
# Scatter the observations and overlay the fitted line w_val * x + b_val.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x=data[:, 0], y=data[:, 1], label="data")

x_plot = np.arange(0, 45, 0.1)
y_plot = w_val * x_plot + b_val
ax.plot(x_plot, y_plot, color="red", label="predict")

ax.set_xlabel("fires per 1000 housing units")
ax.set_ylabel("thefts per 1000 population")
ax.legend()
plt.show()
In [6]:
# Full-batch gradient descent: every iteration feeds all samples at once, so
# the loss is the mean squared error over the whole data set.
# NOTE(review): a fresh optimizer op is created here, but its learning rate
# (0.00001) is the same value the per-sample optimizer uses -- it is not
# actually smaller. Tune it here if batch training needs a different rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.00001).minimize(loss)
with tf.Session() as sess:
    writer = tf.summary.FileWriter(r"./graphs", sess.graph)
    try:
        sess.run(tf.global_variables_initializer())
        for i in range(1000):  # Number of iterations
            _, summary = sess.run(
                [optimizer, summary_op],
                feed_dict={X: data.T[0], Y: data.T[1]},
            )
            writer.add_summary(summary=summary, global_step=i)
        w_val, b_val = sess.run([w, b])
    finally:
        # Close the event file even when training raises.
        writer.close()
print("w:", w_val, "b:", b_val)
In [7]:
# Plot the data points together with the line learned by the batch run.
fig, ax = plt.subplots(figsize=(8, 5))

x_plot = np.arange(0, 45, 0.1)
y_plot = w_val * x_plot + b_val

ax.scatter(x=data[:, 0], y=data[:, 1], label="data")
ax.plot(x_plot, y_plot, color="red", label="predict")
ax.set_xlabel("fires per 1000 housing units")
ax.set_ylabel("thefts per 1000 population")
ax.legend()
plt.show()
In [8]:
# Graph for the cubic model Y_pred = w3*X^3 + w2*X^2 + w1*X + w0, trained
# with Adam on a mean-squared-error loss.
with tf.name_scope("FNT"):
    with tf.name_scope("Input"):
        X = tf.placeholder(name="X", dtype=tf.float32)
        Y = tf.placeholder(name="Y", dtype=tf.float32)

    # One scalar coefficient per power of X, all starting at zero.
    w3 = tf.Variable(0.0, name="w3")
    w2 = tf.Variable(0.0, name="w2")
    w1 = tf.Variable(0.0, name="w1")
    w0 = tf.Variable(0.0, name="w0")

    cubic_term = w3 * X * X * X
    quadratic_term = w2 * X * X
    linear_term = w1 * X
    Y_pred = cubic_term + quadratic_term + linear_term + w0

    with tf.name_scope("Loss"):
        squared_error = tf.square(Y - Y_pred)
        loss = tf.reduce_mean(squared_error)

    with tf.name_scope("Train"):
        adam = tf.train.AdamOptimizer(learning_rate=0.001)
        optimizer = adam.minimize(loss)

    with tf.name_scope("Summary") as scope:
        tf.summary.scalar(name="loss", tensor=loss)
        # Merge only the summaries registered under this name scope.
        scoped_summaries = tf.get_collection(key=tf.GraphKeys.SUMMARIES, scope=scope)
        summary_op = tf.summary.merge(inputs=scoped_summaries)
In [9]:
# Per-sample training of the cubic model, one sample per optimizer step.
with tf.Session() as sess:
    writer = tf.summary.FileWriter(r"./graphs", sess.graph)
    try:
        sess.run(tf.global_variables_initializer())
        step = 0  # one monotonically increasing step per logged summary
        for epoch in range(750):  # Number of iterations over the data set
            for x, y in data:
                # Record the loss summary defined with the model so this run
                # shows up in TensorBoard like the other training loops.
                _, summary = sess.run([optimizer, summary_op], feed_dict={X: x, Y: y})
                writer.add_summary(summary, global_step=step)
                step += 1
        # Fetch the coefficients from highest to lowest power.
        param = sess.run([w3, w2, w1, w0])
    finally:
        # Close the event file even when training raises.
        writer.close()
for i, value in enumerate(param):
    print("w{}: {}".format(3 - i, value))
In [10]:
# Plot the data points together with the fitted cubic curve.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(x=data[:, 0], y=data[:, 1], label="data")

# param holds the coefficients ordered from the cubic term to the constant.
coef3, coef2, coef1, coef0 = param[0], param[1], param[2], param[3]
x_plot = np.arange(0, 45, 0.1)
y_plot = coef3 * (x_plot ** 3) + coef2 * x_plot ** 2 + coef1 * x_plot + coef0
ax.plot(x_plot, y_plot, color="red", label="predict")

ax.set_xlabel("fires per 1000 housing units")
ax.set_ylabel("thefts per 1000 population")
ax.legend()
plt.show()
In [11]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST with one-hot encoded labels. The path uses forward slashes so it
# resolves on Windows as well as on POSIX systems.
MNIST = input_data.read_data_sets("../DataSet/mnist", one_hot=True)
In [12]:
# Softmax (multinomial logistic) regression on flattened 28x28 MNIST images.
learning_rate = 0.01
batch_size = 128
n_epochs = 25

with tf.name_scope(name="MNIST"):
    # The leading dimension is left as None so the same graph accepts any
    # number of rows (full training batches, a short final batch, or a whole
    # evaluation set) instead of exactly batch_size rows.
    with tf.name_scope(name="MNIST_Input"):
        X = tf.placeholder(dtype=tf.float32, name="X_MNIST", shape=[None, 784])
    with tf.name_scope(name="MNIST_Ouput_Train"):
        Y = tf.placeholder(dtype=tf.float32, name="Y_MNIST", shape=[None, 10])

    W = tf.Variable(tf.random_normal(shape=[784, 10], stddev=0.01), name="weight")
    # Notes on the bias:
    # * A single scalar bias shared by all 10 outputs cannot change the
    #   prediction: adding the same value to every logit leaves their ranking,
    #   and therefore the argmax, unchanged.
    # * A separate bias per class is used here instead. In practice the model
    #   was observed to work well even without any bias in the output layer
    #   (checked by using outln = tf.matmul(X, W)).
    b = tf.Variable(tf.truncated_normal(dtype=tf.float32, shape=[10]), name="bias")

    with tf.name_scope(name="MNIST_Ouput"):
        outln = tf.matmul(X, W) + b  # logits, one row per example
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=outln, labels=Y)
        loss = tf.reduce_mean(entropy)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

    with tf.name_scope(name="MNIST_Summary") as scope:
        tf.summary.scalar("MNIST_Loss", loss)
        # Show each class's weight column as a 28x28 single-channel image.
        W_sum = tf.transpose(a=W)
        W_sum = tf.reshape(tensor=W_sum, shape=[10, 28, 28, 1])
        tf.summary.image(tensor=W_sum, name="WeightSum", max_outputs=10)
        # Merge only the summaries registered under this name scope.
        summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES, scope))
In [13]:
# Mini-batch training of the MNIST model. The loss of every batch is kept in
# pltLossY for plotting, and the trained variables are checkpointed at the end.
pltLossY = []
with tf.Session() as sess:
    writer = tf.summary.FileWriter(r"./graphs", sess.graph)
    try:
        # Training
        sess.run(tf.global_variables_initializer())
        n_batches = MNIST.train.num_examples // batch_size
        for i in range(n_epochs):
            for j in range(n_batches):
                X_batch, Y_batch = MNIST.train.next_batch(batch_size=batch_size)
                _, loss_val, summary = sess.run(
                    [optimizer, loss, summary_op],
                    feed_dict={X: X_batch, Y: Y_batch},
                )
                pltLossY.append(loss_val)
                # Using the epoch index alone would file every batch of an
                # epoch under the same step; number the batches globally.
                writer.add_summary(summary=summary, global_step=i * n_batches + j)
        # Save model. Make sure the target directory exists first so that
        # saving cannot fail on a missing parent directory.
        tf.gfile.MakeDirs("./model_checkpoints")
        saver = tf.train.Saver()
        saver.save(sess, r"./model_checkpoints/MNIST_LR", n_epochs)
    finally:
        # Close the event file even when training or saving raises.
        writer.close()
# Plot the per-batch training loss (red) and, on top of it, a thinned-out
# curve that keeps every batch_size-th recorded point (blue).
pltLossY = np.array(pltLossY)
pltLossX = np.arange(1, pltLossY.shape[0] + 1, 1)

fig, ax = plt.subplots()
ax.plot(pltLossX, pltLossY, color="red", label="loss")

# Keep the points whose zero-based position is a multiple of batch_size.
pltLossX = pltLossX[::batch_size]
pltLossY = pltLossY[::batch_size]
ax.plot(pltLossX, pltLossY, color="blue", label="loss")

ax.set_xlabel("Batch Number")
ax.set_ylabel("Loss")
ax.set_title("Training Progress")
plt.show()
In [14]:
# Evaluate the checkpointed MNIST model on the test set, batch by batch.
with tf.Session() as sess:
    n_batches = MNIST.test.num_examples // batch_size
    total_correct_preds = 0
    # Load the model (restore the trained variables).
    tf.train.Saver().restore(sess, "./model_checkpoints/MNIST_LR" + "-" + str(n_epochs))
    for _ in range(n_batches):
        X_batch, Y_batch = MNIST.test.next_batch(batch_size)
        preds = sess.run(outln, feed_dict={X: X_batch})
        # Softmax is monotonic, so the argmax of the raw logits is already the
        # predicted class. The comparison is done in NumPy so that no new
        # graph ops are created on every batch.
        predicted_labels = np.argmax(preds, 1)
        true_labels = np.argmax(Y_batch, 1)
        total_correct_preds += int(np.sum(predicted_labels == true_labels))
# Only whole batches are evaluated, so divide by the number of examples that
# were actually scored rather than by the full test-set size.
n_evaluated = n_batches * batch_size
# Print accuracy
print("Accuracy {0}".format(total_correct_preds / float(n_evaluated)))