Linear and Logistic Regression in TensorFlow

Dataset

Fire and Theft in Chicago

Reference

CS 20SI: TensorFlow for Deep Learning Research

Notice

In the so called "stochastic" gradient descent applied in this notebook, the data is not shuffled in each iteration. So it is not stochastic gradient descent technically.

Import corresponding libraries


In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import xlrd
%matplotlib inline

Read dataset


In [2]:
FILE_NAME=r".\data\slr05.xls"
book=xlrd.open_workbook(filename=FILE_NAME,encoding_override="UTF-8")
sheet=book.sheet_by_index(0)
data=np.asarray([sheet.row_values(i) for i in range(1,sheet.nrows)])

Create graph


In [3]:
with tf.name_scope("FNT"):
    with tf.name_scope("Input"):
        X=tf.placeholder(dtype=tf.float32,name="X")
        Y=tf.placeholder(dtype=tf.float32,name="Y")
    w=tf.Variable(0.0,name="w")
    b=tf.Variable(0.0,name="b")
    Y_pred=w*X+b
    with tf.name_scope("Loss"):
        #loss=tf.square(Y-Y_pred)#This works only when using stochstic gradient descent
        loss=tf.reduce_mean(tf.square(Y-Y_pred),name="loss")
    with tf.name_scope("Train"):
        optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.00001).minimize(loss)#How it minimizes a non-scalar variable?
    with tf.name_scope(name="Summary") as scope:
        tf.summary.scalar("loss",loss)
        summary_op=tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES,scope))

Train $w$, $b$ with stochastic gradient descent in a session


In [4]:
with tf.Session() as sess:
    writer=tf.summary.FileWriter(r"./graphs",sess.graph)
    sess.run(tf.global_variables_initializer())
    for i in range(500):#Number of iterations
        for x,y in data:
            _,summary=sess.run([optimizer,summary_op],feed_dict={X:x,Y:y})
            writer.add_summary(summary,global_step=i)
    w_val,b_val=sess.run([w,b])
    writer.close()
print("w:",w_val,"b:",b_val)


w: 2.03623 b: 2.48112

Plot and evaluate fitting


In [5]:
plt.figure(figsize=(8,5))
plt.scatter(x=data.T[0],y=data.T[1],label="data")
x_plot=np.arange(0,45,0.1)
y_plot=w_val*x_plot+b_val
plt.plot(x_plot,y_plot,label="predict",color="red")
plt.xlabel("fires per 1000 housing units")
plt.ylabel("thefts per 1000 population")
plt.legend()
plt.show()


Train $w$, $b$ with batch gradient descent in another session


In [6]:
#Need to use new learning rate for batch gradient descent.
optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.00001).minimize(loss)#Use new optimizer (with smaller learning rate).
with tf.Session() as sess:
    writer=tf.summary.FileWriter(r"./graphs",sess.graph)
    sess.run(tf.global_variables_initializer())
    for i in range(1000):#Number of iterations
        _,summary=sess.run([optimizer,summary_op],feed_dict={X:data.T[0],Y:data.T[1]})
        writer.add_summary(summary=summary,global_step=i)
    w_val,b_val=sess.run([w,b])
    writer.close()
print("w:",w_val,"b:",b_val)


w: 2.14436 b: 0.229683

Plot and evaluate fitting


In [7]:
plt.figure(figsize=(8,5))
plt.scatter(x=data.T[0],y=data.T[1],label="data")
x_plot=np.arange(0,45,0.1)
y_plot=w_val*x_plot+b_val
plt.plot(x_plot,y_plot,label="predict",color="red")
plt.xlabel("fires per 1000 housing units")
plt.ylabel("thefts per 1000 population")
plt.legend()
plt.show()


Create new graph for regression


In [8]:
with tf.name_scope("FNT"):
    with tf.name_scope("Input"):
        X=tf.placeholder(dtype=tf.float32,name="X")
        Y=tf.placeholder(dtype=tf.float32,name="Y")
    w3=tf.Variable(0.0,name="w3")
    w2=tf.Variable(0.0,name="w2")
    w1=tf.Variable(0.0,name="w1")
    w0=tf.Variable(0.0,name="w0")
    Y_pred=w3*X*X*X+w2*X*X+w1*X+w0
    with tf.name_scope("Loss"):
        loss=tf.reduce_mean(tf.square(Y-Y_pred))
    with tf.name_scope("Train"):
        optimizer=tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    with tf.name_scope("Summary") as scope:
        tf.summary.scalar(tensor=loss,name="loss")
        summary_op=tf.summary.merge(inputs=tf.get_collection(key=tf.GraphKeys.SUMMARIES,scope=scope))

Train $w_{3}$, $w_{2}$, $w_{1}$, $w_{0}$ with stochastic gradient descent in a session


In [9]:
with tf.Session() as sess:
    writer=tf.summary.FileWriter(r"./graphs",sess.graph)
    sess.run(tf.global_variables_initializer())
    for i in range(750):#Number of iterations
        for x,y in data:
            sess.run(optimizer,feed_dict={X:x,Y:y})
    param=sess.run([w3,w2,w1,w0])
    writer.close()
for i in range(4):
    print("w{}: {}".format(3-i,param[i]))


w3: 0.00663035549223423
w2: -0.1510164737701416
w1: 1.8178060054779053
w0: 10.240422248840332

Plot and evaluate fitting.


In [10]:
plt.figure(figsize=(8,5))
plt.scatter(x=data.T[0],y=data.T[1],label="data")
x_plot=np.arange(0,45,0.1)
y_plot=param[0]*(x_plot**3)+param[1]*x_plot**2+param[2]*x_plot+param[3]
plt.plot(x_plot,y_plot,label="predict",color="red")
plt.xlabel("fires per 1000 housing units")
plt.ylabel("thefts per 1000 population")
plt.legend()
plt.show()


Import MNIST dataset


In [11]:
from tensorflow.examples.tutorials.mnist import input_data
MNIST=input_data.read_data_sets(r"..\DataSet\mnist",one_hot=True)


Extracting ..\DataSet\mnist\train-images-idx3-ubyte.gz
Extracting ..\DataSet\mnist\train-labels-idx1-ubyte.gz
Extracting ..\DataSet\mnist\t10k-images-idx3-ubyte.gz
Extracting ..\DataSet\mnist\t10k-labels-idx1-ubyte.gz

Create graph


In [12]:
learning_rate=0.01
batch_size=128
n_epochs=25
with tf.name_scope(name="MNIST"):
    with tf.name_scope(name="MNIST_Input"):
        X=tf.placeholder(dtype=tf.float32,name="X_MNIST",shape=[batch_size,784])
    with tf.name_scope(name="MNIST_Ouput_Train"):
        Y=tf.placeholder(dtype=tf.float32,name="Y_MNIST",shape=[batch_size,10])
    W=tf.Variable(tf.random_normal(shape=[784,10],stddev=0.01),name="weight")
    
    #b=tf.Variable(0.0,name="bias")#
    #I don't think applying the same value of bias over all 10 dimensions in output layer will make a difference, 
    #for the reason that if an output of a dimension in the output vector is the max over all 10 dimensions,
    #the ranking would not change if we apply the same bias to all 10 dimensions, thus it makes no difference in 
    #prediction.
    b=tf.Variable(tf.truncated_normal(dtype=tf.float32,shape=[10]),name="bias")
    #Tried to apply different biases to different dimensions, but it turns out 
    #that the model works well without bias in the output layer.
    
    #Verified by outln=tf.matmul(X,W)
    with tf.name_scope(name="MNIST_Ouput"):
        outln=tf.matmul(X,W)+b
        #outln=tf.matmul(X,W)#A verification of a hypothesis above.
    entropy=tf.nn.softmax_cross_entropy_with_logits(logits=outln,labels=Y)
    loss=tf.reduce_mean(entropy)
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
    with tf.name_scope(name="MNIST_Summary") as scope:
        tf.summary.scalar("MNIST_Loss",loss)
        W_sum=tf.transpose(a=W)
        W_sum=tf.reshape(tensor=W_sum,shape=[10,28,28,1])
        tf.summary.image(tensor=W_sum,name="WeightSum",max_outputs=10)
        summary_op=tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES,scope))

Training


In [13]:
pltLossY=[]
with tf.Session() as sess:
    writer=tf.summary.FileWriter(r"./graphs",sess.graph)
    #Training
    sess.run(tf.global_variables_initializer())
    n_batches=int(MNIST.train.num_examples/batch_size)
    for i in range(n_epochs):
        for j in range(n_batches):
            X_batch,Y_batch=MNIST.train.next_batch(batch_size=batch_size)
            _,loss_val,summary=sess.run([optimizer,loss,summary_op],feed_dict={X:X_batch,Y:Y_batch})
            pltLossY.append(loss_val)
        writer.add_summary(summary=summary,global_step=i)
    #Save model
    saver=tf.train.Saver()
    saver.save(sess,r"./model_checkpoints/MNIST_LR",n_epochs)
    #close writer
    writer.close()
#Plotting
pltLossY=np.array(pltLossY)
pltLossX=np.arange(1,np.shape(pltLossY)[0]+1,1)
plt.plot(pltLossX,pltLossY,label="loss",color="red")
pltLossX=np.array([enum[1] for enum in enumerate(pltLossX) if enum[0]%batch_size==0])
pltLossY=np.array([enum[1] for enum in enumerate(pltLossY) if enum[0]%batch_size==0])
plt.plot(pltLossX,pltLossY,label="loss",color="blue")
plt.xlabel("Batch Number")
plt.ylabel("Loss")
plt.title("Training Progress")
#plt.legend()
plt.show()


Testing


In [14]:
with tf.Session() as sess:
    n_batches=int(MNIST.test.num_examples/batch_size)
    total_correct_preds = 0
    #Load the model(load the trianed variables)
    tf.train.Saver().restore(sess,"./model_checkpoints/MNIST_LR"+"-"+str(n_epochs))
    for i in range (n_batches):
        X_batch,Y_batch=MNIST.test.next_batch(batch_size)
        preds=sess.run(outln,feed_dict={X:X_batch,Y:Y_batch})
        #preds=tf.nn.softmax(preds)
        #No need for softmax to make predictions, actually. Spare computation. 
        correct_preds=tf.equal(tf.argmax(preds,1),tf.argmax(Y_batch,1))
        batch_correct_preds=tf.reduce_sum(tf.cast(correct_preds,tf.float32))
        total_correct_preds+=sess.run(batch_correct_preds)
#Print accuracy
print("Accuracy {0}".format(total_correct_preds/MNIST.test.num_examples))


Accuracy 0.9106