CSCI 5622 Fall 2017 HW#3

Brian McKean

Code


In [1]:
import matplotlib.pyplot as plt
import json
import numpy as np
plt.style.use('ggplot')
%matplotlib inline
from collections import defaultdict
import seaborn as sns
import pandas as pd

In [2]:
from pydataset import data

In [3]:
import pickle, gzip
In [4]:
class Network:
    def __init__(self, sizes):
        self.L = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
        self.weights = [np.random.randn(n, m) for (m,n) in zip(self.sizes[:-1], self.sizes[1:])]
        self.epoch_results = []

    def g(self, z):
        """
        activation function
        """
        return sigmoid(z)

    def g_prime(self, z):
        """
        derivative of activation function
        """
        return sigmoid_prime(z)

    def forward_prop(self, a):
        """
        memory-aware forward propagation, for testing only;
        back_prop implements its own forward pass
        """
        for (W,b) in zip(self.weights, self.biases):
            a = self.g(np.dot(W, a) + b)
        return a

    def gradC(self, a, y):
        """
        gradient of cost function
        Assumes C(a,y) = (a-y)^2/2
        """
        return (a - y)
    
    def SGD_train(self, train, epochs, eta, lam=0.0, verbose=True, test=None):
        """
        SGD for training parameters
        epochs is the number of epochs to run
        eta is the learning rate
        lam is the L2-regularization parameter
        If verbose is set, prints progressive accuracy updates
        If a test set is provided, also reports test-set accuracy as learning evolves
        """
        n_train = len(train)
        acc_train = None  # most recent train accuracy (computed only when verbose)
        for epoch in range(epochs):
            perm = np.random.permutation(n_train)
            for kk in range(n_train):
                xk = train[perm[kk]][0]
                yk = train[perm[kk]][1]
                #print ("xk, yK", np.array(xk.shape), np.array(yk).shape)
                dWs, dbs = self.back_prop(xk, yk)
                # L2 regularization: fold the weight-decay term lam*W into the update
                self.weights = [W - eta*(dW + lam*W) for (W, dW) in zip(self.weights, dWs)]
                self.biases = [b - eta*db  for (b, db) in zip(self.biases, dbs)]
            if verbose:
                if epoch == 0 or (epoch + 1) % 15 == 0:
                    acc_train = self.evaluate(train)
                    if test is not None:
                        acc_test = self.evaluate(test)
                        print("Epoch {:4d}: Train {:10.5f}, Test {:10.5f}".format(epoch+1, acc_train, acc_test))
                        self.epoch_results.append((epoch+1, acc_train, acc_test))
                    else:
                        print("Epoch {:4d}: Train {:10.5f}".format(epoch+1, acc_train))
                        self.epoch_results.append((epoch+1, acc_train))
        return acc_train

    def back_prop(self, x, y):
        """
        Back propagation for derivatives of C wrt parameters
        """
        db_list = [np.zeros(b.shape) for b in self.biases]
        dW_list = [np.zeros(W.shape) for W in self.weights]


        # Forward pass: cache activations and weighted inputs for each layer
        a = x
        a_list = [a]
        z_list = [np.zeros(a.shape)]  # pad with a throwaway entry so indices match layers
        for W, b in zip(self.weights, self.biases):
            z = np.dot(W, a) + b
            z_list.append(z)
            a = self.g(z)
            a_list.append(a)

        # Backward pass: delta at the output layer
        # (z and a still hold the output layer's values from the loop above)
        delta = self.g_prime(z) * self.gradC(a, y)
        for ell in range(self.L-2, -1, -1):
            db_list[ell] = delta
            dW_list[ell] = delta.dot(a_list[ell].T)
            # Propagate delta back through layer ell's weights
            delta = np.dot(self.weights[ell].T, delta) * self.g_prime(z_list[ell])

        return (dW_list, db_list)

    def evaluate(self, test):
        """
        Evaluate current model on labeled test data
        """
        ctr = 0
        for x, y in test:
            yhat = self.forward_prop(x)
            ctr += np.argmax(yhat) == np.argmax(y)
        return float(ctr) / float(len(test))

    def compute_cost(self, x, y):
        """
        Evaluate the cost function for a specified
        training example.
        """
        a = self.forward_prop(x)
        return 0.5*np.linalg.norm(a-y)**2
    
    def get_epoch_results(self):
        return self.epoch_results

In [5]:
def sigmoid(z, threshold=20):
    # Clip z to avoid overflow in np.exp for large |z|
    z = np.clip(z, -threshold, threshold)
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z) * (1.0 - sigmoid(z))

def mnist_digit_show(flatimage, outname=None):
    # Reshape the flattened 196-vector back into a 14x14 image
    image = np.reshape(flatimage, (-1, 14))

    plt.matshow(image, cmap=plt.cm.binary)
    plt.xticks([])
    plt.yticks([])
    if outname:
        plt.savefig(outname)
    else:
        plt.show()
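
With the activation helpers in place, a quick finite-difference check can confirm that back_prop computes correct derivatives. This is a minimal sketch, not part of the assignment; the toy layer sizes, epsilon, and tolerance are arbitrary choices.

# Sanity check: compare back_prop's analytic gradients against central differences
eps = 1e-5
net = Network([4, 3, 2])          # tiny network, arbitrary sizes
x = np.random.randn(4, 1)
y = np.zeros((2, 1)); y[0] = 1.0  # arbitrary one-hot target

dWs, dbs = net.back_prop(x, y)
for ell in range(net.L - 1):
    i = np.random.randint(net.weights[ell].shape[0])
    j = np.random.randint(net.weights[ell].shape[1])
    net.weights[ell][i, j] += eps
    c_plus = net.compute_cost(x, y)
    net.weights[ell][i, j] -= 2 * eps
    c_minus = net.compute_cost(x, y)
    net.weights[ell][i, j] += eps  # restore the original weight
    num_grad = (c_plus - c_minus) / (2 * eps)
    assert abs(num_grad - dWs[ell][i, j]) < 1e-6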

In [6]:
#f = gzip.open('../data/tinyTOY.pkl.gz', 'rb')  # toy data used while debugging
f = gzip.open('../data/tinyMNIST.pkl.gz', 'rb')
u = pickle._Unpickler(f)   # the data was pickled under Python 2,
u.encoding = 'latin1'      # so decode byte strings as latin1
train, test = u.load()

#nn = Network([2,30,2])    # sizing for the tinyTOY data
nn = Network([196,30,10])  # 14x14 = 196 inputs, 30 hidden units, 10 digit classes
nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test)


Epoch    1: Train    0.09444, Test    0.07843
Epoch   15: Train    0.60064, Test    0.52381
Epoch   30: Train    0.80072, Test    0.69348
Epoch   45: Train    0.84354, Test    0.77351
Epoch   60: Train    0.86635, Test    0.80152
Epoch   75: Train    0.86955, Test    0.80752
Epoch   90: Train    0.87955, Test    0.80912
Out[6]:
0.8795518207282913
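
As a quick sanity check on the data, the mnist_digit_show helper above can render one training example; each element of train is an (x, y) pair with x a flattened 14x14 image and y a one-hot label.

x0, y0 = train[0]
print("label:", np.argmax(y0))  # recover the digit from the one-hot label
mnist_digit_show(x0)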

In [7]:
### Plot accuracy versus hidden-layer size

In [8]:
acc_results_nodes = []
acc_results_acc_test = []
acc_results_acc_train = []

In [9]:
for num_nodes in range(10, 41, 5):
    print("Num nodes", num_nodes)
    nn = Network([196,num_nodes,10])
    acc_results_acc_train.append(nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test))
    acc_results_nodes.append(num_nodes)
    acc_results_acc_test.append(nn.evaluate(test))


Num nodes 10
Epoch    1: Train    0.12925, Test    0.11685
Epoch   15: Train    0.34814, Test    0.33573
Epoch   30: Train    0.53341, Test    0.51461
Epoch   45: Train    0.61985, Test    0.58703
Epoch   60: Train    0.76991, Test    0.70468
Epoch   75: Train    0.82833, Test    0.75350
Epoch   90: Train    0.85914, Test    0.77911
Num nodes 15
Epoch    1: Train    0.12725, Test    0.08283
Epoch   15: Train    0.50300, Test    0.46899
Epoch   30: Train    0.65746, Test    0.60864
Epoch   45: Train    0.80912, Test    0.71989
Epoch   60: Train    0.85034, Test    0.76911
Epoch   75: Train    0.86595, Test    0.79672
Epoch   90: Train    0.87155, Test    0.80712
Num nodes 20
Epoch    1: Train    0.17047, Test    0.19328
Epoch   15: Train    0.62265, Test    0.56663
Epoch   30: Train    0.79192, Test    0.72709
Epoch   45: Train    0.84714, Test    0.78752
Epoch   60: Train    0.86234, Test    0.80232
Epoch   75: Train    0.87075, Test    0.80912
Epoch   90: Train    0.87675, Test    0.81232
Num nodes 25
Epoch    1: Train    0.15086, Test    0.13325
Epoch   15: Train    0.64746, Test    0.56823
Epoch   30: Train    0.79512, Test    0.72629
Epoch   45: Train    0.84234, Test    0.77431
Epoch   60: Train    0.86515, Test    0.79112
Epoch   75: Train    0.87435, Test    0.80512
Epoch   90: Train    0.87955, Test    0.80912
Num nodes 30
Epoch    1: Train    0.15526, Test    0.13165
Epoch   15: Train    0.57183, Test    0.49940
Epoch   30: Train    0.71949, Test    0.62945
Epoch   45: Train    0.84954, Test    0.77631
Epoch   60: Train    0.86795, Test    0.79552
Epoch   75: Train    0.87635, Test    0.80672
Epoch   90: Train    0.88155, Test    0.80992
Num nodes 35
Epoch    1: Train    0.16006, Test    0.11685
Epoch   15: Train    0.58183, Test    0.52141
Epoch   30: Train    0.80592, Test    0.73830
Epoch   45: Train    0.84994, Test    0.78752
Epoch   60: Train    0.86795, Test    0.80352
Epoch   75: Train    0.87475, Test    0.80472
Epoch   90: Train    0.88035, Test    0.81192
Num nodes 40
Epoch    1: Train    0.19568, Test    0.12965
Epoch   15: Train    0.66707, Test    0.56863
Epoch   30: Train    0.80912, Test    0.73149
Epoch   45: Train    0.84554, Test    0.78031
Epoch   60: Train    0.86515, Test    0.79552
Epoch   75: Train    0.87715, Test    0.80792
Epoch   90: Train    0.87835, Test    0.81112

In [10]:
print(acc_results_nodes, acc_results_acc_train, acc_results_acc_test)


[10, 15, 20, 25, 30, 35, 40] [0.8591436574629852, 0.8715486194477791, 0.876750700280112, 0.8795518207282913, 0.8815526210484194, 0.8803521408563425, 0.8783513405362144] [0.7943177270908364, 0.8119247699079631, 0.8159263705482193, 0.8107242897158864, 0.8115246098439376, 0.8135254101640657, 0.8103241296518607]

In [11]:
acc_results_nodes = np.array(acc_results_nodes)
acc_results_acc_train = np.array(acc_results_acc_train)
acc_results_acc_test = np.array(acc_results_acc_test)

In [12]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
sns.set_style("whitegrid")

x = acc_results_nodes
y1 = acc_results_acc_train
y2 = acc_results_acc_test

# label must be a string, not a list, for the legend to render correctly
ax.plot(x, y1, lw=2, label='Train Accuracy')
ax.plot(x, y2, lw=2, label='Test Accuracy')

ax.set_xlabel('Number of Nodes in Hidden Layer')
ax.set_ylabel('Accuracy')
ax.set_ylim(0, 1.00)

plt.title('HW3 1.2 Accuracy versus number of hidden nodes (with 100 epochs)')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
plt.clf()
plt.close(fig)


[Figure: train and test accuracy vs. number of hidden nodes, full y-axis 0-1]

In [17]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
sns.set_style("whitegrid")

x = acc_results_nodes
y1 = acc_results_acc_train
y2 = acc_results_acc_test

ax.plot(x, y1, lw=2, label='Train Accuracy')
ax.plot(x, y2, lw=2, label='Test Accuracy')

ax.set_xlabel('Number of Nodes in Hidden Layer')
ax.set_ylabel('Accuracy')
ax.set_ylim(0.75, 1.00)  # zoom in on the interesting range

plt.title('HW3 1.2 Accuracy versus number of hidden nodes (with 100 epochs)')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
plt.clf()
plt.close(fig)


[Figure: same plot, y-axis zoomed to 0.75-1.00]

In [19]:
nn = Network([196,30,10])
nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test)


Epoch    1: Train    0.13365, Test    0.14606
Epoch   15: Train    0.48179, Test    0.43778
Epoch   30: Train    0.79112, Test    0.71749
Epoch   45: Train    0.83393, Test    0.77431
Epoch   60: Train    0.86395, Test    0.79472
Epoch   75: Train    0.87195, Test    0.81192
Epoch   90: Train    0.87795, Test    0.81553
Out[19]:
0.8779511804721889

In [20]:
epoch_results = nn.get_epoch_results()
x_epochs = []
y_test_acc = []
y_train_acc = []
for x, y1, y2 in epoch_results:
    x_epochs.append(x)
    y_train_acc.append(y1)
    y_test_acc.append(y2)
    
y30_test = list(y_test_acc)
y30_train = list(y_train_acc)

In [21]:
nn = Network([196,20,10])
nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test)


Epoch    1: Train    0.12645, Test    0.14326
Epoch   15: Train    0.49300, Test    0.44378
Epoch   30: Train    0.74070, Test    0.68868
Epoch   45: Train    0.81353, Test    0.74870
Epoch   60: Train    0.84594, Test    0.78631
Epoch   75: Train    0.86715, Test    0.79952
Epoch   90: Train    0.87435, Test    0.80752
Out[21]:
0.8743497398959584

In [22]:
epoch_results = nn.get_epoch_results()
x_epochs = []
y_test_acc = []
y_train_acc = []
for x, y1, y2 in epoch_results:
    x_epochs.append(x)
    y_train_acc.append(y1)
    y_test_acc.append(y2)
    
y20_test = y_test_acc
y20_train = y_train_acc

In [23]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
sns.set_style("whitegrid")

x = x_epochs

ax.plot(x, y30_train, lw=2, label='Train Acc - 30 nodes')
ax.plot(x, y30_test, lw=2, label='Test Acc - 30 nodes')
ax.plot(x, y20_train, lw=2, label='Train Acc - 20 nodes')
ax.plot(x, y20_test, lw=2, label='Test Acc - 20 nodes')

ax.set_ylim(0, 1.00)
ax.set_xlabel('Number of Epochs')
ax.set_ylabel('Accuracy')

plt.title('HW3 1.2 Accuracy versus epochs with 20 and 30 hidden nodes')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
plt.clf()
plt.close(fig)


[Figure: train/test accuracy vs. epochs for 20- and 30-node hidden layers]

In [25]:
results = pd.read_csv('rnn_tests.csv')

In [29]:
results


Out[29]:
    num_words  example_len  batch_size  embedding_len  lstm_units_1  Best Accuracy  Epoch of 3 for best
0        5000          128          32             64           128         0.8624                    2
1        5000          256          32             64           128         0.8676                    2
2        5000          512          32             64           128         0.8664                    2
3        5000          768          32             64           128         0.8671                    3
4        5000         1024          32             64           128         0.8682                    2
5        1000          512          32             64           128         0.8490                    3
6        2500          512          32             64           128         0.8622                    2
7        5000          512          32             64           128         0.8707                    3
8        7500          512          32             64           128         0.8613                    3
9       10000          512          32             64           128         0.8519                    3
10       5000          512          32             16           128         0.8571                    3
11       5000          512          32             32           128         0.8608                    3
12       5000          512          32             64           128         0.8659                    3
13       5000          512          32            128           128         0.8715                    3
14       5000          512          32            256           128         0.8744                    2
15       5000          512          16             64           128         0.8535                    3
16       5000          512          32             64           128         0.8525                    3
17       5000          512          64             64           128         0.8532                    1
18       5000          512         128             64           128         0.8712                    2
19       5000          512         256             64           128         0.8792                    2
20       5000          512          32             64            64         0.8687                    2
21       5000          512          32             64            96         0.8616                    3
22       5000          512          32             64           128         0.8739                    2
23       5000          512          32             64           192         0.8058                    1
24       5000          512          32             64           256         0.8758                    3
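
The script that produced rnn_tests.csv is not included in this notebook. The sketch below is a hypothetical reconstruction wired to the table's columns (num_words, example_len, batch_size, embedding_len, lstm_units_1); the dataset (Keras's built-in IMDB sentiment set) and the single-LSTM architecture are assumptions, not confirmed by anything above.

# Hypothetical reconstruction of one rnn_tests.csv run; dataset choice and
# architecture are assumptions, not taken from this notebook.
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

num_words, example_len = 5000, 512                    # values from row 7
batch_size, embedding_len, lstm_units_1 = 32, 64, 128

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)
X_train = pad_sequences(X_train, maxlen=example_len)  # truncate/pad reviews
X_test = pad_sequences(X_test, maxlen=example_len)

model = Sequential()
model.add(Embedding(num_words, embedding_len, input_length=example_len))
model.add(LSTM(lstm_units_1))
model.add(Dense(1, activation='sigmoid'))             # binary sentiment
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=batch_size, epochs=3,
          validation_data=(X_test, y_test))

Under this reading, each table row is one such 3-epoch run with a single hyperparameter varied, and "Best Accuracy" is the best validation accuracy over those epochs.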
