In [1]:
import matplotlib.pyplot as plt
import json
import numpy as np
plt.style.use('ggplot')
%matplotlib inline
from collections import defaultdict
import seaborn as sns
import pandas as pd
In [2]:
import seaborn as sns
from pydataset import data
In [3]:
import numpy as np
import pickle, gzip
import matplotlib.pyplot as plt
In [4]:
class Network:

    def __init__(self, sizes):
        self.L = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(n, 1) for n in self.sizes[1:]]
        self.weights = [np.random.randn(n, m)
                        for (m, n) in zip(self.sizes[:-1], self.sizes[1:])]
        self.epoch_results = []

    def g(self, z):
        """
        Activation function.
        """
        return sigmoid(z)

    def g_prime(self, z):
        """
        Derivative of the activation function.
        """
        return sigmoid_prime(z)

    def forward_prop(self, a):
        """
        Memory-aware forward propagation, for testing only;
        back_prop implements its own forward pass.
        """
        for (W, b) in zip(self.weights, self.biases):
            a = self.g(np.dot(W, a) + b)
        return a

    def gradC(self, a, y):
        """
        Gradient of the cost function.
        Assumes C(a, y) = (a - y)^2 / 2.
        """
        return (a - y)

    def SGD_train(self, train, epochs, eta, lam=0.0, verbose=True, test=None):
        """
        Stochastic gradient descent for training the parameters.
        epochs is the number of epochs to run
        eta is the learning rate
        lam is the L2-regularization parameter
        If verbose is set, prints progressive accuracy updates.
        If a test set is provided, also prints accuracy on it as learning evolves.
        """
        n_train = len(train)
        for epoch in range(epochs):
            perm = np.random.permutation(n_train)
            for kk in range(n_train):
                xk = train[perm[kk]][0]
                yk = train[perm[kk]][1]
                dWs, dbs = self.back_prop(xk, yk)
                # Gradient step; the lam*W term is the L2 regularization
                self.weights = [W - eta*(dW + lam*W) for (W, dW) in zip(self.weights, dWs)]
                self.biases = [b - eta*db for (b, db) in zip(self.biases, dbs)]
            if verbose:
                if epoch == 0 or (epoch + 1) % 15 == 0:
                    acc_train = self.evaluate(train)
                    if test is not None:
                        acc_test = self.evaluate(test)
                        print("Epoch {:4d}: Train {:10.5f}, Test {:10.5f}".format(epoch + 1, acc_train, acc_test))
                        self.epoch_results.append((epoch + 1, acc_train, acc_test))
                    else:
                        print("Epoch {:4d}: Train {:10.5f}".format(epoch + 1, acc_train))
                        self.epoch_results.append((epoch + 1, acc_train))
        # Return the most recently computed training accuracy
        return acc_train

    def back_prop(self, x, y):
        """
        Back propagation for derivatives of C with respect to the parameters.
        """
        db_list = [np.zeros(b.shape) for b in self.biases]
        dW_list = [np.zeros(W.shape) for W in self.weights]
        # Forward pass, caching the activations and pre-activations
        a = x
        a_list = [a]
        z_list = [np.zeros(a.shape)]  # Pad with a throwaway entry so indices match
        for W, b in zip(self.weights, self.biases):
            z = np.dot(W, a) + b
            z_list.append(z)
            a = self.g(z)
            a_list.append(a)
        # Back-propagate deltas to compute the derivatives,
        # starting with delta at the last layer
        delta = self.g_prime(z) * self.gradC(a, y)
        for ell in range(self.L - 2, -1, -1):
            db_list[ell] = delta
            dW_list[ell] = delta.dot(a_list[ell].T)
            delta = np.dot(self.weights[ell].T, delta) * self.g_prime(z_list[ell])
        return (dW_list, db_list)

    def evaluate(self, test):
        """
        Evaluate the current model on labeled test data.
        """
        ctr = 0
        for x, y in test:
            yhat = self.forward_prop(x)
            ctr += np.argmax(yhat) == np.argmax(y)
        return float(ctr) / float(len(test))

    def compute_cost(self, x, y):
        """
        Evaluate the cost function for a specified training example.
        """
        a = self.forward_prop(x)
        return 0.5 * np.linalg.norm(a - y)**2

    def get_epoch_results(self):
        return self.epoch_results
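back_prop implements the usual recursions: at the output layer delta = g'(z) * (a - y), at earlier layers delta_ell = (W_{ell+1}^T delta_{ell+1}) * g'(z_ell), with dC/dW_ell = delta_ell a_{ell-1}^T and dC/db_ell = delta_ell. A quick way to sanity-check the implementation is to compare one derivative from back_prop against a central finite difference of compute_cost. The sketch below is an optional check, not part of the assignment code, and gradient_check is a name introduced here for illustration:
In [ ]:
def gradient_check(nn, x, y, ell=0, i=0, j=0, eps=1e-5):
    """Compare back_prop's dC/dW[ell][i,j] to a central finite difference."""
    dWs, _ = nn.back_prop(x, y)
    W_orig = nn.weights[ell][i, j]
    # Central finite difference of the cost with respect to one weight
    nn.weights[ell][i, j] = W_orig + eps
    c_plus = nn.compute_cost(x, y)
    nn.weights[ell][i, j] = W_orig - eps
    c_minus = nn.compute_cost(x, y)
    nn.weights[ell][i, j] = W_orig  # restore the original weight
    return dWs[ell][i, j], (c_plus - c_minus) / (2 * eps)
Once the data is loaded, something like gradient_check(nn, *train[0]) should return two numbers that agree to several decimal places.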
In [5]:
def sigmoid(z, threshold=20):
    # Clip z to avoid overflow in np.exp for large |z|; the sigmoid is
    # already saturated to within ~2e-9 of 0 or 1 at |z| = 20.
    z = np.clip(z, -threshold, threshold)
    return 1.0/(1.0 + np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z) * (1.0 - sigmoid(z))

def mnist_digit_show(flatimage, outname=None):
    import matplotlib.pyplot as plt
    image = np.reshape(flatimage, (-1, 14))
    plt.matshow(image, cmap=plt.cm.binary)
    plt.xticks([])
    plt.yticks([])
    if outname:
        plt.savefig(outname)
    else:
        plt.show()
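A small numerical aside (not part of the assignment code) confirms that the clip is harmless and that sigmoid_prime matches a central difference:
In [ ]:
# sigmoid saturates well before the clip threshold of 20
print(sigmoid(np.array([-1000.0, -20.0, 0.0, 20.0, 1000.0])))
# central-difference check of sigmoid_prime at a sample point
z, eps = 0.5, 1e-6
print(sigmoid_prime(z), (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps))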
In [6]:
#f = gzip.open('../data/tinyTOY.pkl.gz', 'rb') # change path to ../data/tinyMNIST.pkl.gz after debugging
f = gzip.open('../data/tinyMNIST.pkl.gz', 'rb')
train, test = pickle.load(f, encoding='latin1')
#nn = Network([2,30,2])
nn = Network([196,30,10])
nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test)
Out[6]:
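SGD_train and evaluate assume each element of train and test is an (x, y) pair, with x a 196x1 column vector (a flattened 14x14 image, judging from mnist_digit_show) and y a 10x1 one-hot label; this format is inferred from Network([196,30,10]) and the np.argmax calls rather than stated anywhere. A quick inspection cell:
In [ ]:
x0, y0 = train[0]
print(x0.shape, y0.shape)   # expect (196, 1) and (10, 1)
print(np.argmax(y0))        # the digit this example is labeled as
mnist_digit_show(x0)        # render the flattened image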
In [7]:
### Plot accuracy versus hidden layer
In [8]:
acc_results_nodes = []
acc_results_acc_test = []
acc_results_acc_train = []
In [9]:
for num_nodes in range(10, 41, 5):
    print("Num nodes", num_nodes)
    nn = Network([196, num_nodes, 10])
    acc_results_acc_train.append(nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test))
    acc_results_nodes.append(num_nodes)
    acc_results_acc_test.append(nn.evaluate(test))
In [10]:
print(acc_results_nodes, acc_results_acc_train, acc_results_acc_test)
In [11]:
acc_results_nodes = np.array(acc_results_nodes)
acc_results_acc_train = np.array(acc_results_acc_train)
acc_results_acc_test = np.array(acc_results_acc_test)
In [12]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
sns.set_style("whitegrid")
x = acc_results_nodes
y2 = acc_results_acc_test
y1 = acc_results_acc_train
line1, = ax.plot(x, y1, lw=2, label='Train Accuracy')
line2, = ax.plot(x, y2, lw=2, label='Test Accuracy')
ax.set_xlabel('Number of Nodes in Hidden layer')
ax.set_ylabel('Accuracy')
ax.set_ylim(0,1.00)
plt.title('HW3 1.2 Accuracy versus number of hidden nodes (with 100 epochs)')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
plt.clf()
plt.close(fig)
In [17]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
sns.set_style("whitegrid")
x = acc_results_nodes
y2 = acc_results_acc_test
y1 = acc_results_acc_train
line1, = ax.plot(x, y1, lw=2, label='Train Accuracy')
line2, = ax.plot(x, y2, lw=2, label='Test Accuracy')
ax.set_xlabel('Number of Nodes in Hidden layer')
ax.set_ylabel('Accuracy')
ax.set_ylim(0.75,1.00)
plt.title('HW3 1.2 Accuracy versus number of hidden nodes (with 100 epochs)')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
plt.clf()
plt.close(fig)
In [19]:
nn = Network([196,30,10])
nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test)
Out[19]:
In [20]:
epoch_results = nn.get_epoch_results()
x_epochs = []
y_test_acc = []
y_train_acc = []
for x, y1, y2 in epoch_results:
    x_epochs.append(x)
    y_train_acc.append(y1)
    y_test_acc.append(y2)
y30_test = list(y_test_acc)
y30_train = list(y_train_acc)
In [21]:
nn = Network([196,20,10])
nn.SGD_train(train, epochs=100, eta=0.01, lam=0.001, verbose=True, test=test)
Out[21]:
In [22]:
epoch_results = nn.get_epoch_results()
x_epochs = []
y_test_acc = []
y_train_acc = []
for x, y1, y2 in epoch_results:
    x_epochs.append(x)
    y_train_acc.append(y1)
    y_test_acc.append(y2)
y20_test = y_test_acc
y20_train = y_train_acc
In [23]:
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
sns.set_style("whitegrid")
x = x_epochs
line1, = ax.plot(x, y30_train, lw=2, label='Train Acc - 30 nodes')
line2, = ax.plot(x, y30_test, lw=2, label='Test Acc - 30 nodes')
line3, = ax.plot(x, y20_train, lw=2, label='Train Acc - 20 nodes')
line4, = ax.plot(x, y20_test, lw=2, label='Test Acc - 20 nodes')
ax.set_ylim(0,1.00)
ax.set_xlabel('Number of Epochs')
ax.set_ylabel('Accuracy')
plt.title('HW3 1.2 Accuracy versus epochs with 20 and 30 hidden nodes')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()
plt.clf()
plt.close(fig)
In [25]:
results = pd.read_csv('rnn_tests.csv')
In [29]:
print(results)
Out[29]:
In [ ]: