Logistic regression is a method for learning a mapping between data and labels. Here we'll focus on the problem where each data sample has exactly one label. The data will be the MNIST dataset: images of handwritten digits between 0 and 9.
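Concretely, for a flattened image $x$, multinomial (softmax) logistic regression models the class probabilities with a weight matrix $W$ and a bias vector $b$:

$$\hat{y} = \mathrm{softmax}(xW + b), \qquad \mathrm{softmax}(z)_k = \frac{e^{z_k}}{\sum_j e^{z_j}},$$

and training minimizes the categorical cross-entropy $-\log \hat{y}_c$ of the correct class $c$ by gradient descent on $W$ and $b$. The code below implements exactly these pieces.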
The main goals for this will be to:
- set up the symbolic Theano computation for logistic regression,
- train the model on MNIST with minibatch gradient descent, and
- visualize the learned weights.
In [ ]:
# You'll need a few different libraries to get started
# Common python science libraries
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Libraries for this project
import theano
import theano.tensor as T
from collections import OrderedDict
from pylearn2.datasets import mnist
from utils import tile_raster_images as tri
In [ ]:
# Load the MNIST training set: X holds flattened images, y holds labels
ds = mnist.MNIST('train')
X = ds.X
y = ds.y
print('Initial data shape: {}'.format(X.shape))
dim = X.shape[1]
classes = 10
Plot a few images:
In [ ]:
n = 100
side = int(np.sqrt(n))
im = tri(X[:n], (28, 28), (side, side), (2, 2))
plt.imshow(im, cmap='gray', interpolation='nearest')
plt.show()
# Print the labels in the same grid layout as the plotted images
for ii in range(side):
    print(y[ii*side:(ii+1)*side].ravel())
Theano will feel a bit odd if you're used to numpy or matlab. You first set up the symbolic computation you want to perform; then you feed data into that computation to get the results you're interested in. In numpy or matlab, these two steps are mixed together: every line both defines and executes a computation.

To set up the symbolic computation, we'll need to define its inputs. They come in two variants: inputs that just get used once per call, and parameters that we want to keep track of and update.
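As a small illustration of the two-step workflow before we build the model (the names `u`, `v`, and `toy_f` are just for this example):
In [ ]:
# Step 1: define a symbolic expression -- nothing is computed yet
u = T.dscalar('u')
v = T.dscalar('v')
expr = u ** 2 + v
# Step 2: compile it into a callable function and feed it data
toy_f = theano.function(inputs=[u, v], outputs=expr)
print(toy_f(3., 1.))  # 10.0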
In [ ]:
# First define inputs that get used once
X_sym = T.matrix('X')
y_sym = T.lvector('y')
# Then define the parameters which will be kept and updated (called shared variables)
# Initialize the weights randomly: one 28x28 template per class
w_0 = np.random.randn(28**2, 10).astype('float32')
W = theano.shared(w_0)
b = theano.shared(np.zeros(10).astype('float32'))
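Shared variables hold state that persists between function calls, and you can read or overwrite that state from Python at any time with `get_value` and `set_value`:
In [ ]:
# Shared variables are readable (and writable) from the Python side
print(W.get_value().shape)  # (784, 10)
print(b.get_value()[:5])    # first few biases, all zeros initially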
In [ ]:
# Create the network predictions
# T.nnet.softmax gives multinomial classification
y_hat = T.nnet.softmax(T.dot(X_sym, W) + b)
# Calculate cost
# T.nnet.categorical_crossentropy is right for classification
cost = T.nnet.categorical_crossentropy(y_hat, y_sym).mean()
# Accuracy can be computed with T.argmax
accuracy = T.mean(T.eq(T.argmax(y_hat, axis=1), y_sym))
# T.grad symbolically differentiates the cost w.r.t. each parameter
W_grad = T.grad(cost, W)
b_grad = T.grad(cost, b)
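`T.grad` performs symbolic differentiation, so it works on any scalar expression, not just this cost. A quick sanity check on a toy expression (the names `x_toy` and `g` are just for this illustration):
In [ ]:
# The gradient of x**2 is 2*x; at x = 3 it should be 6
x_toy = T.dscalar('x_toy')
g = T.grad(x_toy ** 2, x_toy)
print(theano.function([x_toy], g)(3.))  # 6.0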
In [ ]:
# One step of gradient descent with step size eps
eps = .001
updates = OrderedDict()
W_prime = W - eps * W_grad
b_prime = b - eps * b_grad
# Whenever a compiled function with these updates is called,
# W and b are replaced by W_prime and b_prime
updates[W] = W_prime
updates[b] = b_prime
In [ ]:
# f evaluates the cost and accuracy without touching the parameters;
# f_updates additionally applies one gradient-descent step per call
f = theano.function(inputs=[X_sym, y_sym], outputs=[cost, accuracy])
f_updates = theano.function(inputs=[X_sym, y_sym],
                            outputs=[cost, accuracy], updates=updates)
In [ ]:
batch_size = 100
n_batches = 100
for ii in range(n_batches):
    # Draw a random contiguous minibatch from the training set
    idx = np.random.randint(low=0, high=X.shape[0]-batch_size)
    X_batch = X[idx:idx+batch_size]
    # Labels come as an (n, 1) column; flatten to a vector for y_sym
    y_batch = y[idx:idx+batch_size].ravel()
    c, a = f_updates(X_batch, y_batch)
    if ii % 10 == 0:
        print('batch {}: cost {:.3f}, accuracy {:.3f}'.format(ii, float(c), float(a)))
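To get a number that isn't biased by the training data, we can run `f` (which applies no updates) on the held-out split. This sketch assumes pylearn2's MNIST loader accepts `'test'` the same way it accepted `'train'` above:
In [ ]:
# Evaluate on the test set; f applies no parameter updates
ds_test = mnist.MNIST('test')
c_test, a_test = f(ds_test.X, ds_test.y.ravel())
print('test cost: {:.3f}, test accuracy: {:.3f}'.format(float(c_test), float(a_test)))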
In [ ]:
# Compare the initial random weights to the learned per-class templates
im = tri(w_0.T, (28, 28), (1, 10), (2, 2))
plt.imshow(im, cmap='gray', interpolation='nearest')
plt.title('Original weights')
plt.figure()
im = tri(W.get_value().T, (28, 28), (1, 10), (2, 2))
plt.imshow(im, cmap='gray', interpolation='nearest')
plt.title('Learned weights')