In [1]:
import theano
from theano import tensor as T
import numpy as np
from load import mnist # mnist function from load.py
# using unzipped files from http://yann.lecun.com/exdb/mnist/

In [2]:
def floatX(X):
    # Cast to the float dtype Theano is configured with (theano.config.floatX)
    return np.asarray(X, dtype=theano.config.floatX)
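
floatX simply casts an array to whatever float dtype Theano is configured with, usually 'float32' so the computation can run on a GPU. A quick illustrative check, not part of the original run:

In [ ]:
print(floatX(np.zeros(3)).dtype)  # e.g. float32 under THEANO_FLAGS=floatX=float32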

In [3]:
def init_weights(shape):
    # Small zero-mean Gaussian init; the shared variable holds the weights on the device
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))
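
init_weights draws each weight from a zero-mean Gaussian scaled by 0.01; small random values break the symmetry between hidden units while keeping the sigmoids away from saturation. A shape check (illustrative, assuming nothing beyond the helper above):

In [ ]:
w = init_weights((784, 625))
print(w.get_value().shape)  # (784, 625)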

In [10]:
def sgd(cost, params, lr=0.05):
    # Plain stochastic gradient descent: move each parameter a step of size lr
    # against its gradient.
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        updates.append([p, p - g * lr])
    return updates
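
sgd builds the vanilla update p <- p - lr * dcost/dp for every parameter. A minimal sanity check of the helper (illustrative, not in the original run): minimizing (w - 3)^2 should drive w toward 3.

In [ ]:
w = theano.shared(floatX(0.0))
c = (w - 3) ** 2
step = theano.function([], c, updates=sgd(c, [w], lr=0.1))
for _ in range(50):
    step()
print(w.get_value())  # close to 3.0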

In [15]:
def model(X, w_h, w_o):
    h = T.nnet.sigmoid(T.dot(X, w_h)) # Sigmoid activation on the hidden layer
    pyx = T.nnet.softmax(T.dot(h, w_o)) # Softmax on the output layer turns activations into class probabilities
    return pyx
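
The model is a single hidden layer, h = sigmoid(X . w_h), followed by a softmax over the 10 digit classes. An illustrative shape check with throwaway weights:

In [ ]:
Xd = T.fmatrix()
probe = theano.function([Xd], model(Xd, init_weights((784, 625)), init_weights((625, 10))),
                        allow_input_downcast=True)
p = probe(np.zeros((2, 784)))
print(p.shape, p.sum(axis=1))  # (2, 10), each row sums to 1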

In [16]:
train_x, test_x, train_y, test_y = mnist(onehot=True)
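
With the standard MNIST split this gives 60,000 training and 10,000 test images, each flattened to 784 pixels, with labels one-hot encoded into 10 columns. A quick check (illustrative; shapes assume load.py follows the standard split):

In [ ]:
print(train_x.shape, train_y.shape)  # expected: (60000, 784) (60000, 10)
print(test_x.shape, test_y.shape)    # expected: (10000, 784) (10000, 10)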

In [17]:
X = T.fmatrix()
Y = T.fmatrix()

In [18]:
## Initialize random weights connecting the 784 input pixels
## to the 625 hidden units.
w_h = init_weights((784, 625))

## Initialize random weights connecting the 625 hidden units to the 10 output units.
w_o = init_weights((625, 10))
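
Together the two matrices hold 784 * 625 + 625 * 10 = 496,250 weights. An illustrative count:

In [ ]:
print(w_h.get_value().size + w_o.get_value().size)  # 496250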

In [19]:
py_x = model(X, w_h, w_o)
## Returns the column (digit) with the highest predicted probability for each row (training example)
y_x = T.argmax(py_x, axis=1)

In [20]:
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
params = [w_h, w_o]
updates = sgd(cost, params)
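
categorical_crossentropy computes -sum(Y * log(py_x)) per row; because Y is one-hot, this is just the negative log-probability assigned to the true class, averaged over the batch. An equivalent hand-written form (illustrative):

In [ ]:
cost_manual = T.mean(-T.sum(Y * T.log(py_x), axis=1))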

In [21]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
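
X and Y were declared as fmatrix (float32), but the numpy arrays from load.py are typically float64; allow_input_downcast=True lets the compiled functions cast them silently. A quick illustrative call (predictions are random until training):

In [ ]:
print(predict(test_x[:5]))  # five class indices in 0-9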

In [24]:
def run_model(iterations=100):
    for i in range(iterations):
        # Train on mini-batches of 128 examples (the final partial batch is skipped)
        for start, end in zip(range(0, len(train_x), 128), range(128, len(train_x), 128)):
            cost = train(train_x[start:end], train_y[start:end])
        # Report test-set accuracy after each pass over the training data
        print(np.mean(np.argmax(test_y, axis=1) == predict(test_x)))

run_model(10)


0.9132
0.914
0.9151
0.9162
0.9168
0.9179
0.9184
0.9187
0.9194
0.9197
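
Note that the batching loop above pairs range(0, ..., 128) with range(128, ..., 128), which silently drops the final partial batch (60000 % 128 = 96 examples per epoch). An illustrative variant that trains on the tail as well, relying on numpy's tolerance for slices past the end of an array:

In [ ]:
def run_model_full(iterations=10):
    for i in range(iterations):
        for start in range(0, len(train_x), 128):
            cost = train(train_x[start:start + 128], train_y[start:start + 128])
        print(np.mean(np.argmax(test_y, axis=1) == predict(test_x)))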
