In [1]:
import theano
from theano import tensor as T
import numpy as np
from load import mnist

In [2]:
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))

In [8]:
trX, teX, trY, teY = mnist(onehot=True)

In [16]:
X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((784, 10))

In [21]:
w.get_value().shape


Out[21]:
(784, 10)

In [26]:
py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

In [27]:
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * 0.05]]

In [28]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

In [30]:
for i in range(10):
    # batch : 128
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        cost = train(trX[start:end], trY[start:end])
    print i, np.mean(np.argmax(teY, axis=1) == predict(teX))


0 0.9159
1 0.9167
2 0.9167
3 0.9168
4 0.9174
5 0.918
6 0.9187
7 0.9187
8 0.9192
9 0.9194

In [41]:
print np.argmax(teY, axis=1)
print predict(teX)
print np.argmax(teY, axis=1) == predict(teX)
print np.mean(np.argmax(teY, axis=1) == predict(teX))


[7 2 1 ..., 4 5 6]
[7 2 1 ..., 4 5 6]
[ True  True  True ...,  True  True  True]
0.9194