In [1]:
import theano
from theano import tensor as T
import numpy as np
from load import mnist
In [2]:
def floatX(X):
    """Return X as a NumPy array whose dtype is Theano's configured float type.

    The target dtype is read from ``theano.config.floatX`` at call time.
    ``np.asarray`` is used, so an input that is already an array of that
    dtype is passed through without copying.
    """
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)
def init_weights(shape):
    """Build a Theano shared variable of the given shape with small random values.

    Values are drawn from a standard normal distribution and scaled by 0.01,
    then converted with ``floatX`` before being wrapped in ``theano.shared``.
    The draw uses NumPy's global random state, so results vary between runs
    unless that state is seeded elsewhere.
    """
    initial_values = np.random.randn(*shape) * 0.01
    return theano.shared(floatX(initial_values))
def model(X, w):
    """Return the symbolic softmax of the linear map ``X . w``.

    X and w are Theano expressions; the result is a symbolic expression,
    not a numeric array.
    """
    logits = T.dot(X, w)
    return T.nnet.softmax(logits)
In [8]:
# Load the MNIST data through the project-local `load.mnist` helper, asking for one-hot labels.
# NOTE(review): tr*/te* are presumably the train/test splits and X/Y the images/labels — confirm against load.py.
trX, teX, trY, teY = mnist(onehot=True)
In [16]:
# Symbolic float32 matrices standing in for a batch of inputs (X) and a batch of targets (Y).
X = T.fmatrix()
Y = T.fmatrix()
# Shared 784 x 10 weight matrix with small random initial values.
# NOTE(review): presumably 784 = 28*28 pixels and 10 = digit classes — confirm against the data loader.
w = init_weights((784, 10))
In [21]:
# Sanity check: display the shape of the array currently stored in the shared variable w.
w.get_value().shape
Out[21]:
In [26]:
# Symbolic output of the model: softmax applied to X . w.
py_x = model(X, w)
# Predicted class for each row: the column index holding the largest softmax value.
y_pred = T.argmax(py_x, axis=1)
In [27]:
# Step size for plain gradient descent; named here so it can be tuned in one place.
LEARNING_RATE = 0.05

# Training objective: categorical cross-entropy between the model output and the targets Y,
# averaged over the batch.
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
# Symbolic gradient of the objective with respect to the weights.
gradient = T.grad(cost=cost, wrt=w)
# Update rule as a list of [shared_variable, new_expression] pairs: w <- w - LEARNING_RATE * gradient.
update = [[w, w - gradient * LEARNING_RATE]]
In [28]:
# Compiled training step: takes a batch (X, Y), returns the cost, and applies `update` as a side effect.
# allow_input_downcast=True permits inputs of higher precision to be cast down to the symbolic input dtype.
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
# Compiled inference function: takes a batch X and returns the predicted class index for each row.
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
In [30]:
# Mini-batch gradient descent: 10 passes over the training set, 128 examples per step.
batch_size = 128
# The test labels do not change between epochs, so decode the one-hot rows once up front.
te_labels = np.argmax(teY, axis=1)
for i in range(10):
    # Step through the data by batch_size and let slicing clamp the last window.
    # Zipping range(0, n, 128) with range(128, n, 128) always dropped the final
    # batch, because the second range is one element shorter than the first.
    for start in range(0, len(trX), batch_size):
        end = start + batch_size
        # Keep the numeric result under its own name so the symbolic `cost`
        # expression defined earlier in the notebook is not overwritten.
        batch_cost = train(trX[start:end], trY[start:end])
    # NOTE(review): the original indentation was lost; the report is assumed to run once per epoch.
    # Prints the epoch index and the test-set accuracy; the single-argument form
    # produces the same text on Python 2 and Python 3.
    print("%d %s" % (i, np.mean(te_labels == predict(teX))))
In [41]:
# Decode the one-hot test labels and run the classifier once, then reuse both
# results below instead of recomputing them for every print.
true_labels = np.argmax(teY, axis=1)
predicted_labels = predict(teX)
is_correct = true_labels == predicted_labels

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(true_labels)          # ground-truth class indices
print(predicted_labels)     # model predictions
print(is_correct)           # element-wise match mask
print(np.mean(is_correct))  # overall test accuracy