In [30]:
import theano
from theano import tensor as T
import numpy as np
from load import mnist  # local helper module that loads the MNIST arrays
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d

In [2]:
srng = RandomStreams()

In [3]:
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

In [4]:
def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))
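
As a quick illustrative check (not part of the original run): the 0.01 scaling keeps the initial weights close to zero, and the floatX cast gives them the configured dtype. `w_tmp` is a hypothetical throwaway variable.

w_tmp = init_weights((100, 100))  # hypothetical throwaway weights
print w_tmp.get_value().std()     # ~0.01
print w_tmp.get_value().dtype     # theano.config.floatX (float64 on this setup)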

In [5]:
def rectify(X):
    return T.maximum(X, 0.)

In [6]:
def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
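
Subtracting the row-wise max before exponentiating is the usual numerical-stability trick: it cancels out in the ratio, but keeps exp from overflowing on large logits. A minimal NumPy sketch of the same idea (illustrative, not from the original run):

x = np.array([[1000., 1001., 1002.]])
# np.exp(x) would overflow to inf and normalize to nan
shifted = np.exp(x - x.max(axis=1, keepdims=True))
print shifted / shifted.sum(axis=1, keepdims=True)  # approx [[ 0.09  0.245  0.665]]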

In [7]:
def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        # zero each unit with probability p
        X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        # inverted dropout: rescale so the expected activation is unchanged,
        # which lets the prediction graph simply run with p=0
        X /= retain_prob
    return X
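
A small NumPy sketch of why the rescale preserves the expectation (illustrative only, under the same p as a conv layer below):

p = 0.2
acts = np.ones(10000)
mask = np.random.binomial(1, 1 - p, size=acts.shape)
print (acts * mask / (1 - p)).mean()  # ~1.0: expectation preserved despite dropped units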

In [8]:
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)  # running average of squared gradients
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)  # epsilon avoids division by zero
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates
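
RMSprop divides each gradient by a running root-mean-square of its recent magnitudes, so every parameter gets its own adaptive step size. One update on a toy scalar, reusing the rho, lr, and epsilon defaults above (illustrative only):

rho, lr, eps = 0.9, 0.001, 1e-6
acc, g = 0.0, 4.0                     # accumulator starts at zero, like p.get_value() * 0.
acc = rho * acc + (1 - rho) * g ** 2  # 1.6
print lr * g / np.sqrt(acc + eps)     # ~0.00316, roughly lr / sqrt(1 - rho) on a first step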

In [11]:
def model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    # Each block: convolve -> rectify -> max-pool -> dropout
    l1a = rectify(conv2d(X, w, border_mode='full'))
    l1 = max_pool_2d(l1a, (2, 2))
    l1 = dropout(l1, p_drop_conv)

    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)

    l3a = rectify(conv2d(l2, w3))
    l3b = max_pool_2d(l3a, (2, 2))
    l3 = T.flatten(l3b, outdim=2)  # convert from 4-tensor to a matrix for the dense layers
    l3 = dropout(l3, p_drop_conv)

    l4 = rectify(T.dot(l3, w4))
    l4 = dropout(l4, p_drop_hidden)

    pyx = softmax(T.dot(l4, w_o))
    return l1, l2, l3, l4, pyx
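
To see why w4 (defined below) needs a fan-in of 128 * 3 * 3, trace the feature-map size through the network: the first conv uses border_mode='full' (output grows by kernel size minus one), the later convs are 'valid' (output shrinks by the same amount), and max_pool_2d with the default ignore_border=False rounds odd sizes up. A quick check of that arithmetic (illustrative):

size = 28
size = size + 3 - 1     # conv, border_mode='full' -> 30
size = (size + 1) // 2  # 2x2 max pool, ceil      -> 15
size = size - 3 + 1     # conv, 'valid'           -> 13
size = (size + 1) // 2  # pool                    -> 7
size = size - 3 + 1     # conv, 'valid'           -> 5
size = (size + 1) // 2  # pool                    -> 3
print 128 * size * size  # 1152 = 128 * 3 * 3, the input dimension of w4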

In [12]:
train_x, test_x, train_y, test_y = mnist(onehot=True)

In [33]:
train_x = train_x.reshape(-1, 1, 28, 28)  # reshape into the conv 4-tensor (batch, channel, row, col) layout
test_x = test_x.reshape(-1, 1, 28, 28)

print train_x.dtype


float64

In [36]:
X = T.dtensor4()  # 4-tensor input instead of a matrix
Y = T.dmatrix()
print X.dtype
print Y.dtype


float64
float64

In [37]:
w = init_weights((32, 1, 3, 3))  # conv weights: (num filters, input channels, filter rows, filter cols)
w2 = init_weights((64, 32, 3, 3))
w3 = init_weights((128, 64, 3, 3))
w4 = init_weights((128 * 3 * 3, 625))  # highest conv layer has 128 filters and a 3x3 grid of responses
w_o = init_weights((625, 10))
print "w:", w
print "w dtype:", w.dtype
print "X:", X
print "X dtype:", X.dtype
print floatX(np.random.randn(*(32, 1, 3, 3)) * 0.01).dtype


w: <TensorType(float64, 4D)>
w dtype: float64
X: <TensorType(float64, 4D)>
X dtype: float64
float64

In [38]:
# Compile two versions of the network:
# with noise for training (20% dropout on the conv layers, 50% on the fully connected hidden layer)
noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, w_o, 0.2, 0.5)
# without noise for prediction
l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, w_o, 0., 0.)
y_x = T.argmax(py_x, axis=1)


c:\Users\fch80_000\Anaconda\lib\site-packages\theano\sandbox\rng_mrg.py:768: UserWarning: MRG_RandomStreams Can't determine #streams from size (Shape.0), guessing 60*256
  nstreams = self.n_streams(size)

In [39]:
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w, w2, w3, w4, w_o]
updates = RMSprop(cost, params, lr=0.001)
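
With one-hot targets, categorical_crossentropy reduces to the negative log-probability the model assigns to the true class. In plain NumPy (illustrative):

py = np.array([[0.7, 0.2, 0.1]])  # softmax output for one example
y = np.array([[1., 0., 0.]])      # one-hot target
print -np.sum(y * np.log(py), axis=1)  # approx [ 0.3567], i.e. -log(0.7)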

In [40]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)


c:\Users\fch80_000\Anaconda\lib\site-packages\theano\tensor\subtensor.py:110: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  start in [None, 0] or
c:\Users\fch80_000\Anaconda\lib\site-packages\theano\tensor\subtensor.py:114: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  stop in [None, length, maxsize] or
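
Since predict returns hard class labels (the argmax of py_x), it can be compared directly against the argmax of the one-hot test labels. A hypothetical spot check on a few test digits:

print predict(test_x[:5])            # predicted digit classes
print np.argmax(test_y[:5], axis=1)  # true digit classes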

In [43]:
for i in range(10):
    # minibatches of 128 examples; the zip drops the final partial batch
    for start, end in zip(range(0, len(train_x), 128), range(128, len(train_x), 128)):
        cost = train(train_x[start:end], train_y[start:end])
    print np.mean(np.argmax(test_y, axis=1) == predict(test_x))  # test-set accuracy after each epoch


0.9244
0.9701
0.983
0.9875
0.9894
0.9881
0.9908
0.9913
0.9918
0.9912

In [44]:
# rerunning the loop continues training, since the shared weights persist
for i in range(10):
    for start, end in zip(range(0, len(train_x), 128), range(128, len(train_x), 128)):
        cost = train(train_x[start:end], train_y[start:end])
    print np.mean(np.argmax(test_y, axis=1) == predict(test_x))


0.9917
0.9913
0.9925
0.9922
0.993
0.9928
0.9936
0.9931
0.992
0.9928

In [45]:
# ten more epochs; accuracy plateaus around 99.4%
for i in range(10):
    for start, end in zip(range(0, len(train_x), 128), range(128, len(train_x), 128)):
        cost = train(train_x[start:end], train_y[start:end])
    print np.mean(np.argmax(test_y, axis=1) == predict(test_x))


0.9936
0.994
0.9943
0.9939
0.9936
0.9938
0.9935
0.9942
0.9939
0.994
