In [1]:
import theano
from theano import tensor as T
import numpy as np
from load import mnist
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from theano.tensor.nnet.conv import conv2d
from theano.tensor.signal.downsample import max_pool_2d

In [2]:
srng = RandomStreams()

In [3]:
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

In [4]:
def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

In [5]:
def rectify(X):
    return T.maximum(X, 0.)

In [6]:
def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')

In [7]:
def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        X /= retain_prob
    return X

In [8]:
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates

In [9]:
def model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden):
    # Block of computation
    # activate -> pool -> noise
    l1a = rectify(conv2d(X, w, border_mode='full'))
    l1 = max_pool_2d(l1a, (2, 2))
    l1 = dropout(l1, p_drop_conv)

    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)

    l3a = rectify(conv2d(l2, w3))
    l3b = max_pool_2d(l3a, (2, 2))
    l3 = T.flatten(l3b, outdim=2) # convert from 4tensor to normal matrix
    l3 = dropout(l3, p_drop_conv)

    l4 = rectify(T.dot(l3, w4))
    l4 = dropout(l4, p_drop_hidden)

    pyx = softmax(T.dot(l4, w_o))
    return l1, l2, l3, l4, pyx

In [10]:
train_x, test_x, train_y, test_y = mnist(onehot=True)

In [11]:
train_x = train_x.reshape(-1, 1, 28, 28) # Reshape into conv 4tensor(b, c, 0, 1) format
test_x = test_x.reshape(-1, 1, 28, 28)

In [12]:
X = T.ftensor4() # 4tensor instead of matrix
Y = T.fmatrix()

In [13]:
w = init_weights((32, 1, 3, 3)) # Conv weights(# kernels, # channels, kernel width, kernel height)
w2 = init_weights((64, 32, 3, 3))
w3 = init_weights((128, 64, 3, 3))
w4 = init_weights((128 * 3 * 3, 625))# highest conv layer has 128 filters and 3x3 grid of responses
w_o = init_weights((625, 10))

In [14]:
# Compile two versions:
# Noise during training 20% dropout for conv layers, 50% for output.
noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5)
# no noise for prediction
l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.)
y_x = T.argmax(py_x, axis=1)


---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-14-3551484b01c6> in <module>()
----> 1 noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5)
      2 l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.)
      3 y_x = T.argmax(py_x, axis=1)

<ipython-input-9-2373e16a8cbe> in model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden)
      3     # Block of computation
      4     # activate -> pool -> noise
----> 5     l1a = rectify(conv2d(X, w, border_mode='full'))
      6     l1 = max_pool_2d(l1a, (2, 2))
      7     l1 = dropout(l1, p_drop_conv)

c:\Users\fch80_000\Anaconda\lib\site-packages\theano\tensor\nnet\conv.pyc in conv2d(input, filters, image_shape, filter_shape, border_mode, subsample, **kargs)
    146                 imshp=imshp, kshp=kshp, nkern=nkern, bsize=bsize, **kargs)
    147 
--> 148     return op(input, filters)
    149 
    150 

c:\Users\fch80_000\Anaconda\lib\site-packages\theano\gof\op.pyc in __call__(self, *inputs, **kwargs)
    397         """
    398         return_list = kwargs.pop('return_list', False)
--> 399         node = self.make_node(*inputs, **kwargs)
    400         if self.add_stack_trace_on_call:
    401             self.add_tag_trace(node)

c:\Users\fch80_000\Anaconda\lib\site-packages\theano\tensor\nnet\conv.pyc in make_node(self, inputs, kerns)
    604             raise NotImplementedError(
    605                 "The image and the kernel must have the same type."
--> 606                 "inputs(%s), kerns(%s)" % (_inputs.dtype, _kerns.dtype))
    607         if self.outshp is not None:
    608             bcastable23 = [self.outshp[0] == 1, self.outshp[1] == 1]

NotImplementedError: The image and the kernel must have the same type.inputs(float32), kerns(float64)

In [15]:
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w, w2, w3, w4, w_o]
updates = RMSprop(cost, params, lr=0.001)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-3e427a66f519> in <module>()
----> 1 cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
      2 params = [w, w2, w3, w4, w_o]
      3 updates = RMSprop(cost, params, lr=0.001)

NameError: name 'noise_py_x' is not defined

In [16]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-9ebf3bbb414f> in <module>()
----> 1 train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
      2 predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

NameError: name 'cost' is not defined

In [15]: