In [87]:
%matplotlib inline

import theano
import cPickle
import numpy as np
from theano import tensor, function
from pylab import imshow
from pylab import rcParams

from agnez.weight import grid2d
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.transformers import Mapping, Flatten
from fuel.schemes import SequentialScheme
from blocks.bricks.cost import MisclassificationRate
from rnn import allrotations, _meanize

import seaborn as sns
sns.set_style('dark')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5,
                rc={"lines.linewidth": 2.5})

In [88]:
def autocorrentropy(X, sigma=1):
    b, t, d = X.shape
    V = np.zeros((b, t, d))
    nor = np.sqrt(2*np.pi)*sigma
    dem = 2*sigma**2
    for i in range(b):
        if i % 1000 == 0:
            print i
        for j in range(t):
            V[i, j, :] = np.exp((-(X[i, :(t-j), :]-X[i, j:, :])**2) / dem).sum(axis=0) / (t-j)
            #V[i, j, :] = ((X[i, :(t-j), :]-X[i, j:, :])**2).sum(axis=0)
    return V

Load models and data


In [89]:
batch_size = 128
n_steps = 20
mnist_train = MNIST("train", subset=slice(0, 50000))
mnist_valid = MNIST("train", subset=slice(50000, 60000))
mnist_test = MNIST("test")
trainstream = Mapping(Flatten(DataStream(mnist_train,
                          iteration_scheme=SequentialScheme(50000, batch_size))),
                          _meanize(n_steps))
validstream = Mapping(Flatten(DataStream(mnist_valid,
                                             iteration_scheme=SequentialScheme(10000,
                                                                               batch_size))),
                          _meanize(n_steps))
teststream = Mapping(Flatten(DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(10000,
                                                                              batch_size))),
                         _meanize(n_steps))

In [90]:
handle = mnist_train.open()
_, labels_t = mnist_train.get_data(handle, slice(0, 50000))
handle = mnist_test.open()
_, labels = mnist_test.get_data(handle, slice(0, 10000))

In [91]:
model_name = 'best_mnist.pkl.pkl'
i2h, h2o, rec1 = cPickle.load(file(model_name, 'r'))

Visualize weights


In [92]:
rcParams['figure.figsize'] = 5,5
W = i2h.linear_transformations[0].W.get_value()
I = grid2d(W.T[:100])
imshow(I, cmap='gray')
W.shape


Out[92]:
(784, 100)

Reconstructions


In [93]:
X = tensor.tensor3('features')
Z0 = i2h.apply(X)
Z1 = rec1.apply(Z0)
Y = h2o.apply(Z1)
F = function([X], Y)
E = function([X], tensor.sqr(X-Y).sum())

total_error = 0
total_energy = 0
for x_val in teststream.get_epoch_iterator():
    total_error += E(x_val[0])
    total_energy += (x_val[0]**2).sum()

In [94]:
from agnez.inputs import image_sequence
rcParams['figure.figsize'] = 10, 5
y = F(x_val[0])
rec = image_sequence(y[:,1,:], (28,28))
imshow(rec, cmap='gray')

print "SNR (DB) = %f" % (10*np.log10(total_error))


SNR (DB) = 69.931745

In [95]:
rec = image_sequence(x_val[0][:,1,:], (28,28))
imshow(rec, cmap='gray')


Out[95]:
<matplotlib.image.AxesImage at 0x7fc78a51f510>

2D Embedding


In [34]:
zt = np.zeros((5e4, 1e2))
G = theano.function([X], Z1[-1])
for i, b in enumerate(trainstream.get_epoch_iterator()):
    first = i*batch_size
    last = (i+1)*batch_size
    zt[first:last] = G(b[0])

In [37]:
z = np.zeros((1e4, 1e2))
G = theano.function([X], Z1[-1])
for i, b in enumerate(teststream.get_epoch_iterator()):
    first = i*batch_size
    last = (i+1)*batch_size
    z[first:last] = G(b[0])

In [52]:
from agnez import embedding2d, embedding2dplot
labels=labels.astype('int')
ebd, mtd = embedding2d(z)
_ = embedding2dplot(ebd, labels[:,0])


Autocorrentropy


In [67]:
G(b[0]).shape


Out[67]:
(20, 16, 100)

In [68]:
zt = np.zeros((5e4, 20, 1e2))
G = theano.function([X], Z1)
for i, b in enumerate(trainstream.get_epoch_iterator()):
    first = i*batch_size
    last = (i+1)*batch_size
    zt[first:last] = G(b[0]).transpose(1, 0, 2)

In [70]:
z = np.zeros((1e4, 20, 1e2))
for i, b in enumerate(teststream.get_epoch_iterator()):
    first = i*batch_size
    last = (i+1)*batch_size
    z[first:last] = G(b[0]).transpose(1, 0, 2)

In [74]:
z.shape


Out[74]:
(10000, 20, 100)

In [83]:
from agnez import embedding2d, embedding2dplot
from sklearn.manifold.t_sne import TSNE
tsne = TSNE()
labels=labels.astype('int')
ebd, mtd = embedding2d(z.reshape((-1, 2000)))
_ = embedding2dplot(ebd, labels[:,0])



In [84]:
aczt = autocorrentropy(zt)

In [76]:
acz  = autocorrentropy(z)

In [78]:
acz.shape


Out[78]:
(10000, 20, 100)

In [81]:
from sklearn.manifold.t_sne import TSNE
tsne = TSNE()
ebd, mtd = embedding2d(acz.reshape((-1, 2000)), method=tsne)
_ = embedding2dplot(ebd, labels[:,0])


Classification


In [86]:
# from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier as KNC
print "Last state"
knc = KNC(1)
knc.fit(aczt.reshape((-1, 2000)), labels_t).score(acz.reshape((-1, 2000)), labels)


Last state
-c:5: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
Out[86]:
0.91210000000000002

In [ ]:
print "Original Data"
knc = KNC(1)
knc.fit(, labels_t).score(z, labels)

In [ ]:
TT = tensor.matrix('targets')
probs = clf.apply(Z)
clf_error_rate = MisclassificationRate().apply(TT.flatten(), probs)
CLF = theano.function([X,TT], clf_error_rate)

error_rate = 0
for x_val in teststream.get_epoch_iterator():
    error_rate += CLF(x_val[0], x_val[1])
    
print "total error rate = %f" % (error_rate/100)

In [ ]:
y.shape

In [ ]:
print y.mean()
print y.var()

In [ ]: