In [ ]:
from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, \
    FileTransferSpeed, FormatLabel, Percentage, \
    ProgressBar, ReverseBar, RotatingMarker, \
    SimpleProgress, Timer, AdaptiveETA, AdaptiveTransferSpeed

examples = []

import sys
import time

In [ ]:
%matplotlib inline

from numpy.fft import fft
import theano
import cPickle
import numpy as np
from theano import tensor, function
from pylab import imshow
from pylab import rcParams

from IPython.display import Image
from agnez.weight import grid2d
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.transformers import Mapping, Flatten
from fuel.schemes import SequentialScheme
from blocks.bricks.cost import MisclassificationRate

from blocks_contrib.probability import Pserver
from blocks_contrib.datasets import rotated_dataset, Meanizer
from sklearn.manifold.t_sne import TSNE
from sklearn.linear_model import LogisticRegression

from agnez import embedding2d, embedding2dplot, video_grid

import seaborn as sns
sns.set_style('dark')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5,
                rc={"lines.linewidth": 2.5})
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier as KNC

In [ ]:
def _circshift(data):
    b = data[0]
    for i in range(data[0].shape[1]):
        bi = data[0][:, i, :]
        bi[10:] = 0
        bi[:, 10:] = 0*bi[:, 10:]
        n = np.random.randint(0, 20, 1)
        b[:, i, :] = np.roll(bi, n, axis=0)
    return (b, data[1])

In [ ]:
# Build rotated-MNIST data streams with the circular-shift transform.
# Bug fix: the original cell had lines 2+ indented one level deeper than
# the first line, which is a SyntaxError (unexpected indent) on a fresh run.
leng = 10000
batch_size = 1000
mnist_test = MNIST("test")  # , subset=slice(60000, leng+60000))
teststream = Mapping(Flatten(DataStream(mnist_test,
                         iteration_scheme=SequentialScheme(
                             leng,
                             batch_size))), rotated_dataset(20))
teststream = Mapping(teststream, _circshift)

mnist_train = MNIST("train", subset=slice(0, 6*leng))
trainstream = Mapping(Flatten(DataStream(mnist_train,
                         iteration_scheme=SequentialScheme(
                             6*leng,
                             batch_size))), rotated_dataset(20))
trainstream = Mapping(trainstream, _circshift)
# meanizer = Meanizer(trainstream)
# trainstream = Mapping(trainstream, meanizer.meanless)
# teststream = Mapping(teststream, meanizer.meanless)

Load models and data


In [ ]:
def autocorrentropy(X, sigma=1):
    """Scalar autocorrentropy of each sequence at every lag.

    Parameters
    ----------
    X : ndarray, shape (batch, time, dim)
        Batch of sequences.
    sigma : float
        Gaussian kernel width.

    Returns
    -------
    ndarray, shape (batch, time)
        One Gaussian-kernel similarity per sequence and lag.
    """
    n_seq, n_steps, _ = X.shape
    norm = np.sqrt(2 * np.pi) * sigma
    denom = 2 * sigma ** 2
    out = np.zeros((n_seq, n_steps))
    for seq in range(n_seq):
        for lag in range(n_steps):
            diff = X[seq, :(n_steps - lag)] - X[seq, lag:]
            out[seq, lag] = np.exp((-diff ** 2).sum() / denom / (n_steps - lag)) / norm
    return out

def autocorrentropy2(X, sigma=1):
    """Per-feature autocorrentropy of each sequence at every lag.

    Parameters
    ----------
    X : ndarray, shape (batch, time, dim)
        Batch of sequences.
    sigma : float
        Gaussian kernel width.

    Returns
    -------
    ndarray, shape (batch, time, dim)
        Mean Gaussian kernel between each sequence and its lagged copy,
        computed independently for every feature dimension.
    """
    n_seq, n_steps, n_dim = X.shape
    denom = 2 * sigma ** 2
    out = np.zeros((n_seq, n_steps, n_dim))
    for seq in range(n_seq):
        for lag in range(n_steps):
            diff = X[seq, :(n_steps - lag), :] - X[seq, lag:, :]
            kern = np.exp(-(diff ** 2) / denom)
            out[seq, lag, :] = kern.sum(axis=0) / (n_steps - lag)
    return out

def correlation(X, sigma=1):
    """Per-feature auto-correlation of each sequence at every lag.

    Parameters
    ----------
    X : ndarray, shape (batch, time, dim)
        Batch of sequences.
    sigma : float
        Unused; kept for interface parity with the correntropy variants.

    Returns
    -------
    ndarray, shape (batch, time, dim)
        ``V[i, j, :]`` is the mean elementwise product of sequence ``i``
        with its ``j``-step lagged copy.
    """
    n_seq, n_steps, n_dim = X.shape
    V = np.zeros((n_seq, n_steps, n_dim))
    for i in range(n_seq):
        for lag in range(n_steps):
            # Fix: the original shadowed the batch-size variable `b` with
            # the lagged slice inside the loop (harmless only because
            # range() was already evaluated); distinct names avoid the trap.
            lead = X[i, :(n_steps - lag), :]
            lagged = X[i, lag:, :]
            V[i, lag, :] = (lead * lagged).sum(axis=0) / (n_steps - lag)
    return V

def example(fn):
    """Decorator: register ``fn`` in the module-level ``examples`` list and
    wrap it so a KeyboardInterrupt skips the example instead of propagating.

    The wrapper returns ``fn``'s result, or None when the run was
    interrupted. (Bug fix: originally ``V`` was unbound on interrupt,
    so ``return V`` raised NameError instead of skipping cleanly.)
    """
    def wrapped(X):
        V = None  # defined up-front so the interrupt path can still return
        try:
            sys.stdout.write('Running: %s\n' % fn.__name__)
            V = fn(X)
            sys.stdout.write('\n')
        except KeyboardInterrupt:
            sys.stdout.write('\nSkipping example.\n\n')
        return V

    examples.append(wrapped)
    return wrapped

@example
def crosscorrelation(X):
    """Pairwise lagged squared-distance matrix between all sequences.

    NOTE(review): the inner loop is ``range(0)`` so it never executes and
    ``V`` is returned all zeros (``v1[0]`` stays 0). This looks like a
    leftover debugging edit of ``range(t)`` -- confirm intent before
    relying on this function's output.
    """
    b, t, d = X.shape
    V = np.zeros((b, b))
    v1 = np.zeros(t)
    v2 = np.zeros(t)
    with ProgressBar(maxval=b) as progress:
        for i in range(b):
            progress.update(i)
            for j in range(b):
                for k in range(0):
                    v1[k] = ((X[i, :(t-k), :] - X[j, k:, :])**2 / (t-k)).sum()
                    v2[k] = ((X[i, k:, :] - X[j, :(t-k), :])**2 / (t-k)).sum()
                V[i, j] = v1[0] #+v2[0]/2#(v1+v2).min() / 2
    return V

@example
def crosscorrentropy(X, sigma=1):
    """Symmetric cross-correntropy between every pair of sequences.

    For each pair (i, j) the Gaussian kernel of the lag-normalized squared
    difference is evaluated in both shift directions, and the best lag of
    the averaged pair is kept.

    Parameters
    ----------
    X : ndarray, shape (batch, time, dim)
        Batch of sequences.
    sigma : float
        Gaussian kernel width.

    Returns
    -------
    ndarray, shape (batch, batch)
    """
    n_seq, n_steps, _ = X.shape
    norm = np.sqrt(2 * np.pi) * sigma
    denom = 2 * sigma ** 2
    V = np.zeros((n_seq, n_seq))
    fwd = np.zeros(n_steps)
    bwd = np.zeros(n_steps)
    with ProgressBar(maxval=n_seq) as progress:
        for i in range(n_seq):
            progress.update(i)
            for j in range(n_seq):
                for lag in range(n_steps):
                    d_fwd = (X[i, :(n_steps - lag), :] - X[j, lag:, :]) ** 2
                    d_bwd = (X[i, lag:, :] - X[j, :(n_steps - lag), :]) ** 2
                    fwd[lag] = np.exp(-(d_fwd / (n_steps - lag)).sum() / denom) / norm
                    bwd[lag] = np.exp(-(d_bwd / (n_steps - lag)).sum() / denom) / norm
                V[i, j] = (fwd + bwd).max() / 2
    return V

In [ ]:
X = np.random.normal(0, 1, (100, 20, 784))
V = autocorrentropy2(X)
_ = plt.plot(V[1])

In [ ]:
V = crosscorrentropy(X)
print V.shape

In [ ]:
imshow(V)

In [8]:
# Extract autocorrentropy features and labels for the test stream,
# batch by batch. `teststream`, `leng` and `batch_size` come from the
# stream-setup cell above.
# z = np.zeros((leng, 20))
x = np.zeros((leng, 20, 784))  # raw sequences (only filled by the commented-out line)
y = np.zeros((leng, 20, 784))  # autocorrentropy features for the test set

labels = np.zeros(leng)

# kernel width for the correntropy features
sigma=.1
for i,b in enumerate(teststream.get_epoch_iterator()):
    print i
    first = i*batch_size
    last = (i+1)*batch_size
    # b[0] arrives as (time, batch, dim); reorder to (batch, time, dim)
    data = b[0].transpose(1, 0, 2)
    # z[first:last] = autocorrentropy(data, sigma=sigma)
    #x[first:last] = data
    y[first:last] = autocorrentropy2(data, sigma=sigma) #.reshape((100, -1))
    #y[first:last] = correlation(data, sigma=sigma) #.reshape((100, -1))
    labels[first:last] = b[1][:,0]
    
labels = labels.astype('int')


0
1
2
3
4
5
6
7
8
9

In [ ]:
# zt = np.zeros((4*leng, 20))
# txt = np.zeros((6*leng, 20, 784))
# Same extraction as the test cell above, for the (6x larger) train stream.
# `sigma` is reused from the test-extraction cell.
yt = np.zeros((6*leng, 20, 784))  # autocorrentropy features for the train set

labelst = np.zeros(6*leng)

for i,b in enumerate(trainstream.get_epoch_iterator()):
    print i
    first = i*batch_size
    last = (i+1)*batch_size
    # b[0] arrives as (time, batch, dim); reorder to (batch, time, dim)
    data = b[0].transpose(1, 0, 2)
    # zt[first:last] = autocorrentropy(data, sigma=sigma)
    # xt[first:last] = data.reshape((100, -1))
    yt[first:last] = autocorrentropy2(data, sigma=sigma) # .reshape((100, -1))
    # yt[first:last] = correlation(data, sigma=sigma) # .reshape((100, -1))
    labelst[first:last] = b[1][:,0]
    
labelst = labelst.astype('int')


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

In [ ]:
print x.shape
_ = video_grid(x[:100].transpose(1, 0, 2))


In [ ]:
x[0].shape

In [ ]:
handle = mnist_test.open()
x = mnist_test.get_data(handle, slice(0, 100))

I = grid2d(y[:100, 0, :])
I2 = grid2d(x[0].reshape((100, 784)))
plt.subplot(121)
plt.imshow(I)
plt.subplot(122)
plt.imshow(I2)

In [ ]:
plt.imshow(I)

clf


In [ ]:
y.shape

In [ ]:
# clf1 = KNC(n_neighbors=1).fit(zt, labelst)
lag = 1 # np.arange(20)
clf2 = KNC(n_neighbors=1).fit(yt[:50000,lag,:].reshape((5*leng, -1)), labelst[:5*leng])
print clf2.score(yt[50000:60000, lag, :].reshape((leng, -1)), labelst[50000:60000])
# clf3 = KNC(n_neighbors=1).fit(xt, labelst)

In [ ]:
def plot_confusion_matrix(cm, labels, title='Confusion matrix', cmap=plt.cm.Blues):
    """Render a confusion matrix as a labelled heat map on the current axes.

    Parameters
    ----------
    cm : ndarray, shape (n_classes, n_classes)
        Confusion matrix, e.g. from ``sklearn.metrics.confusion_matrix``.
    labels : sequence
        Tick labels for both axes, in class order.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Color map for the heat map.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(labels))
    plt.xticks(ticks, labels, rotation=45)
    plt.yticks(ticks, labels)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [ ]:
# Fit a logistic regression on the train features of each single lag and
# score it on the matching test-lag slice, to see which lag is most
# discriminative. The last fitted model is left in `clf2`.
for lag in range(20):
    print "lag: %d" % lag
    lagvec = lag
    #clf2 = KNC(n_neighbors=1).fit(yt[:,lagvec,:].reshape((6*leng, -1)), labelst)
    clf2 = LogisticRegression().fit(yt[:, lagvec, :], labelst)
    print clf2.score(y[:, lagvec, :].reshape((leng, -1)), labels)
    # clf3 = KNC(n_neighbors=1).fit(xt, labelst)

In [ ]:
from sklearn.metrics import confusion_matrix
l_pred = clf2.predict(y.reshape((leng, -1)))
cm = confusion_matrix(labels, l_pred)
plot_confusion_matrix(cm, np.arange(10))

In [ ]:
# NOTE(review): `clf1`, `clf3` and `z` are only defined in commented-out
# code in earlier cells, so this cell raises NameError as written; also
# `clf2` above was fit on a single lag slice, so `clf2.score(y, ...)` on
# the full 3-d array would fail. Confirm which classifiers this cell was
# meant to compare before re-running.
print clf1.score(z, labels)
print clf2.score(y, labels)
print clf3.score(x, labels)

In [ ]:
plt.plot(z[:10].T)

In [ ]:
tsne = TSNE(perplexity=40)
ebd, tsne = embedding2d(z, method=tsne)
_ = embedding2dplot(ebd, labels)

In [ ]:
tsne = TSNE(perplexity=40)
ebd, tsne = embedding2d(y[:,0,:], method=tsne)
_ = embedding2dplot(ebd, labels)

In [ ]:
tsne = TSNE(perplexity=40)
ebd, tsne = embedding2d(x, method=tsne)
_ = embedding2dplot(ebd, labels)

Cross-correntropy


In [ ]:
from sklearn.manifold.t_sne import TSNE
from agnez import embedding2d, embedding2dplot

In [ ]:
# Collect the raw (untransformed-feature) sequences and labels from the
# test stream for the cross-correntropy experiments below.
x = np.zeros((leng, 20, 784))
labels = np.zeros(leng)
for i,b in enumerate(teststream.get_epoch_iterator()):
    # Bug fix: the slices used a hard-coded 100 but the stream yields
    # `batch_size` (=1000) samples per batch, so the assignment shapes
    # did not match. (If the stream was originally built with batches of
    # 100 this was fine -- confirm against the stream-setup cell.)
    first = i*batch_size
    last = (i+1)*batch_size
    data = b[0].transpose(1, 0, 2)  # (time, batch, dim) -> (batch, time, dim)
    x[first:last] = data
    labels[first:last] = b[1][:,0]
print(x.shape)
labels = labels.astype('int')

In [ ]:
V = crosscorrentropy(x)

In [ ]:
tsne = TSNE(perplexity=40, metric='precomputed')
ebd, tsne = embedding2d(V, method=tsne)
_ = embedding2dplot(ebd, labels)

In [ ]:
V = crosscorrelation(x)

In [ ]:
def V(shape, ksize=.5):
    """Build a correntropy-style distance between flattened sequences.

    Parameters
    ----------
    shape : tuple (time, dim)
        Shape each 1-d input vector is reshaped to.
    ksize : float
        Gaussian kernel precision (multiplies the squared difference).

    Returns
    -------
    callable
        ``func(x, y, **kwargs) -> float``; smaller means more similar.
        Suitable as a custom metric for TSNE.
    """
    n_steps, n_dim = shape

    def func(x, y, **kwargs):
        a = x.reshape(shape)
        b = y.reshape(shape)
        fwd = np.zeros(n_steps)
        bwd = np.zeros(n_steps)
        for lag in range(n_steps):
            span = n_steps - lag
            fwd[lag] = np.exp(-ksize * (a[:span] - b[lag:]) ** 2).sum() / span
            bwd[lag] = np.exp(-ksize * (a[lag:] - b[:span]) ** 2).sum() / span
        # best symmetric kernel match over all lags, subtracted from the
        # per-lag maximum attainable value (dim, when a == b)
        return n_dim - (.5 * (fwd + bwd)).max()

    return func

def S(shape, ksize=.5):
    """Build a spectral distance between flattened sequences.

    Compares two sequences through the magnitude of their cross-spectrum
    along the time axis: 0 when the spectra are identical up to phase,
    approaching 1 when they share no energy.

    Parameters
    ----------
    shape : tuple (time, dim)
        Shape each 1-d input vector is reshaped to.
    ksize : float
        Unused; kept for interface parity with ``V``.

    Returns
    -------
    callable
        ``func(x, y, **kwargs) -> float``.
    """
    def func(x, y, **kwargs):
        a = fft(x.reshape(shape), axis=0)
        b = fft(y.reshape(shape), axis=0)
        cross = abs(a * np.conj(b)).sum()
        total = abs(a).sum() * abs(b).sum()
        return 1 - cross / total

    return func

data = x.reshape((leng, -1))
# V = ((data[:, None, :] - data)**2).sum(axis=-1)

tsne = TSNE(perplexity=40, metric=V((20,784)), verbose=True)
ebd = tsne.fit_transform(data)
cPickle.dump(ebd, file('ebd.pkl','w'))
_ = embedding2dplot(ebd, labels)

In [ ]:
ebd = cPickle.load(file('ebd.pkl', 'r'))
_ = embedding2dplot(ebd, labels)

In [ ]:
from agnez import image_sequence
for b in teststream.get_epoch_iterator():
    break
x = b[0]
A1 = image_sequence(x[:,0,:], (28, 28))
A2 = image_sequence(x[:,1,:], (28, 28))
A3 = image_sequence(x[:,2,:], (28, 28))
A = np.vstack([A1, A2, A3])
f = plt.figure(figsize=(10,10))
imshow(A)

In [ ]:
x.shape

In [ ]: