In [ ]:
from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, \
    FileTransferSpeed, FormatLabel, Percentage, \
    ProgressBar, ReverseBar, RotatingMarker, \
    SimpleProgress, Timer, AdaptiveETA, AdaptiveTransferSpeed
import sys
import time

examples = []
In [ ]:
%matplotlib inline
from numpy.fft import fft
import theano
import cPickle
import numpy as np
from theano import tensor, function
from pylab import imshow
from pylab import rcParams
from IPython.display import Image
from agnez.weight import grid2d
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.transformers import Mapping, Flatten
from fuel.schemes import SequentialScheme
from blocks.bricks.cost import MisclassificationRate
from blocks_contrib.probability import Pserver
from blocks_contrib.datasets import rotated_dataset, Meanizer
from sklearn.manifold import TSNE  # public path; sklearn.manifold.t_sne is private
from sklearn.linear_model import LogisticRegression
from agnez import embedding2d, embedding2dplot, video_grid
import seaborn as sns
sns.set_style('dark')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5,
                rc={"lines.linewidth": 2.5})
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as KNC
In [ ]:
def _circshift(data):
    """Keep only the first 10 time steps and first 10 features of each
    sequence, then circularly shift it in time by a random offset."""
    b = data[0]
    for i in range(data[0].shape[1]):
        bi = data[0][:, i, :]
        bi[10:] = 0
        bi[:, 10:] = 0
        n = np.random.randint(0, 20)
        b[:, i, :] = np.roll(bi, n, axis=0)
    return (b, data[1])
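A quick toy check (hypothetical data, not part of the pipeline): `_circshift` should zero everything outside the first 10 time steps and 10 features, then roll each sequence in time.

In [ ]:
toy = (np.ones((20, 2, 784)), np.zeros((2, 1)))  # (time, batch, features)
shifted, _ = _circshift(toy)
print shifted.shape                     # (20, 2, 784)
print (shifted[:, 0, 10:] == 0).all()   # features beyond the first 10 are zeroed
print shifted[:, 0, :10].sum(axis=1)    # 10 surviving features per active time step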
In [ ]:
leng = 10000
batch_size = 1000
mnist_test = MNIST("test")  # , subset=slice(60000, leng + 60000))
teststream = Mapping(Flatten(DataStream(
    mnist_test,
    iteration_scheme=SequentialScheme(leng, batch_size))),
    rotated_dataset(20))
teststream = Mapping(teststream, _circshift)
mnist_train = MNIST("train", subset=slice(0, 6*leng))
trainstream = Mapping(Flatten(DataStream(
    mnist_train,
    iteration_scheme=SequentialScheme(6*leng, batch_size))),
    rotated_dataset(20))
trainstream = Mapping(trainstream, _circshift)
# meanizer = Meanizer(trainstream)
# trainstream = Mapping(trainstream, meanizer.meanless)
# teststream = Mapping(teststream, meanizer.meanless)
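Peeking at one batch documents the layout the extraction loops below rely on; the shapes in the comment are an assumption about what `rotated_dataset` yields (time-major), to be confirmed by the printout.

In [ ]:
b = next(teststream.get_epoch_iterator())
print b[0].shape, b[1].shape  # expecting (20, batch_size, 784) and (batch_size, 1)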
In [ ]:
def autocorrentropy(X, sigma=1):
    """Scalar autocorrentropy: Gaussian kernel of the total squared
    lag-j difference, averaged over the (t-j) valid steps."""
    b, t, d = X.shape
    V = np.zeros((b, t))
    nor = np.sqrt(2*np.pi)*sigma
    dem = 2*sigma**2
    for i in range(b):
        for j in range(t):
            V[i, j] = np.exp(-((X[i, :(t-j)] - X[i, j:])**2).sum() / dem / (t-j)) / nor
    return V

def autocorrentropy2(X, sigma=1):
    """Per-feature autocorrentropy: Gaussian kernel of each lagged
    difference, averaged over the valid time steps for every lag j."""
    b, t, d = X.shape
    V = np.zeros((b, t, d))
    dem = 2*sigma**2
    for i in range(b):
        for j in range(t):
            V[i, j, :] = np.exp(-(X[i, :(t-j), :] - X[i, j:, :])**2 / dem).sum(axis=0) / (t-j)
            # V[i, j, :] = ((X[i, :(t-j), :] - X[i, j:, :])**2).sum(axis=0)
    # V = V - V.mean(axis=(0, 1))[None, None, :]
    return V

def correlation(X, sigma=1):
    """Per-feature (uncentered) autocorrelation at each lag; `sigma` is
    kept only for interface compatibility with the kernels above."""
    b, t, d = X.shape
    V = np.zeros((b, t, d))
    for i in range(b):
        for j in range(t):
            a = X[i, :(t-j), :]
            c = X[i, j:, :]  # renamed from `b`, which shadowed the batch size
            V[i, j, :] = (a*c).sum(axis=0) / (t-j)
    return V

def example(fn):
    """Register `fn` in `examples` and let a KeyboardInterrupt skip it."""
    def wrapped(X, **kwargs):
        V = None  # defined even if the run is interrupted early
        try:
            sys.stdout.write('Running: %s\n' % fn.__name__)
            V = fn(X, **kwargs)
            sys.stdout.write('\n')
        except KeyboardInterrupt:
            sys.stdout.write('\nSkipping example.\n\n')
        return V
    examples.append(wrapped)
    return wrapped

@example
def crosscorrelation(X):
    """Pairwise squared-difference scores between sequences (lag 0 used)."""
    b, t, d = X.shape
    V = np.zeros((b, b))
    v1 = np.zeros(t)
    v2 = np.zeros(t)
    with ProgressBar(maxval=b) as progress:
        for i in range(b):
            progress.update(i)
            for j in range(b):
                for k in range(t):  # was range(0), which never ran and left v1[0] == 0
                    v1[k] = ((X[i, :(t-k), :] - X[j, k:, :])**2 / (t-k)).sum()
                    v2[k] = ((X[i, k:, :] - X[j, :(t-k), :])**2 / (t-k)).sum()
                V[i, j] = v1[0]  # + v2[0]/2  # (v1 + v2).min() / 2
    return V

@example
def crosscorrentropy(X, sigma=1):
    """Pairwise correntropy between sequences: best symmetric lag match."""
    b, t, d = X.shape
    V = np.zeros((b, b))
    v1 = np.zeros(t)
    v2 = np.zeros(t)
    nor = np.sqrt(2*np.pi)*sigma
    dem = 2*sigma**2
    with ProgressBar(maxval=b) as progress:
        for i in range(b):
            progress.update(i)
            for j in range(b):
                for k in range(t):
                    v1[k] = np.exp(-((X[i, :(t-k), :] - X[j, k:, :])**2 / (t-k)).sum() / dem) / nor
                    v2[k] = np.exp(-((X[i, k:, :] - X[j, :(t-k), :])**2 / (t-k)).sum() / dem) / nor
                V[i, j] = (v1 + v2).max() / 2
    return V
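A minimal sanity check on `autocorrentropy2` (toy data): for a sequence that is constant over time every lagged difference is zero, so the kernel average should be exactly 1 at all lags.

In [ ]:
Xc = np.tile(np.random.normal(0, 1, (3, 1, 5)), (1, 20, 1))  # constant over time
Vc = autocorrentropy2(Xc)
print np.allclose(Vc, 1.0)  # exp(0) averaged over (t-j) valid steps == 1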
In [ ]:
X = np.random.normal(0, 1, (100, 20, 784))
V = autocorrentropy2(X)
_ = plt.plot(V[1])
In [ ]:
V = crosscorrentropy(X)
print V.shape
In [ ]:
imshow(V)
In [8]:
# z = np.zeros((leng, 20))
x = np.zeros((leng, 20, 784))
y = np.zeros((leng, 20, 784))
labels = np.zeros(leng)
sigma = .1
for i, b in enumerate(teststream.get_epoch_iterator()):
    print i
    first = i*batch_size
    last = (i+1)*batch_size
    data = b[0].transpose(1, 0, 2)  # -> (batch, time, features)
    # z[first:last] = autocorrentropy(data, sigma=sigma)
    # x[first:last] = data
    y[first:last] = autocorrentropy2(data, sigma=sigma)
    # y[first:last] = correlation(data, sigma=sigma)
    labels[first:last] = b[1][:, 0]
labels = labels.astype('int')
In [ ]:
# zt = np.zeros((6*leng, 20))
# xt = np.zeros((6*leng, 20, 784))
yt = np.zeros((6*leng, 20, 784))
labelst = np.zeros(6*leng)
for i, b in enumerate(trainstream.get_epoch_iterator()):
    print i
    first = i*batch_size
    last = (i+1)*batch_size
    data = b[0].transpose(1, 0, 2)  # -> (batch, time, features)
    # zt[first:last] = autocorrentropy(data, sigma=sigma)
    # xt[first:last] = data
    yt[first:last] = autocorrentropy2(data, sigma=sigma)
    # yt[first:last] = correlation(data, sigma=sigma)
    labelst[first:last] = b[1][:, 0]
labelst = labelst.astype('int')
In [ ]:
# NOTE: x stays all zeros unless the `x[first:last] = data` line above is uncommented.
print x.shape
_ = video_grid(x[:100].transpose(1, 0, 2))
In [ ]:
x[0].shape
In [ ]:
handle = mnist_test.open()
x = mnist_test.get_data(handle, slice(0, 100))
I = grid2d(y[:100, 0, :])
I2 = grid2d(x[0].reshape((100, 784)))
plt.subplot(121)
plt.imshow(I)
plt.subplot(122)
plt.imshow(I2)
In [ ]:
plt.imshow(I)
In [ ]:
y.shape
In [ ]:
# clf1 = KNC(n_neighbors=1).fit(zt, labelst)
lag = 1  # np.arange(20)
clf2 = KNC(n_neighbors=1).fit(yt[:5*leng, lag, :].reshape((5*leng, -1)), labelst[:5*leng])
print clf2.score(yt[5*leng:6*leng, lag, :].reshape((leng, -1)), labelst[5*leng:6*leng])
# clf3 = KNC(n_neighbors=1).fit(xt, labelst)
In [ ]:
def plot_confusion_matrix(cm, labels, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(labels))
    plt.xticks(tick_marks, labels, rotation=45)
    plt.yticks(tick_marks, labels)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
In [ ]:
for lag in range(20):
    print "lag: %d" % lag
    # clf2 = KNC(n_neighbors=1).fit(yt[:, lag, :].reshape((6*leng, -1)), labelst)
    clf2 = LogisticRegression().fit(yt[:, lag, :], labelst)
    print clf2.score(y[:, lag, :].reshape((leng, -1)), labels)
# clf3 = KNC(n_neighbors=1).fit(xt, labelst)
In [ ]:
from sklearn.metrics import confusion_matrix
l_pred = clf2.predict(y[:, lag, :])  # clf2 was fit on a single lag, not on (leng, 20*784)
cm = confusion_matrix(labels, l_pred)
plot_confusion_matrix(cm, np.arange(10))
In [ ]:
# NOTE: clf1, clf3, and z are only defined when the commented-out baselines above are enabled.
print clf1.score(z, labels)
print clf2.score(y[:, lag, :], labels)
print clf3.score(x, labels)
In [ ]:
plt.plot(z[:10].T)  # requires z from the commented-out scalar autocorrentropy path above
In [ ]:
tsne = TSNE(perplexity=40)
ebd, tsne = embedding2d(z, method=tsne)  # z: scalar autocorrentropy features
_ = embedding2dplot(ebd, labels)
In [ ]:
tsne = TSNE(perplexity=40)
ebd, tsne = embedding2d(y[:,0,:], method=tsne)
_ = embedding2dplot(ebd, labels)
In [ ]:
tsne = TSNE(perplexity=40)
# NOTE: x may have been reassigned by the grid2d cell; re-run the extraction
# loop first so x holds the (leng, 20, 784) sequences, flattened here.
ebd, tsne = embedding2d(x.reshape((leng, -1)), method=tsne)
_ = embedding2dplot(ebd, labels)
In [ ]:
from sklearn.manifold import TSNE
from agnez import embedding2d, embedding2dplot
In [ ]:
x = np.zeros((leng, 20, 784))
labels = np.zeros(leng)
for i, b in enumerate(teststream.get_epoch_iterator()):
    first = i*batch_size  # was i*100, which mismatches the stream's batch size
    last = (i+1)*batch_size
    data = b[0].transpose(1, 0, 2)
    x[first:last] = data
    labels[first:last] = b[1][:, 0]
print x.shape
labels = labels.astype('int')
In [ ]:
V = crosscorrentropy(x)
In [ ]:
# crosscorrentropy returns similarities; TSNE's 'precomputed' metric
# expects distances (see the conversion sketch below).
tsne = TSNE(perplexity=40, metric='precomputed')
ebd, tsne = embedding2d(V, method=tsne)
_ = embedding2dplot(ebd, labels)
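One caveat: `crosscorrentropy` returns a similarity matrix, while scikit-learn's TSNE with `metric='precomputed'` expects distances. A plausible conversion, sketched here rather than what was run above:

In [ ]:
D = V.max() - V         # turn similarities into dissimilarities
D = (D + D.T) / 2       # enforce symmetry
np.fill_diagonal(D, 0)  # self-distance must be zero
tsne = TSNE(perplexity=40, metric='precomputed')
ebd, tsne = embedding2d(D, method=tsne)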
In [ ]:
V = crosscorrelation(x)
In [ ]:
def V(shape, ksize=.5):
    """Correntropy-based distance between two flattened sequences of
    shape (time, features): N minus the best symmetric lag match."""
    def func(x, y, **kwargs):
        t, N = shape
        x = x.reshape(shape)
        y = y.reshape(shape)
        V1 = np.zeros(t)
        V2 = np.zeros(t)
        for i in range(t):
            V1[i] = np.exp(-ksize*(x[:(t-i)] - y[i:])**2).sum() / (t-i)
            V2[i] = np.exp(-ksize*(x[i:] - y[:(t-i)])**2).sum() / (t-i)
        return N - (.5*(V1 + V2)).max()
    return func

def S(shape, ksize=.5):
    """Spectral distance: 1 minus the normalized cross-power of the
    FFTs (taken over time) of the two sequences."""
    def func(x, y, **kwargs):
        x = x.reshape(shape)
        y = y.reshape(shape)
        X = fft(x, axis=0)
        Y = fft(y, axis=0)
        # F = ((abs(X) - abs(Y))**2).sum()
        F = abs(X * np.conj(Y)).sum()
        D = abs(X).sum() * abs(Y).sum()
        return 1 - F / D
    return func

data = x.reshape((leng, -1))
# V = ((data[:, None, :] - data)**2).sum(axis=-1)
tsne = TSNE(perplexity=40, metric=V((20, 784)), verbose=True)
ebd = tsne.fit_transform(data)
cPickle.dump(ebd, open('ebd.pkl', 'wb'))
_ = embedding2dplot(ebd, labels)
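As a check on the custom metric (toy vectors): identical sequences should sit at distance 0, since the lag-0 kernel average attains the maximum value N, and the metric is symmetric by construction.

In [ ]:
dist = V((20, 784))
u = np.random.normal(0, 1, 20*784)
v = np.roll(u, 784)  # one time step of shift after the reshape
print np.isclose(dist(u, u), 0)           # identical -> distance 0
print np.isclose(dist(u, v), dist(v, u))  # symmetric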
In [ ]:
ebd = cPickle.load(open('ebd.pkl', 'rb'))
_ = embedding2dplot(ebd, labels)
In [ ]:
from agnez import image_sequence
for b in teststream.get_epoch_iterator():
    break  # keep only the first batch
x = b[0]
A1 = image_sequence(x[:, 0, :], (28, 28))
A2 = image_sequence(x[:, 1, :], (28, 28))
A3 = image_sequence(x[:, 2, :], (28, 28))
A = np.vstack([A1, A2, A3])
f = plt.figure(figsize=(10, 10))
imshow(A)
In [ ]:
x.shape