In [1]:
# Import here
# Third-party dependencies for the whole notebook: numpy for numeric arrays,
# theano for the symbolic computation graph, matplotlib for plotting.
%matplotlib inline
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
import numpy as np
import theano
import theano.tensor as T
# Default figure size (inches) for every figure rendered inline below.
pylab.rcParams['figure.figsize'] = (9.0, 6.0)
In [17]:
# Build the symbolic graph for a fixed-length linear recurrence
#   y_t = S . y_{t-1} + W . x,   y_0 = 0,
# unrolled for n_steps iterations with theano.scan, then compile a function
# that returns the cost, both gradients, and the final state.
n_steps = 100

# Symbolic float32 matrices: data x and the two trainable weight matrices.
x = T.fmatrix(name='x')
S = T.fmatrix(name='S')
W = T.fmatrix(name='W')

# Effective bias term W.x — constant across all scan iterations.
b_eff = T.dot(W, x)

def one_step(y_prev, S_mat, bias):
    """One recurrence step: map the previous state to S.y_prev + bias."""
    return T.dot(S_mat, y_prev) + bias

# scan calls one_step as (previous_output, *non_sequences); the initial
# state is a zero matrix shaped like b_eff.
y_seq, updates = theano.scan(
    fn=one_step,
    outputs_info=T.zeros_like(b_eff),
    non_sequences=[S, b_eff],
    n_steps=n_steps,
)

# Only the final state of the unrolled recurrence is used downstream.
y = y_seq[-1]

# Mean squared reconstruction error between the input and the final state
# (sum over the feature axis, mean over the batch).
cost = T.mean(T.sum((x - y) ** 2, axis=0, keepdims=True))

# Symbolic gradients w.r.t. both weight matrices.
dS = T.grad(cost, S)
dW = T.grad(cost, W)

f = theano.function(
    inputs=[x, S, W],
    outputs=[cost, dS, dW, y],
    updates=updates,
)
In [22]:
# Problem dimensions.
dim_x = 64       # state / input dimensionality
dim_batch = 256  # number of samples (columns) per batch

# Fix the RNG seed so the notebook is reproducible under Restart & Run All;
# the original initialization (and every training batch below) was unseeded.
np.random.seed(42)

# Initialize the recurrence matrix S at the identity, and W with small random
# weights scaled down by fan-in and by the number of unrolled steps so the
# accumulated bias contribution over the recurrence stays O(1).
val_S = np.eye(dim_x, dtype=np.float32)
val_W = np.random.randn(dim_x, dim_x).astype(np.float32) / np.sqrt(dim_x) / n_steps
In [24]:
# Plain SGD on S and W. Both learning rates are scaled down by the batch size;
# eta_S is additionally divided by n_steps since S's gradient accumulates
# over every step of the unrolled recurrence.
eta_W = 0.1 / dim_batch
eta_S = 0.1 / dim_batch / n_steps

for step in range(1000):
    # Draw a fresh random batch each iteration (columns are samples).
    batch_x = np.random.randn(dim_x, dim_batch).astype(np.float32)
    batch_cost, grad_S, grad_W, final_y = f(batch_x, val_S, val_W)
    # Report the scalar cost every 10 iterations (Python 2 print statement).
    if (step + 1) % 10 == 0:
        print (step + 1), batch_cost
    # In-place gradient-descent updates of the weight matrices.
    val_S -= eta_S * grad_S
    val_W -= eta_W * grad_W
In [25]:
print val_S
plt.imshow(val_S, interpolation='None')
Out[25]:
In [26]:
print val_W
plt.imshow(val_W, interpolation='None')
Out[26]:
In [ ]: