In [1]:
# from https://triangleinequality.wordpress.com/2014/08/12/theano-autoencoders-and-mnist/

import numpy as np
import theano as th
from theano import tensor as T
from numpy import random as rng
 
class AutoEncoder(object):
    def __init__(self, X, hidden_size, activation_function,
                 output_function):
        #X is the data, an m x n numpy matrix
        #where rows correspond to datapoints
        #and columns correspond to features.
        assert type(X) is np.ndarray
        assert len(X.shape)==2
        self.X=X
        self.X=th.shared(name='X', value=np.asarray(self.X, 
                         dtype=th.config.floatX),borrow=True)
        #The config.floatX and borrow=True stuff is there to make this run
        #fast on the GPU. I recommend just using it without thinking about
        #it until you understand the code as a whole, then learning more
        #about GPUs and Theano.
        self.n = X.shape[1]
        self.m = X.shape[0]
        #Hidden_size is the number of neurons in the hidden layer, an int.
        assert type(hidden_size) is int
        assert hidden_size > 0
        self.hidden_size=hidden_size
        initial_W = np.asarray(rng.uniform(
                 low=-4 * np.sqrt(6. / (self.hidden_size + self.n)),
                 high=4 * np.sqrt(6. / (self.hidden_size + self.n)),
                 size=(self.n, self.hidden_size)), dtype=th.config.floatX)
        self.W = th.shared(value=initial_W, name='W', borrow=True)
        self.b1 = th.shared(name='b1', value=np.zeros(shape=(self.hidden_size,),
                            dtype=th.config.floatX),borrow=True)
        self.b2 = th.shared(name='b2', value=np.zeros(shape=(self.n,),
                            dtype=th.config.floatX),borrow=True)
        self.activation_function=activation_function
        self.output_function=output_function
                     
    def train(self, n_epochs=100, mini_batch_size=1, learning_rate=0.1):
        index = T.lscalar()
        x=T.matrix('x')
        params = [self.W, self.b1, self.b2]
        hidden = self.activation_function(T.dot(x, self.W)+self.b1)
        output = T.dot(hidden,T.transpose(self.W))+self.b2
        output = self.output_function(output)
         
        #Use cross-entropy loss.
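        #Per example: L_i = -sum_j [ x_ij*log(yhat_ij) + (1-x_ij)*log(1-yhat_ij) ]; cost is its mean over the mini-batch.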
        L = -T.sum(x*T.log(output) + (1-x)*T.log(1-output), axis=1)
        cost=L.mean()       
        updates=[]
         
        #Return gradient with respect to W, b1, b2.
        gparams = T.grad(cost,params)
         
        #Create a list of 2 tuples for updates.
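        #(plain gradient descent: param <- param - learning_rate * gradient)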
        for param, gparam in zip(params, gparams):
            updates.append((param, param-learning_rate*gparam))
         
        #Train given a mini-batch of the data.
        train = th.function(inputs=[index], outputs=[cost], updates=updates,
                            givens={x:self.X[index:index+mini_batch_size,:]})
                             
 
        import time
        start_time = time.clock()
        for epoch in xrange(n_epochs):
            print "Epoch:",epoch
            for row in xrange(0,self.m, mini_batch_size):
                train(row)
        end_time = time.clock()
        print "Average time per epoch=", (end_time-start_time)/n_epochs
                    
    def get_hidden(self,data):
        x=T.dmatrix('x')
        hidden = self.activation_function(T.dot(x,self.W)+self.b1)
        transformed_data = th.function(inputs=[x], outputs=[hidden])
        return transformed_data(data)
     
    def get_weights(self):
        return [self.W.get_value(), self.b1.get_value(), self.b2.get_value()]


Using gpu device 0: GeForce GT 750M
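
A minimal sketch (assuming the sigmoid activation and output functions used below) of the tied-weight reconstruction the class computes, replayed in plain numpy from the values returned by get_weights(); reconstruct is a hypothetical helper, not part of the class:

def reconstruct(autoencoder, data):
    #replay the encoder/decoder outside Theano
    W, b1, b2 = autoencoder.get_weights()
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    hidden = sigmoid(data.dot(W) + b1)       #encoder: x -> sigmoid(x W + b1)
    return sigmoid(hidden.dot(W.T) + b2)     #decoder reuses W transposed (tied weights)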

In [2]:
activation_function = T.nnet.sigmoid

In [4]:
output_function=activation_function

In [2]:
import cPickle, gzip, numpy

# Load the dataset
f = gzip.open('mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()

In [3]:
from sklearn.decomposition import PCA

In [4]:
pca = PCA(32)

In [5]:
X = pca.fit_transform(train_set[0])

In [6]:
sum(pca.explained_variance_ratio_)


Out[6]:
0.74393093166872859

In [7]:
X.shape


Out[7]:
(50000, 32)

In [18]:
A = AutoEncoder(X, 2, activation_function, output_function)

In [19]:
A.train(20,20)


Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Average time per epoch= 2.05545995

In [20]:
A.get_hidden(X)


Out[20]:
[array([[ 0.,  0.],
       [ 0.,  0.],
       [ 1.,  1.],
       ..., 
       [ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])]

In [339]:
x = np.zeros(2)
type(x)


Out[339]:
numpy.ndarray

In [537]:
import numpy as np
import numpy.random as npr
import pylab as pl
from mpl_toolkits.mplot3d import Axes3D  #needed for the 3-D reconstruction plots in report()
pl.rcParams['font.family'] = 'serif'
%matplotlib inline
from sklearn.decomposition import PCA

dim = X.shape[1]
sigmoid = lambda x : 1.0 / (1.0 + np.exp(-x))
class MLAutoencoder():
    def __init__(self,layer_dims=[32,16,8,4,2,4,8,16,32],
                 activation=relu):  #note: relu is defined in a later cell (In [176])
        self.activation=activation
        self.layer_dims = layer_dims
        self.W = []
        self.b = []
        for i in range(1,len(layer_dims)):
            self.W.append(npr.randn(layer_dims[i-1],layer_dims[i]))
            self.b.append(npr.randn())
        self.num_params = sum([np.prod(w.shape) for w in self.W]) + len(self.b)
        self.bottleneck = np.argmin(np.array(layer_dims))
        
    def mats_to_vec(self,W,b):
        w_vecs = []
        for w in W:
            w_vecs.append(np.reshape(w,np.prod(w.shape)))
        w_vecs.append(np.array(b))
        return np.hstack(w_vecs)
    
    def vec_to_mats(self,w_vecs):
        ind = 0
        W = []
        
        for i in range(len(self.W)):
            size = np.prod(self.W[i].shape)
            W.append(np.reshape(w_vecs[ind:ind+size],self.W[i].shape))
            ind += size
        bias = w_vecs[ind:]
        return W,bias
    
    def predict(self,X):
        def predict_one(x):
            L = x
            for i,w in enumerate(self.W):
                L = self.activation(L.dot(w) + self.b[i])
            return L
        
        if len(X.shape) > 1:
            y = np.zeros(X.shape)
            for i,x in enumerate(X):
                y[i] = predict_one(x)
        else:
            y = predict_one(X)
        return y

    def transform(self,X):
        def transform_one(x):
            L = x
            for i in range(self.bottleneck):
                L = self.activation(L.dot(self.W[i])+self.b[i])
            return L
        
        if len(X.shape) > 1:
            y = np.zeros((len(X),self.layer_dims[self.bottleneck]))
            for i,x in enumerate(X):
                y[i] = transform_one(x)
        else:
            y = transform_one(X)
        return y
        
    def loss(self,y,y_pred):
        return np.sum(abs(y-y_pred))  #L1 (sum of absolute errors) reconstruction loss
    
    def score(self,X):
        X_pred = self.predict(X)
        assert(X_pred.shape == X.shape)
        
        return sum([self.loss(pred,truth) for (pred,truth) in zip(X_pred, X)]) 
    
    def gradient(self,func,x0,h=0.001):
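        #Forward-difference numerical gradient: deriv[i] ~ (func(x0 + h*e_i) - func(x0)) / h.
        #Costs len(x0)+1 evaluations of func and has O(h) truncation error.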
        x0 = np.array(x0)#,dtype=float)
        y = func(x0)
        deriv = np.zeros(len(x0))
        for i in range(len(x0)):
            x = np.array(x0)
            x[i] += h
            deriv[i] = (func(x) - y)/h
        return deriv
    
    def vec_score(self,w,X):
        self.W,self.b = self.vec_to_mats(w)
        return self.score(X)

    def train(self,X_,batch_size=20,epochs=10,
              learning_rate=0.1,cooling=False,
              record=False):
        
        def report(counter,epoch):
            status = "Epoch {1} loss: {0:.3f}".format(self.score(X_),epoch)
            print(status)
            X_r = self.transform(X_)
            pl.scatter(X_r[:,0],X_r[:,1],c=Y,linewidth=0)  #colours come from the global label array Y
            pl.xlim((X_.min(),X_.max()))
            pl.ylim((X_.min(),X_.max()))
            pl.title(status)
            pl.savefig('ae/{0}.jpg'.format(counter))  #assumes an 'ae/' output directory exists
            pl.close()
            
            if X_.shape[1] == 3:
                X_r = self.predict(X_)
                fig = pl.figure()
                ax = Axes3D(fig)
                
                #pl.xlim((0,1))
                #pl.ylim((0,1))
                #ax.zlim((0,1))
                ax.scatter(X_r[:,0],X_r[:,1],X_r[:,2],c=Y)
                pl.savefig('ae/reconstruction-{0}.jpg'.format(counter))
                pl.close()
        
        n = len(X_) / batch_size  #number of mini-batches per epoch (integer division in Python 2)
        w = self.mats_to_vec(self.W,self.b)
        
        X = X_.copy()
        
        counter = 0
        report(counter,0)
        counter += 1
        
        if cooling:
            start_cooling = 100
            delta = learning_rate / (epochs - start_cooling)
        
        if record:
            history = np.zeros((epochs*len(X)/(batch_size-1),self.num_params))  #over-allocated; trimmed to recordi on return
            #history = [w]
            history[0] = w
            recordi = 1
            
        momentum=np.zeros(len(w))
        for t in range(epochs):
            npr.shuffle(X)
            
            if cooling and t > start_cooling:
                #learning_rate -= delta
                learning_rate *= 0.95
            
            for i in range(n):
                w_prev = w
                batch_X = X[i*batch_size:(i+1)*batch_size]
                loss_func = lambda w : self.vec_score(w,batch_X)
                #w -= learning_rate * self.gradient(loss_func,w)
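                #heuristic step: the finite-difference gradient is scaled per-coordinate by the
                #accumulated recent parameter change ("momentum") plus a small floor of 0.001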
                w -= learning_rate*(momentum+0.001)*self.gradient(loss_func,w)
                momentum+=w - w_prev
                if record:
                    #history.append(w)
                    history[recordi] = w
                    recordi+=1
                    
            report(counter,t+1)
            counter +=1
        if record:
            return history[:recordi]
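
A possible refinement (a sketch only, not used above): a central-difference variant of MLAutoencoder.gradient, which trades roughly twice the function evaluations for O(h^2) rather than O(h) truncation error; central_gradient is a hypothetical stand-alone helper.

def central_gradient(func, x0, h=0.001):
    #hypothetical alternative to MLAutoencoder.gradient using central differences
    x0 = np.array(x0, dtype=float)
    deriv = np.zeros(len(x0))
    for i in range(len(x0)):
        xp = np.array(x0); xp[i] += h
        xm = np.array(x0); xm[i] -= h
        deriv[i] = (func(xp) - func(xm)) / (2.0 * h)
    return deriv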

In [553]:
from sklearn import datasets
data = datasets.load_digits()
X = data.data
Y = data.target

pca = PCA(4,whiten=True)
X_ = pca.fit_transform(X)
X_-=X_.min()
X_/= X_.max()
#X_-=0.5
#X_*=2

X_.min(),X_.max()


Out[553]:
(0.0, 1.0)

In [532]:
X.shape


Out[532]:
(1797, 64)

In [ ]:


In [554]:
ae = MLAutoencoder(layer_dims=[4,3,2,3,4],activation=sigmoid)
ae.num_params


Out[554]:
40

In [546]:
%timeit ae.score(X_)


10 loops, best of 3: 57 ms per loop

In [555]:
ae.train(X_,epochs=20)


Epoch 0 loss: 1082.089
Epoch 1 loss: 1019.495
Epoch 2 loss: 979.967
Epoch 3 loss: 954.648
Epoch 4 loss: 939.090
Epoch 5 loss: 928.816
Epoch 6 loss: 922.127
Epoch 7 loss: 917.531
Epoch 8 loss: 914.223
Epoch 9 loss: 912.081
Epoch 10 loss: 910.428
Epoch 11 loss: 909.293
Epoch 12 loss: 908.380
Epoch 13 loss: 907.653
Epoch 14 loss: 907.001
Epoch 15 loss: 906.423
Epoch 16 loss: 905.896
Epoch 17 loss: 905.428
Epoch 18 loss: 904.981
Epoch 19 loss: 904.550
Epoch 20 loss: 904.131

In [556]:
embedding = ae.transform(X_)

In [557]:
pl.scatter(embedding[:,0],embedding[:,1],c=Y,linewidths=0)


Out[557]:
<matplotlib.collections.PathCollection at 0x142c01dd0>

In [489]:
ae.score(X_)


Out[489]:
39115.722832346662

In [315]:
ae.b


Out[315]:
array([ 0.61339082,  0.20203179, -0.82895431,  0.62841949, -0.04191147,
       -0.53694577, -1.72011834,  0.80377583, -0.2336573 , -0.0421919 ])

In [316]:
ae.W,ae.b = ae.vec_to_mats(npr.randn(ae.num_params))
ae.b


Out[316]:
array([ 1.69129522,  0.43654915,  0.17751614, -0.05158694,  0.51030797,
        0.48721361, -0.03444049, -0.03327278, -0.61448917,  0.08084339])

In [334]:
ae.num_params


Out[334]:
3206

In [342]:
%timeit ae.vec_score(npr.randn(ae.num_params),X_[npr.rand(len(X_))<0.1])


10 loops, best of 3: 19.1 ms per loop

In [350]:
params = npr.randn(ae.num_params)
scores = [ae.vec_score(npr.randn(ae.num_params),X_[npr.rand(len(X_))<0.5]) for _ in range(50)]

In [351]:
pl.hist(scores,bins=50);



In [336]:
%timeit ae_loss(npr.randn(ae.num_params))


10 loops, best of 3: 186 ms per loop

In [459]:
X_.shape[0]*0.1


Out[459]:
179.70000000000002

In [490]:
def loss(params,thinning=0.1):
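    #"thinning": score a random ~10% subsample of the rows so each call stays cheap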
    return ae.vec_score(params,X_[npr.rand(len(X_))<thinning])#[:len(X_)/2])

In [491]:
def min_line_loss(vec,loss=loss,alpha=np.arange(0.0,1.5,0.1)):
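    #best (stochastic) loss found along the ray {a*vec : a in alpha}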
    return np.min([loss(vec*a) for a in alpha])

In [492]:
[min_line_loss(params) for i in range(10)]


Out[492]:
[752.45251831688313,
 782.72773892008115,
 807.97640797371764,
 771.10214070061659,
 770.56893406053666,
 798.08333726147589,
 768.76000877361264,
 651.14767143148003,
 771.00160771342098,
 849.03985735316121]

In [493]:
def adaptive_metropolis_hastings(func,x0,num_steps=1000,step_size=0.1,verbose=False):
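    #Greedy variant of Metropolis-Hastings: a move is accepted only if it does not increase func
    #(the commented-out rule below is the usual exp(-delta) acceptance). The step size adapts:
    #after >100 consecutive rejections it shrinks on each further rejection, after >10 consecutive
    #acceptances it grows on each further acceptance, bounded to roughly [1e-3, 10].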
    locs = np.zeros((num_steps+1,len(x0)))
    vals = np.zeros(num_steps+1)
    locs[0] = x0
    vals[0] = func(x0)
    num_rejects=0
    num_accepts=0
    message = ''
    adaptation_rate=1.02
    steps = np.zeros(num_steps+1)
    steps[0] = step_size
    for i in range(num_steps):
        new_loc = locs[i] + npr.randn(len(locs[i]))*step_size
        new_val = func(new_loc)
        #accept = npr.rand() < np.exp(-(new_val - vals[i]))#/vals[i]#np.exp(-(new_val - vals[i]))
        accept = new_val <= vals[i]#*(1+npr.randn()*0.01)# or (npr.rand() < 0.05)
        if not accept:
            new_loc = locs[i]
            new_val = vals[i]
            num_rejects+=1
            num_accepts=0
            if num_rejects > 100:
                if step_size > 1e-3:
                    step_size/=adaptation_rate
                message = message+'\n reduced step size'
        else:
            num_rejects=0
            num_accepts+=1
            if num_accepts > 10:
                if step_size < 10.0:
                    step_size*=adaptation_rate
                message = message+ '\n increased step size'
        locs[i+1] = new_loc
        vals[i+1] = new_val
        if i % 100 ==0:
            print("{0}: {1:.3f}".format(i,new_val))
            if verbose:
                print(message)
            message = ''
        steps[i+1] = step_size
    return locs,vals,steps

In [462]:
locs,vals,steps=adaptive_metropolis_hastings(min_line_loss,npr.randn(ae.num_params))


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-462-9d8523d8d5b9> in <module>()
----> 1 locs,vals,steps=adaptive_metropolis_hastings(min_line_loss,npr.randn(ae.num_params))

<ipython-input-461-96a1c6179766> in adaptive_metropolis_hastings(func, x0, num_steps, step_size, verbose)
     12     for i in range(num_steps):
     13         new_loc = locs[i] + npr.randn(len(locs[i]))*step_size
---> 14         new_val = func(new_loc)
     15         #accept = npr.rand() < np.exp(-(new_val - vals[i]))#/vals[i]#np.exp(-(new_val - vals[i]))
     16         accept = new_val <= vals[i]#*(1+npr.randn()*0.01)# or (npr.rand() < 0.05)

<ipython-input-386-8e8a006492c3> in min_line_loss(vec, loss, alpha)
      1 def min_line_loss(vec,loss=loss,alpha=np.arange(0.0,1.5,0.1)):
----> 2     return np.min([loss(vec*a) for a in alpha])

<ipython-input-385-d0436fab750e> in loss(params, thinning)
      1 def loss(params,thinning=0.1):
----> 2     return ae.vec_score(params,X_[npr.rand(len(X_))<thinning])#[:len(X_)/2])

<ipython-input-318-7b62bc90fda3> in vec_score(self, w, X)
     90     def vec_score(self,w,X):
     91         self.W,self.b = self.vec_to_mats(w)
---> 92         return self.score(X)
     93 
     94     def train(self,X_,batch_size=20,epochs=10,

<ipython-input-318-7b62bc90fda3> in score(self, X)
     73 
     74     def score(self,X):
---> 75         X_pred = self.predict(X)
     76         assert(X_pred.shape == X.shape)
     77 

<ipython-input-318-7b62bc90fda3> in predict(self, X)
     49             y = np.zeros(X.shape)
     50             for i,x in enumerate(X):
---> 51                 y[i] = predict_one(x)
     52         else:
     53             y = predict_one(X)

KeyboardInterrupt: 
0: 686.283

In [517]:
locs,vals,steps=adaptive_metropolis_hastings(loss,npr.randn(ae.num_params)*0.1,num_steps=1000)


0: 292.838
100: 214.630
200: 214.630
300: 198.241
400: 198.241
500: 198.241
600: 198.241
700: 198.241
800: 198.241
900: 198.241

In [518]:
pl.plot(vals)


Out[518]:
[<matplotlib.lines.Line2D at 0x120534c10>]

In [519]:
pl.plot(steps)


Out[519]:
[<matplotlib.lines.Line2D at 0x1236c72d0>]

In [520]:
ae.W,ae.b = ae.vec_to_mats(locs[-1])

In [521]:
ae.score(X_)


Out[521]:
2623.4501720503631

In [522]:
vals[-1]


Out[522]:
198.24066227486074

In [523]:
embedding = ae.transform(X_)

In [525]:
pl.scatter(embedding[:,0],embedding[:,1],c=Y,linewidths=0)


Out[525]:
<matplotlib.collections.PathCollection at 0x13f6040d0>

In [453]:
alpha=np.arange(0.0,1.5,0.1)
a = alpha[np.argmin([ae.vec_score(locs[-1]*a,X_) for a in alpha])]
a


Out[453]:
0.0

In [418]:
ae.W,ae.b = ae.vec_to_mats(locs[-1]*a)

In [419]:
ae.score(X_)


Out[419]:
7506.2448705987626

In [ ]:
ae.transform

In [ ]:
pl.scatter(

In [300]:
ae = MLAutoencoder(layer_dims=[8,4,2,4,8])
ae.num_params


Out[300]:
84

In [210]:
ae.train(X_)


Epoch 0 loss: 7315.132
Epoch 1 loss: 3226.895
Epoch 2 loss: 3225.493
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-210-35506039ee86> in <module>()
----> 1 ae.train(X_)

<ipython-input-208-398582417f25> in train(self, X_, batch_size, epochs, learning_rate, cooling, record)
    155                     recordi+=1
    156 
--> 157             report(counter,t+1)
    158             counter +=1
    159         return history[:recordi]

<ipython-input-208-398582417f25> in report(counter, epoch)
     97 
     98         def report(counter,epoch):
---> 99             status = "Epoch {1} loss: {0:.3f}".format(self.score(X_),epoch)
    100             print(status)
    101             X_r = self.transform(X_)

<ipython-input-208-398582417f25> in score(self, X)
     73 
     74     def score(self,X):
---> 75         X_pred = self.predict(X)
     76         assert(X_pred.shape == X.shape)
     77 

<ipython-input-208-398582417f25> in predict(self, X)
     49             y = np.zeros(X.shape)
     50             for i,x in enumerate(X):
---> 51                 y[i] = predict_one(x)
     52         else:
     53             y = predict_one(X)

<ipython-input-208-398582417f25> in predict_one(x)
     43             L = x
     44             for i,w in enumerate(self.W):
---> 45                 L = self.activation(L.dot(w) + self.b[i])
     46             return L
     47 

<ipython-input-208-398582417f25> in <lambda>(x)
      7 
      8 dim = X.shape[1]
----> 9 sigmoid = lambda x : 1.0 / (1.0 + np.exp(-x))
     10 class MLAutoencoder():
     11     def __init__(self,layer_dims=[32,16,8,4,2,4,8,16,32],

KeyboardInterrupt: 


In [ ]:


In [12]:
Y = train_set[1]

In [13]:
X.shape


Out[13]:
(50000, 32)

In [14]:
Y.shape


Out[14]:
(50000,)

In [237]:
pl.scatter(X_[:,0],X_[:,1],c=Y,linewidths=0,alpha=0.5)


Out[237]:
<matplotlib.collections.PathCollection at 0x125ab9bd0>

In [238]:
X_r = ae.transform(X_)

In [239]:
pl.scatter(X_r[:,0],X_r[:,1],c=Y,linewidths=0,alpha=0.5)


Out[239]:
<matplotlib.collections.PathCollection at 0x11566e510>

In [52]:
#Y = Y[:10000]

In [53]:
#X = X_[:10000]

In [54]:
X_.min()


Out[54]:
0.0

In [176]:
relu = lambda x: x*(x>0)

In [185]:
tanh = lambda x:np.tanh(x)

In [243]:
from sklearn import datasets
data = datasets.load_diabetes()

X = data.data
Y = data.target

In [244]:
X.shape


Out[244]:
(442, 10)

In [245]:
pca = PCA()
pca.fit(X)


Out[245]:
PCA(copy=True, n_components=None, whiten=False)

In [246]:
pl.plot(pca.explained_variance_ratio_)


Out[246]:
[<matplotlib.lines.Line2D at 0x125c97e50>]

In [250]:
pl.scatter(pca.fit_transform(X)[:,0],
           pca.fit_transform(X)[:,1],
           c=Y,
           linewidths=0,
           alpha=0.5)
pl.colorbar()


Out[250]:
<matplotlib.colorbar.Colorbar instance at 0x110fdd710>

In [252]:
from sklearn.cross_decomposition import PLSRegression
pls = PLSRegression()
pls.fit(X,Y)
pls.score(X,Y)


Out[252]:
0.50832548177810266

In [253]:
pca = PCA(4,whiten=True)

In [254]:
activation = relu
if activation==relu:
    #relu: rescale the PCA features to [0,1]
    X_ = pca.fit_transform(X)
    X_-=X_.min()
    X_/= X_.max()
else:
    #other activations (e.g. tanh): rescale to [-1,1]
    X_ = pca.fit_transform(X)
    X_-=X_.min()
    X_/= X_.max()
    X_-=0.5
    X_*=2

In [ ]:


In [ ]:


In [284]:
ae = MLAutoencoder(layer_dims=[4,3,2,3,4],activation=activation)
ae.num_params


Out[284]:
40

In [285]:
record = ae.train(X_,batch_size=10,epochs=10,learning_rate=0.001,record=True)


Epoch 0 loss: 1041.333
Epoch 1 loss: 482.683
Epoch 2 loss: 438.709
Epoch 3 loss: 438.197
Epoch 4 loss: 434.749
Epoch 5 loss: 434.677
Epoch 6 loss: 432.458
Epoch 7 loss: 432.150
Epoch 8 loss: 431.184
Epoch 9 loss: 431.163
Epoch 10 loss: 430.349

In [287]:
record[::10].shape


Out[287]:
(45, 40)

In [289]:
w = ae.mats_to_vec(ae.W,ae.b)
w = record[0]

gradients = []
for w in record[::10]:
    loc_grad = []
    for i in range(50):
        batch_X = X_[npr.rand(len(X)) < 0.1]
        loss_func = lambda w : ae.vec_score(w,batch_X)
        loc_grad.append(ae.gradient(loss_func,w))
    gradients.append(np.array(loc_grad))
gradients = np.array(gradients)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-289-4a300fe45082> in <module>()
      8         batch_X = X_[npr.rand(len(X)) < 0.1]
      9         loss_func = lambda w : ae.vec_score(w,batch_X)
---> 10         loc_grad.append(ae.gradient(loss_func,w))
     11     gradients.append(np.array(loc_grad))
     12 gradients = np.array(gradients)

<ipython-input-235-d4644f4a6a49> in gradient(self, func, x0, h)
     85             x = np.array(x0)
     86             x[i] += h
---> 87             deriv[i] = (func(x) - y)/h
     88         return deriv
     89 

<ipython-input-289-4a300fe45082> in <lambda>(w)
      7     for i in range(50):
      8         batch_X = X_[npr.rand(len(X)) < 0.1]
----> 9         loss_func = lambda w : ae.vec_score(w,batch_X)
     10         loc_grad.append(ae.gradient(loss_func,w))
     11     gradients.append(np.array(loc_grad))

<ipython-input-235-d4644f4a6a49> in vec_score(self, w, X)
     90     def vec_score(self,w,X):
     91         self.W,self.b = self.vec_to_mats(w)
---> 92         return self.score(X)
     93 
     94     def train(self,X_,batch_size=20,epochs=10,

<ipython-input-235-d4644f4a6a49> in score(self, X)
     76         assert(X_pred.shape == X.shape)
     77 
---> 78         return sum([self.loss(pred,truth) for (pred,truth) in zip(X_pred, X)])
     79 
     80     def gradient(self,func,x0,h=0.0001):

<ipython-input-235-d4644f4a6a49> in loss(self, y, y_pred)
     70 
     71     def loss(self,y,y_pred):
---> 72         return np.sum(abs(y-y_pred))
     73 
     74     def score(self,X):

/Applications/Canopy.app/appdata/canopy-1.4.1.1975.macosx-x86_64/Canopy.app/Contents/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in sum(a, axis, dtype, out, keepdims)
   1712     else:
   1713         return _methods._sum(a, axis=axis, dtype=dtype,
-> 1714                             out=out, keepdims=keepdims)
   1715 
   1716 def product (a, axis=None, dtype=None, out=None, keepdims=False):

/Applications/Canopy.app/appdata/canopy-1.4.1.1975.macosx-x86_64/Canopy.app/Contents/lib/python2.7/site-packages/numpy/core/_methods.pyc in _sum(a, axis, dtype, out, keepdims)
     23 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
     24     return um.add.reduce(a, axis=axis, dtype=dtype,
---> 25                             out=out, keepdims=keepdims)
     26 
     27 def _prod(a, axis=None, dtype=None, out=None, keepdims=False):

KeyboardInterrupt: 

In [291]:
len(gradients)


Out[291]:
25

In [282]:
grads2 = np.array(grads2)

In [292]:
for grad in gradients:
    pl.plot(grad.mean(0))



In [297]:
pl.plot(gradients[0].mean(0))
w = record[0]
for i in range(50):
    batch_X = X_[npr.rand(len(X)) < 0.1]
    loss_func = lambda w : ae.vec_score(w,batch_X)
    pl.plot(ae.gradient(loss_func,w))



In [ ]:


In [130]:
record.shape


Out[130]:
(898, 80)

In [149]:
record = record[:-180]

In [150]:
differences = np.zeros((len(record),len(record)))  #pairwise Euclidean distances between parameter snapshots

In [151]:
for i in range(len(record)):
    for j in range(len(record)):
        differences[i,j] = np.sqrt(sum((record[i]-record[j])**2))
    if i % 100 == 0:
        print(i)


0
100
200
300
400
500
600
700

In [124]:
sum(differences==0)


Out[124]:
array([1791, 1791, 1791, ..., 1791, 1791, 1791])

In [153]:
# for a case with batch_size=10, step_size=0.01
pl.imshow(differences,interpolation='none')
pl.colorbar()


Out[153]:
<matplotlib.colorbar.Colorbar instance at 0x11121b7a0>

In [111]:
# for a case with batch_size=20, step_size=1.0
pl.imshow(differences[8:-8,8:-8],interpolation='none')
pl.colorbar()


Out[111]:
<matplotlib.colorbar.Colorbar instance at 0x11b5d67e8>

In [ ]:
# how much does the "angle" change during training?

In [154]:
def theta(X,Y):
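    #angle (in radians) between X and Y, i.e. arccos of their cosine similarity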
    norm_X = np.sqrt(sum((X)**2))
    norm_Y = np.sqrt(sum((Y)**2))
    return np.arccos(X.dot(Y) / (norm_X * norm_Y))

In [155]:
theta(record[1]-record[0],record[2]-record[1])


Out[155]:
0.69550530892888252

In [163]:
differences = record[1:] - record[:-1]  #successive parameter updates (reuses the name from the distance matrix above)

In [164]:
differences.sum(1).shape


Out[164]:
(717,)

In [165]:
pl.plot(differences.sum(1))


Out[165]:
[<matplotlib.lines.Line2D at 0x1103b6550>]

In [166]:
pl.hist(differences.sum(1),bins=50);



In [167]:
angles = np.zeros(len(differences)-1)
for i in range(1,len(differences)):
    angles[i-1] = theta(differences[i-1],differences[i])

In [171]:
pl.plot(angles)


Out[171]:
[<matplotlib.lines.Line2D at 0x121f41a90>]

In [170]:
pl.hist(angles,bins=50);



In [29]:
ae.num_params


Out[29]:
144

In [ ]:
ae.num_p

In [51]:
X_ = X[:2000]
Y = Y[:2000]

In [ ]:
X

In [57]:
ae.train(X_[:,:16])


Epoch 0 loss: 37692.536
Epoch 1 loss: 33701.885
Epoch 2 loss: 33439.839
Epoch 3 loss: 33207.664
Epoch 4 loss: 33060.173
Epoch 5 loss: 33035.047
Epoch 6 loss: 33015.856
Epoch 7 loss: 32949.710
Epoch 8 loss: 32979.320
Epoch 9 loss: 33011.463
Epoch 10 loss: 32958.327

In [ ]:
ae.n