notebook.community

Edit and run



In [1]:

    
# from https://triangleinequality.wordpress.com/2014/08/12/theano-autoencoders-and-mnist/

import numpy as np
import theano as th
from theano import tensor as T
from numpy import random as rng
 
class AutoEncoder(object):
    def __init__(self, X, hidden_size, activation_function,
                 output_function):
        #X is the data, an m x n numpy matrix
        #where rows correspond to datapoints
        #and columns correspond to features.
        assert type(X) is np.ndarray
        assert len(X.shape)==2
        self.X=X
        self.X=th.shared(name='X', value=np.asarray(self.X, 
                         dtype=th.config.floatX),borrow=True)
        #The config.floatX and borrow=True stuff is to get this to run
        #fast on the gpu. I recommend just doing this without thinking about
        #it until you understand the code as a whole, then learning more
        #about gpus and theano.
        self.n = X.shape[1]
        self.m = X.shape[0]
        #Hidden_size is the number of neurons in the hidden layer, an int.
        assert type(hidden_size) is int
        assert hidden_size > 0
        self.hidden_size=hidden_size
        initial_W = np.asarray(rng.uniform(
                 low=-4 * np.sqrt(6. / (self.hidden_size + self.n)),
                 high=4 * np.sqrt(6. / (self.hidden_size + self.n)),
                 size=(self.n, self.hidden_size)), dtype=th.config.floatX)
        self.W = th.shared(value=initial_W, name='W', borrow=True)
        self.b1 = th.shared(name='b1', value=np.zeros(shape=(self.hidden_size,),
                            dtype=th.config.floatX),borrow=True)
        self.b2 = th.shared(name='b2', value=np.zeros(shape=(self.n,),
                            dtype=th.config.floatX),borrow=True)
        self.activation_function=activation_function
        self.output_function=output_function
                     
    def train(self, n_epochs=100, mini_batch_size=1, learning_rate=0.1):
        index = T.lscalar()
        x=T.matrix('x')
        params = [self.W, self.b1, self.b2]
        hidden = self.activation_function(T.dot(x, self.W)+self.b1)
        output = T.dot(hidden,T.transpose(self.W))+self.b2
        output = self.output_function(output)
         
        #Use cross-entropy loss.
        L = -T.sum(x*T.log(output) + (1-x)*T.log(1-output), axis=1)
        cost=L.mean()       
        updates=[]
         
        #Return gradient with respect to W, b1, b2.
        gparams = T.grad(cost,params)
         
        #Create a list of 2 tuples for updates.
        for param, gparam in zip(params, gparams):
            updates.append((param, param-learning_rate*gparam))
         
        #Train given a mini-batch of the data.
        train = th.function(inputs=[index], outputs=[cost], updates=updates,
                            givens={x:self.X[index:index+mini_batch_size,:]})
                             
 
        import time
        start_time = time.clock()
        for epoch in xrange(n_epochs):
            print "Epoch:",epoch
            for row in xrange(0,self.m, mini_batch_size):
                train(row)
        end_time = time.clock()
        print "Average time per epoch=", (end_time-start_time)/n_epochs
                    
    def get_hidden(self,data):
        x=T.dmatrix('x')
        hidden = self.activation_function(T.dot(x,self.W)+self.b1)
        transformed_data = th.function(inputs=[x], outputs=[hidden])
        return transformed_data(data)
     
    def get_weights(self):
        return [self.W.get_value(), self.b1.get_value(), self.b2.get_value()]









    



Using gpu device 0: GeForce GT 750M



In [2]:

    
activation_function = T.nnet.sigmoid



In [4]:

    
output_function=activation_function



In [2]:

    
import cPickle, gzip, numpy

# Load the dataset
f = gzip.open('mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()



In [3]:

    
from sklearn.decomposition import PCA



In [4]:

    
pca = PCA(32)



In [5]:

    
X = pca.fit_transform(train_set[0])



In [6]:

    
sum(pca.explained_variance_ratio_)









    Out[6]:





0.74393093166872859



In [7]:

    
X.shape









    Out[7]:





(50000, 32)



In [18]:

    
A = AutoEncoder(X, 2, activation_function, output_function)



In [19]:

    
A.train(20,20)









    



Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Average time per epoch= 2.05545995



In [20]:

    
A.get_hidden(X)









    Out[20]:





[array([[ 0.,  0.],
       [ 0.,  0.],
       [ 1.,  1.],
       ..., 
       [ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])]



In [339]:

    
x = np.zeros(2)
type(x)









    Out[339]:





numpy.ndarray



In [537]:

    
import numpy as np
import numpy.random as npr
import pylab as pl
pl.rcParams['font.family'] = 'serif'
%matplotlib inline
from sklearn.decomposition import PCA

dim = X.shape[1]
sigmoid = lambda x : 1.0 / (1.0 + np.exp(-x))
class MLAutoencoder():
    def __init__(self,layer_dims=[32,16,8,4,2,4,8,16,32],
                 activation=relu):
        self.activation=activation
        self.layer_dims = layer_dims
        self.W = []
        self.b = []
        for i in range(1,len(layer_dims)):
            self.W.append(npr.randn(layer_dims[i-1],layer_dims[i]))
            self.b.append(npr.randn())
        self.num_params = sum([np.prod(w.shape) for w in self.W]) + len(self.b)
        self.bottleneck = np.argmin(np.array(layer_dims))
        
    def mats_to_vec(self,W,b):
        w_vecs = []
        for w in W:
            w_vecs.append(np.reshape(w,np.prod(w.shape)))
        w_vecs.append(np.array(b))
        return np.hstack(w_vecs)
    
    def vec_to_mats(self,w_vecs):
        ind = 0
        W = []
        
        for i in range(len(self.W)):
            size = np.prod(self.W[i].shape)
            W.append(np.reshape(w_vecs[ind:ind+size],self.W[i].shape))
            ind += size
        bias = w_vecs[ind:]
        return W,bias
    
    def predict(self,X):
        def predict_one(x):
            L = x
            for i,w in enumerate(self.W):
                L = self.activation(L.dot(w) + self.b[i])
            return L
        
        if len(X.shape) > 1:
            y = np.zeros(X.shape)
            for i,x in enumerate(X):
                y[i] = predict_one(x)
        else:
            y = predict_one(X)
        return y

    def transform(self,X):
        def transform_one(x):
            L = x
            for i in range(self.bottleneck):
                L = self.activation(L.dot(self.W[i])+self.b[i])
            return L
        
        if len(X.shape) > 1:
            y = np.zeros((len(X),self.layer_dims[self.bottleneck]))
            for i,x in enumerate(X):
                y[i] = transform_one(x)
        else:
            y = transform_one(X)
        return y
        
    def loss(self,y,y_pred):
        return np.sum(abs(y-y_pred))
    
    def score(self,X):
        X_pred = self.predict(X)
        assert(X_pred.shape == X.shape)
        
        return sum([self.loss(pred,truth) for (pred,truth) in zip(X_pred, X)]) 
    
    def gradient(self,func,x0,h=0.001):
        x0 = np.array(x0)#,dtype=float)
        y = func(x0)
        deriv = np.zeros(len(x0))
        for i in range(len(x0)):
            x = np.array(x0)
            x[i] += h
            deriv[i] = (func(x) - y)/h
        return deriv
    
    def vec_score(self,w,X):
        self.W,self.b = self.vec_to_mats(w)
        return self.score(X)

    def train(self,X_,batch_size=20,epochs=10,
              learning_rate=0.1,cooling=False,
              record=False):
        
        def report(counter,epoch):
            status = "Epoch {1} loss: {0:.3f}".format(self.score(X_),epoch)
            print(status)
            X_r = self.transform(X_)
            pl.scatter(X_r[:,0],X_r[:,1],c=Y,linewidth=0)
            pl.xlim((X_.min(),X_.max()))
            pl.ylim((X_.min(),X_.max()))
            pl.title(status)
            pl.savefig('ae/{0}.jpg'.format(counter))
            pl.close()
            
            if X_.shape[1] == 3:
                X_r = self.predict(X_)
                fig = pl.figure()
                ax = Axes3D(fig)
                
                #pl.xlim((0,1))
                #pl.ylim((0,1))
                #ax.zlim((0,1))
                ax.scatter(X_r[:,0],X_r[:,1],X_r[:,2],c=Y)
                pl.savefig('ae/reconstruction-{0}.jpg'.format(counter))
                pl.close()
        
        n = len(X_) /batch_size
        w = self.mats_to_vec(self.W,self.b)
        
        X = X_.copy()
        
        counter = 0
        report(counter,0)
        counter += 1
        
        if cooling:
            start_cooling = 100
            delta = learning_rate / (epochs - start_cooling)
        
        if record:
            history = np.zeros((epochs*len(X)/(batch_size-1),self.num_params))
            #history = [w]
            history[0] = w
            recordi = 1
            
        momentum=np.zeros(len(w))
        for t in range(epochs):
            npr.shuffle(X)
            
            if cooling and t > start_cooling:
                #learning_rate -= delta
                learning_rate *= 0.95
            
            for i in range(n):
                w_prev = w
                batch_X = X[i*batch_size:(i+1)*batch_size]
                loss_func = lambda w : self.vec_score(w,batch_X)
                #w -= learning_rate * self.gradient(loss_func,w)
                w -= learning_rate*(momentum+0.001)*self.gradient(loss_func,w)
                momentum+=w - w_prev
                if record:
                    #history.append(w)
                    history[recordi] = w
                    recordi+=1
                    
            report(counter,t+1)
            counter +=1
        if record:
            return history[:recordi]



In [553]:

    
from sklearn import datasets
data = datasets.load_digits()
X = data.data
Y = data.target

pca = PCA(4,whiten=True)
X_ = pca.fit_transform(X)
X_-=X_.min()
X_/= X_.max()
#X_-=0.5
#X_*=2

X_.min(),X_.max()









    Out[553]:





(0.0, 1.0)



In [532]:

    
X.shape









    Out[532]:





(1797, 64)



In [ ]:



In [554]:

    
ae = MLAutoencoder(layer_dims=[4,3,2,3,4],activation=sigmoid)
ae.num_params









    Out[554]:





40



In [546]:

    
%timeit ae.score(X_)









    



10 loops, best of 3: 57 ms per loop



In [555]:

    
ae.train(X_,epochs=20)









    



Epoch 0 loss: 1082.089
Epoch 1 loss: 1019.495
Epoch 2 loss: 979.967
Epoch 3 loss: 954.648
Epoch 4 loss: 939.090
Epoch 5 loss: 928.816
Epoch 6 loss: 922.127
Epoch 7 loss: 917.531
Epoch 8 loss: 914.223
Epoch 9 loss: 912.081
Epoch 10 loss: 910.428
Epoch 11 loss: 909.293
Epoch 12 loss: 908.380
Epoch 13 loss: 907.653
Epoch 14 loss: 907.001
Epoch 15 loss: 906.423
Epoch 16 loss: 905.896
Epoch 17 loss: 905.428
Epoch 18 loss: 904.981
Epoch 19 loss: 904.550
Epoch 20 loss: 904.131



In [556]:

    
embedding = ae.transform(X_)



In [557]:

    
pl.scatter(embedding[:,0],embedding[:,1],c=Y,linewidths=0)









    Out[557]:





<matplotlib.collections.PathCollection at 0x142c01dd0>



In [489]:

    
ae.score(X_)









    Out[489]:





39115.722832346662



In [315]:

    
ae.b









    Out[315]:





array([ 0.61339082,  0.20203179, -0.82895431,  0.62841949, -0.04191147,
       -0.53694577, -1.72011834,  0.80377583, -0.2336573 , -0.0421919 ])



In [316]:

    
ae.W,ae.b = ae.vec_to_mats(npr.randn(ae.num_params))
ae.b









    Out[316]:





array([ 1.69129522,  0.43654915,  0.17751614, -0.05158694,  0.51030797,
        0.48721361, -0.03444049, -0.03327278, -0.61448917,  0.08084339])



In [334]:

    
ae.num_params









    Out[334]:





3206



In [342]:

    
%timeit ae.vec_score(npr.randn(ae.num_params),X_[npr.rand(len(X_))<0.1])









    



10 loops, best of 3: 19.1 ms per loop



In [350]:

    
params = npr.randn(ae.num_params)
scores = [ae.vec_score(npr.randn(ae.num_params),X_[npr.rand(len(X_))<0.5]) for _ in range(50)]



In [351]:

    
pl.hist(scores,bins=50);



In [336]:

    
ae_loss(npr.randn(ae.num_params))









    



10 loops, best of 3: 186 ms per loop



In [459]:

    
X_.shape[0]*0.1









    Out[459]:





179.70000000000002



In [490]:

    
def loss(params,thinning=0.1):
    return ae.vec_score(params,X_[npr.rand(len(X_))<thinning])#[:len(X_)/2])



In [491]:

    
def min_line_loss(vec,loss=loss,alpha=np.arange(0.0,1.5,0.1)):
    return np.min([loss(vec*a) for a in alpha])



In [492]:

    
[min_line_loss(params) for i in range(10)]









    Out[492]:





[752.45251831688313,
 782.72773892008115,
 807.97640797371764,
 771.10214070061659,
 770.56893406053666,
 798.08333726147589,
 768.76000877361264,
 651.14767143148003,
 771.00160771342098,
 849.03985735316121]



In [493]:

    
def adaptive_metropolis_hastings(func,x0,num_steps=1000,step_size=0.1,verbose=False):
    locs = np.zeros((num_steps+1,len(x0)))
    vals = np.zeros(num_steps+1)
    locs[0] = x0
    vals[0] = func(x0)
    num_rejects=0
    num_accepts=0
    message = ''
    adaptation_rate=1.02
    steps = np.zeros(num_steps+1)
    steps[0] = step_size
    for i in range(num_steps):
        new_loc = locs[i] + npr.randn(len(locs[i]))*step_size
        new_val = func(new_loc)
        #accept = npr.rand() < np.exp(-(new_val - vals[i]))#/vals[i]#np.exp(-(new_val - vals[i]))
        accept = new_val <= vals[i]#*(1+npr.randn()*0.01)# or (npr.rand() < 0.05)
        if not accept:
            new_loc = locs[i]
            new_val = vals[i]
            num_rejects+=1
            num_accepts=0
            if num_rejects > 100:
                if step_size > 1e-3:
                    step_size/=adaptation_rate
                message = message+'\n reduced step size'
        else:
            num_rejects=0
            num_accepts+=1
            if num_accepts > 10:
                if step_size < 10.0:
                    step_size*=adaptation_rate
                message = message+ '\n increased step size'
        locs[i+1] = new_loc
        vals[i+1] = new_val
        if i % 100 ==0:
            print("{0}: {1:.3f}".format(i,new_val))
            if verbose:
                print(message)
            message = ''
        steps[i+1] = step_size
    return locs,vals,steps



In [462]:

    
locs,vals,steps=adaptive_metropolis_hastings(min_line_loss,npr.randn(ae.num_params))









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-462-9d8523d8d5b9> in <module>()
----> 1 locs,vals,steps=adaptive_metropolis_hastings(min_line_loss,npr.randn(ae.num_params))

<ipython-input-461-96a1c6179766> in adaptive_metropolis_hastings(func, x0, num_steps, step_size, verbose)
     12     for i in range(num_steps):
     13         new_loc = locs[i] + npr.randn(len(locs[i]))*step_size
---> 14         new_val = func(new_loc)
     15         #accept = npr.rand() < np.exp(-(new_val - vals[i]))#/vals[i]#np.exp(-(new_val - vals[i]))
     16         accept = new_val <= vals[i]#*(1+npr.randn()*0.01)# or (npr.rand() < 0.05)

<ipython-input-386-8e8a006492c3> in min_line_loss(vec, loss, alpha)
      1 def min_line_loss(vec,loss=loss,alpha=np.arange(0.0,1.5,0.1)):
----> 2     return np.min([loss(vec*a) for a in alpha])

<ipython-input-385-d0436fab750e> in loss(params, thinning)
      1 def loss(params,thinning=0.1):
----> 2     return ae.vec_score(params,X_[npr.rand(len(X_))<thinning])#[:len(X_)/2])

<ipython-input-318-7b62bc90fda3> in vec_score(self, w, X)
     90     def vec_score(self,w,X):
     91         self.W,self.b = self.vec_to_mats(w)
---> 92         return self.score(X)
     93 
     94     def train(self,X_,batch_size=20,epochs=10,

<ipython-input-318-7b62bc90fda3> in score(self, X)
     73 
     74     def score(self,X):
---> 75         X_pred = self.predict(X)
     76         assert(X_pred.shape == X.shape)
     77 

<ipython-input-318-7b62bc90fda3> in predict(self, X)
     49             y = np.zeros(X.shape)
     50             for i,x in enumerate(X):
---> 51                 y[i] = predict_one(x)
     52         else:
     53             y = predict_one(X)

KeyboardInterrupt: 





    



0: 686.283



In [517]:

    
locs,vals,steps=adaptive_metropolis_hastings(loss,npr.randn(ae.num_params)*0.1,num_steps=1000)









    



0: 292.838
100: 214.630
200: 214.630
300: 198.241
400: 198.241
500: 198.241
600: 198.241
700: 198.241
800: 198.241
900: 198.241



In [518]:

    
pl.plot(vals)









    Out[518]:





[<matplotlib.lines.Line2D at 0x120534c10>]



In [519]:

    
pl.plot(steps)









    Out[519]:





[<matplotlib.lines.Line2D at 0x1236c72d0>]



In [520]:

    
ae.W,ae.b = ae.vec_to_mats(locs[-1])



In [521]:

    
ae.score(X_)









    Out[521]:





2623.4501720503631



In [522]:

    
vals[-1]









    Out[522]:





198.24066227486074



In [523]:

    
embedding = ae.transform(X_)



In [525]:

    
pl.scatter(embedding[:,0],embedding[:,1],c=Y,linewidths=0)









    Out[525]:





<matplotlib.collections.PathCollection at 0x13f6040d0>



In [453]:

    
alpha=np.arange(0.0,1.5,0.1)
a = alpha[np.argmin([ae.vec_score(locs[-1]*a,X_) for a in alpha])]
a









    Out[453]:





0.0



In [418]:

    
ae.W,ae.b = ae.vec_to_mats(locs[-1]*a)



In [419]:

    
ae.score(X_)









    Out[419]:





7506.2448705987626



In [ ]:

    
ae.transform



In [ ]:

    
pl.scatter(



In [300]:

    
ae = MLAutoencoder(layer_dims=[8,4,2,4,8])
ae.num_params









    Out[300]:





84



In [210]:

    
ae.train(X_)









    



Epoch 0 loss: 7315.132
Epoch 1 loss: 3226.895
Epoch 2 loss: 3225.493





    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-210-35506039ee86> in <module>()
----> 1 ae.train(X_)

<ipython-input-208-398582417f25> in train(self, X_, batch_size, epochs, learning_rate, cooling, record)
    155                     recordi+=1
    156 
--> 157             report(counter,t+1)
    158             counter +=1
    159         return history[:recordi]

<ipython-input-208-398582417f25> in report(counter, epoch)
     97 
     98         def report(counter,epoch):
---> 99             status = "Epoch {1} loss: {0:.3f}".format(self.score(X_),epoch)
    100             print(status)
    101             X_r = self.transform(X_)

<ipython-input-208-398582417f25> in score(self, X)
     73 
     74     def score(self,X):
---> 75         X_pred = self.predict(X)
     76         assert(X_pred.shape == X.shape)
     77 

<ipython-input-208-398582417f25> in predict(self, X)
     49             y = np.zeros(X.shape)
     50             for i,x in enumerate(X):
---> 51                 y[i] = predict_one(x)
     52         else:
     53             y = predict_one(X)

<ipython-input-208-398582417f25> in predict_one(x)
     43             L = x
     44             for i,w in enumerate(self.W):
---> 45                 L = self.activation(L.dot(w) + self.b[i])
     46             return L
     47 

<ipython-input-208-398582417f25> in <lambda>(x)
      7 
      8 dim = X.shape[1]
----> 9 sigmoid = lambda x : 1.0 / (1.0 + np.exp(-x))
     10 class MLAutoencoder():
     11     def __init__(self,layer_dims=[32,16,8,4,2,4,8,16,32],

KeyboardInterrupt:



In [ ]:



In [12]:

    
Y = train_set[1]



In [13]:

    
X.shape









    Out[13]:





(50000, 32)



In [14]:

    
Y.shape









    Out[14]:





(50000,)



In [237]:

    
pl.scatter(X_[:,0],X_[:,1],c=Y,linewidths=0,alpha=0.5)









    Out[237]:





<matplotlib.collections.PathCollection at 0x125ab9bd0>



In [238]:

    
X_r = ae.transform(X_)



In [239]:

    
pl.scatter(X_r[:,0],X_r[:,1],c=Y,linewidths=0,alpha=0.5)









    Out[239]:





<matplotlib.collections.PathCollection at 0x11566e510>



In [52]:

    
#Y = Y[:10000]



In [53]:

    
#X = X_[:10000]



In [54]:

    
X_.min()









    Out[54]:





0.0



In [176]:

    
relu = lambda x: x*(x>0)



In [185]:

    
tanh = lambda x:np.tanh(x)



In [243]:

    
from sklearn import datasets
data = datasets.load_diabetes()

X = data.data
Y = data.target



In [244]:

    
X.shape









    Out[244]:





(442, 10)



In [245]:

    
pca = PCA()
pca.fit(X)









    Out[245]:





PCA(copy=True, n_components=None, whiten=False)



In [246]:

    
pl.plot(pca.explained_variance_ratio_)









    Out[246]:





[<matplotlib.lines.Line2D at 0x125c97e50>]



In [250]:

    
pl.scatter(pca.fit_transform(X)[:,0],
           pca.fit_transform(X)[:,1],
           c=Y,
           linewidths=0,
           alpha=0.5)
pl.colorbar()









    Out[250]:





<matplotlib.colorbar.Colorbar instance at 0x110fdd710>



In [252]:

    
from sklearn.cross_decomposition import PLSRegression
pls = PLSRegression()
pls.fit(X,Y)
pls.score(X,Y)









    Out[252]:





0.50832548177810266



In [253]:

    
pca = PCA(4,whiten=True)



In [254]:

    
activation = relu
if activation==relu:
    X_ = pca.fit_transform(X)
    X_-=X_.min()
    X_/= X_.max()
    
else:
    X_ = pca.fit_transform(X)
    X_-=X_.min()
    X_/= X_.max()
    X_-=0.5
    X_*=2



In [ ]:



In [ ]:



In [284]:

    
ae = MLAutoencoder(layer_dims=[4,3,2,3,4],activation=activation)
ae.num_params









    Out[284]:





40



In [285]:

    
record = ae.train(X_,batch_size=10,epochs=10,learning_rate=0.001,record=True)









    



Epoch 0 loss: 1041.333
Epoch 1 loss: 482.683
Epoch 2 loss: 438.709
Epoch 3 loss: 438.197
Epoch 4 loss: 434.749
Epoch 5 loss: 434.677
Epoch 6 loss: 432.458
Epoch 7 loss: 432.150
Epoch 8 loss: 431.184
Epoch 9 loss: 431.163
Epoch 10 loss: 430.349



In [287]:

    
record[::10].shape









    Out[287]:





(45, 40)



In [289]:

    
w = ae.mats_to_vec(ae.W,ae.b)
w = record[0]

gradients = []
for w in record[::10]:
    loc_grad = []
    for i in range(50):
        batch_X = X_[npr.rand(len(X)) < 0.1]
        loss_func = lambda w : ae.vec_score(w,batch_X)
        loc_grad.append(ae.gradient(loss_func,w))
    gradients.append(np.array(loc_grad))
gradients = np.array(gradients)









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-289-4a300fe45082> in <module>()
      8         batch_X = X_[npr.rand(len(X)) < 0.1]
      9         loss_func = lambda w : ae.vec_score(w,batch_X)
---> 10         loc_grad.append(ae.gradient(loss_func,w))
     11     gradients.append(np.array(loc_grad))
     12 gradients = np.array(gradients)

<ipython-input-235-d4644f4a6a49> in gradient(self, func, x0, h)
     85             x = np.array(x0)
     86             x[i] += h
---> 87             deriv[i] = (func(x) - y)/h
     88         return deriv
     89 

<ipython-input-289-4a300fe45082> in <lambda>(w)
      7     for i in range(50):
      8         batch_X = X_[npr.rand(len(X)) < 0.1]
----> 9         loss_func = lambda w : ae.vec_score(w,batch_X)
     10         loc_grad.append(ae.gradient(loss_func,w))
     11     gradients.append(np.array(loc_grad))

<ipython-input-235-d4644f4a6a49> in vec_score(self, w, X)
     90     def vec_score(self,w,X):
     91         self.W,self.b = self.vec_to_mats(w)
---> 92         return self.score(X)
     93 
     94     def train(self,X_,batch_size=20,epochs=10,

<ipython-input-235-d4644f4a6a49> in score(self, X)
     76         assert(X_pred.shape == X.shape)
     77 
---> 78         return sum([self.loss(pred,truth) for (pred,truth) in zip(X_pred, X)])
     79 
     80     def gradient(self,func,x0,h=0.0001):

<ipython-input-235-d4644f4a6a49> in loss(self, y, y_pred)
     70 
     71     def loss(self,y,y_pred):
---> 72         return np.sum(abs(y-y_pred))
     73 
     74     def score(self,X):

/Applications/Canopy.app/appdata/canopy-1.4.1.1975.macosx-x86_64/Canopy.app/Contents/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in sum(a, axis, dtype, out, keepdims)
   1712     else:
   1713         return _methods._sum(a, axis=axis, dtype=dtype,
-> 1714                             out=out, keepdims=keepdims)
   1715 
   1716 def product (a, axis=None, dtype=None, out=None, keepdims=False):

/Applications/Canopy.app/appdata/canopy-1.4.1.1975.macosx-x86_64/Canopy.app/Contents/lib/python2.7/site-packages/numpy/core/_methods.pyc in _sum(a, axis, dtype, out, keepdims)
     23 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):
     24     return um.add.reduce(a, axis=axis, dtype=dtype,
---> 25                             out=out, keepdims=keepdims)
     26 
     27 def _prod(a, axis=None, dtype=None, out=None, keepdims=False):

KeyboardInterrupt:



In [291]:

    
len(gradients)









    Out[291]:





25



In [282]:

    
grads2 = np.array(grads2)



In [292]:

    
for grad in gradients:
    pl.plot(grad.mean(0))



In [297]:

    
pl.plot(gradients[0].mean(0))
w = record[0]
for i in range(50):
    batch_X = X_[npr.rand(len(X)) < 0.1]
    loss_func = lambda w : ae.vec_score(w,batch_X)
    pl.plot(ae.gradient(loss_func,w))



In [ ]:



In [130]:

    
record.shape









    Out[130]:





(898, 80)



In [149]:

    
record = record[:-180]



In [150]:

    
differences = np.zeros((len(record),len(record)))



In [151]:

    
for i in range(len(record)):
    for j in range(len(record)):
        differences[i,j] = np.sqrt(sum((record[i]-record[j])**2))
    if i % 100 == 0:
        print(i)



In [124]:

    
sum(differences==0)









    Out[124]:





array([1791, 1791, 1791, ..., 1791, 1791, 1791])



In [ ]:

    
# for a case with batch_size=10, step_size=0.01
pl.imshow(differences,interpolation='none')
pl.colorbar()



In [153]:

    
# for a case with batch_size=10, step_size=0.01
pl.imshow(differences,interpolation='none')
pl.colorbar()









    Out[153]:





<matplotlib.colorbar.Colorbar instance at 0x11121b7a0>



In [111]:

    
# for a case with batch_size=20, step_size=1.0
pl.imshow(differences[8:-8,8:-8],interpolation='none')
pl.colorbar()









    Out[111]:





<matplotlib.colorbar.Colorbar instance at 0x11b5d67e8>



In [ ]:

    
# how much does the "angle" change during training?



In [154]:

    
def theta(X,Y):
    norm_X = np.sqrt(sum((X)**2))
    norm_Y = np.sqrt(sum((Y)**2))
    return np.arccos(X.dot(Y) / (norm_X * norm_Y))



In [155]:

    
theta(record[1]-record[0],record[2]-record[1])









    Out[155]:





0.69550530892888252



In [163]:

    
differences = record[1:] - record[:-1]



In [164]:

    
differences.sum(1).shape









    Out[164]:





(717,)



In [165]:

    
pl.plot(differences.sum(1))









    Out[165]:





[<matplotlib.lines.Line2D at 0x1103b6550>]



In [166]:

    
pl.hist(differences.sum(1),bins=50);



In [167]:

    
angles = np.zeros(len(differences)-1)
for i in range(1,len(differences)):
    angles[i-1] = theta(differences[i-1],differences[i])



In [171]:

    
pl.plot(angles)









    Out[171]:





[<matplotlib.lines.Line2D at 0x121f41a90>]



In [170]:

    
pl.hist(angles,bins=50);



In [29]:

    
ae.num_params









    Out[29]:





144



In [ ]:

    
ae.num_p



In [51]:

    
X_ = X[:2000]
Y = Y[:2000]



In [ ]:

    
X



In [57]:

    
ae.train(X_[:,:16])









    



Epoch 0 loss: 37692.536
Epoch 1 loss: 33701.885
Epoch 2 loss: 33439.839
Epoch 3 loss: 33207.664
Epoch 4 loss: 33060.173
Epoch 5 loss: 33035.047
Epoch 6 loss: 33015.856
Epoch 7 loss: 32949.710
Epoch 8 loss: 32979.320
Epoch 9 loss: 33011.463
Epoch 10 loss: 32958.327



In [ ]:

    
ae.n