In [71]:

    
%matplotlib inline



In [1]:

    
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets



In [2]:

    
import pandas as pd



In [3]:

    
import theano

I accomplished the above by running this command at the command prompt:

THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32' jupyter notebook



In [4]:

    
#import theano
from theano import function, config, sandbox, shared 
import theano.tensor as T
import numpy as np
import scipy
import time

More `theano` setup in `jupyter` notebook boilerplate



In [6]:

    
print( theano.config.device )
print( theano.config.lib.cnmem)  # cf. http://deeplearning.net/software/theano/library/config.html
print( theano.config.print_active_device)# Print active device at when the GPU device is initialized.









    



gpu0
0.7
True



In [7]:

    
import os, sys
os.getcwd()
os.listdir( os.getcwd() )









    Out[7]:





['LogReg-sklearn.ipynb',
 'Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow_files',
 'LICENSE',
 'theano.pdf',
 'deeplearning.pdf',
 'sklearn_ML.ipynb',
 'LaTeXandpdfs',
 'supervised-theano.ipynb',
 'sanity_check_theano_uses_gpu.ipynb',
 '.git',
 'README.md',
 'Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow.html',
 '.ipynb_checkpoints',
 'theano_ML.ipynb',
 'deep-learning--ud730',
 'tf_sanitycheck.ipynb',
 'saved_models',
 'FedoraNVidiaInstallTips',
 'tf',
 'kaggle',
 'sampleinputdataX.ipynb',
 'ML',
 'Data',
 'simple_logreg.py',
 'Speeding up your Neural Network with Theano and the GPU \xe2\x80\x93 WildML_files',
 'coursera_Ng',
 'MorseTheory.ipynb',
 'tutorial_theano.ipynb',
 'best_model.pkl',
 'gpu_test.py',
 'Speeding up your Neural Network with Theano and the GPU \xe2\x80\x93 WildML.html']



In [ ]:



In [8]:

    
%run gpu_test.py THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32,lib.cnmem=0.85' # note lib.cnmem option for CnMem









    



[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>), HostFromGpu(GpuElemwise{exp,no_inplace}.0)]
Looping 1000 times took 0.211632 seconds
Result is [ 1.23178029  1.61879349  1.52278066 ...,  2.20771813  2.29967761
  1.62323296]
Used the gpu



In [ ]:

sample data boilerplate



In [9]:

    
# Load the diabetes dataset
diabetes = sklearn.datasets.load_diabetes()



In [10]:

    
diabetes_X = diabetes.data
diabetes_Y = diabetes.target



In [11]:

    
#diabetes_X1 = diabetes_X[:,np.newaxis,2]
diabetes_X1 = diabetes_X[:,np.newaxis, 2].astype(theano.config.floatX)
#diabetes_Y  = diabetes_Y.reshape( diabetes_Y.shape[0], 1)
diabetes_Y = diabetes_Y.astype(theano.config.floatX)

Linear regression

cf. Linear Regression In Theano

1_linear_regression.py from github Newmu/Theano-Tutorials

Train on $m$ number of input data points



In [12]:

    
m_lin = diabetes_X1.shape[0]

input, output variables $x$, $y$ for Theano



In [28]:

    
#x1 = T.vector('x1')  # X1, input data, with only 1 feature, i.e. X \in \mathbb{R}^N, d=1 
#ylin = T.vector('ylin') # target variable for linear regression, so that Y \in \mathbb{R}

x1 = T.scalar('x1')  # X1, input data, with only 1 feature, i.e. X \in \mathbb{R}^N, d=1 
ylin = T.scalar('ylin') # target variable for linear regression, so that Y \in \mathbb{R}

Parameters (for a linear slope)

$$ (\theta^0, \theta^1) \in \mathbb{R}^2 $$



In [29]:

    
thet0_init_val = np.random.randn()
thet1_init_val = np.random.randn()



In [30]:

    
thet0 = theano.shared( value=thet0_init_val, name='thet0', borrow=True)  # \theta^0
thet1 = theano.shared( thet1_init_val, name='thet1', borrow=True)   # \theta^1

hypothesis function $h_{\theta}$

$$ h_{\theta}(x) = \theta_1 x + \theta_0 $$



In [31]:

    
#h_thet = T.dot( thet1, x1) + thet0
# whereas, Newmu uses
h_thet = thet1 * x1 + thet0

Cost function $J(\theta)$



In [32]:

    
# roshansanthosh uses 
#Jthet = T.sum( T.pow(h_thet-ylin,2))/(2*m_lin)

# whereas, Newmu uses
# Jthet = T.mean( T.sqr( thet_1*x1 + thet_0 - ylin ))

Jthet = T.mean( T.pow( h_thet-ylin,2))/2
#Jthet = sandbox.cuda.basic_ops.gpu_from_host( T.mean( 
#        sandbox.cuda.basic_ops.gpu_from_host( T.pow( h_thet-ylin,2))))/2

$$ \text{grad}_{\theta}J(\theta) = ( \text{grad}_{\theta^0} J , \text{grad}_{\theta^1} J ) $$



In [33]:

    
grad_thet0 = T.grad(Jthet, thet0)
grad_thet1 = T.grad(Jthet, thet1)



In [19]:

    
# so-called "learning rate"
gamma = 0.01

Note that "updates (iterable over pairs (shared_variable, new_expression) List, tuple or dict.) – expressions for new SharedVariable values" cf. Theano doc



In [34]:

    
train_lin = theano.function(inputs = [x1,ylin], outputs=Jthet, 
                        updates=[[thet1,thet1-gamma*grad_thet1],[thet0,thet0-gamma*grad_thet0]])



In [35]:

    
test_lin = theano.function([x1],h_thet)



In [100]:

    
#X1_lin_in = shared( diabetes_X1 ,'float32')
#Y_lin_out = shared( diabetes_Y, 'float32')



In [23]:

    
training_steps = 1000 # 10000



In [36]:

    
sh_diabetes_X1 = shared( diabetes_X1 , borrow=True)
sh_diabetes_Y  = shared( diabetes_Y, borrow=True)



In [37]:

    
"""
for i in range(training_steps):
    for x,y in zip( diabetes_X1, diabetes_Y):
        Jthet_val = train_lin( x, y )
        """

for i in range(training_steps):
#    for x,y in zip( sh_diabetes_X1, sh_diabetes_Y) :
#        Jthet_val = train_lin( x,y)
    Jthet_val = train_lin( sh_diabetes_X1, sh_diabetes_Y)









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-37-b6192b29b89d> in <module>()
      8 #    for x,y in zip( sh_diabetes_X1, sh_diabetes_Y) :
      9 #        Jthet_val = train_lin( x,y)
---> 10     Jthet_val = train_lin( sh_diabetes_X1, sh_diabetes_Y)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    784                         s.storage[0] = s.type.filter(
    785                             arg, strict=s.strict,
--> 786                             allow_downcast=s.allow_downcast)
    787 
    788                     except Exception as e:

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/tensor/type.pyc in filter(self, data, strict, allow_downcast)
     84         if isinstance(data, Variable):
     85             raise TypeError(
---> 86                 'Expected an array-like object, but found a Variable: '
     87                 'maybe you are trying to call a function on a (possibly '
     88                 'shared) variable instead of a numeric array?')

TypeError: ('Bad input argument to theano function with name "<ipython-input-34-92f3da11396a>:2"  at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')



In [27]:

    
print(Jthet_val)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-49f3322b82c4> in <module>()
----> 1 print(Jthet_val)

NameError: name 'Jthet_val' is not defined



In [129]:

    
print( thet0.get_value() ); print( thet1.get_value() )









    



151.746080721
942.701275851



In [134]:

    
test_lin_out = np.array( [ test_lin( x ) for x in diabetes_X1 ] )



In [137]:

    
plt.plot(diabetes_X1,diabetes_Y,'ro')
plt.plot(diabetes_X1,test_lin_out)









    Out[137]:





[<matplotlib.lines.Line2D at 0x7f4037fd3790>]



In [38]:

    
if any([x.op.__class__.__name__ in ['GpuGemm','GpuGemv'] for x in train_lin.maker.fgraph.toposort()]):
    print("Used the gpu")
else:
    print(train_lin.maker.fgraph.toposort())









    



[Elemwise{Composite{(((i0 * i1) + i2) - i3)}}(thet1, x1, thet0, ylin), Elemwise{Composite{(i0 - (i1 * i2))}}[(0, 0)](thet0, TensorConstant{0.00999999977648}, Elemwise{Composite{(((i0 * i1) + i2) - i3)}}.0), Elemwise{Composite{(i0 - (i1 * i2 * i3))}}[(0, 0)](thet1, TensorConstant{0.00999999977648}, Elemwise{Composite{(((i0 * i1) + i2) - i3)}}.0, x1), Elemwise{Composite{(i0 * sqr(i1))}}[(0, 1)](TensorConstant{0.5}, Elemwise{Composite{(((i0 * i1) + i2) - i3)}}.0)]



In [39]:

    
if np.any([isinstance(x.op,T.Elemwise) for x in train_lin.maker.fgraph.toposort()]):
    print("Used the cpu")









    



Used the cpu



In [ ]:

Linear Algebra and `theano`

cf. Week 1, Linear Algebra Review, Coursera, Machine Learning with Ng

I'll take this opportunity to provide a dictionary between the syntax of linear algebra math and numpy.

Essentially, what I did was take Coursera's Week 1, Linear Algebra Review and then translated the math into theano, and in particular, running theano on the GPU.

Other reference that I used was

https://simplyml.com/linear-algebra-shootout-numpy-vs-theano-vs-tensorflow-2/

Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow by Charanpal Dhanjal - 14/07/16

Matrix addition

cf. Coursera, Intro. to Machine Learning, Linear Algebra Review, Addition and Scalar Multiplication



In [69]:

    
A = T.matrix('A')
B = T.matrix('B')
#matadd = function([A,B], A+B)
#matadd = function([A,B],sandbox.cuda.basic_ops.gpu_from_host(A+B) )
# Note: we are just defining the expressions, nothing is evaluated here!  
C = sandbox.cuda.basic_ops.gpu_from_host(A+B)
matadd = function([A,B], C)



In [121]:

    
#A = T.dmatrix('A')
#B = T.dmatrix('B')

A = T.matrix('A')
B = T.matrix('B')

C_out = A + B
matadd_CPU = function([A,B], C_out)



In [73]:

    
A_eg = shared( np.array([[8,6,9],[10,1,10]]), 'float32')
B_eg = shared( np.array([[3,10,2],[6,1,-1]]), 'float32')



In [85]:

    
A_eg_CPU = np.array([[8,6,9],[10,1,10]])
B_eg_CPU = np.array([[3,10,2],[6,1,-1]])



In [96]:

    
print(A_eg_CPU)
print( type( A_eg_CPU ))
print( A_eg_CPU.shape)
print( B_eg_CPU.shape)









    



[[ 8  6  9]
 [10  1 10]]
<type 'numpy.ndarray'>
(2, 3)
(2, 3)



In [70]:

    
print( matadd.maker.fgraph.toposort() )









    



[GpuFromHost(B), GpuFromHost(A), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0)]



In [122]:

    
print( matadd_CPU.maker.fgraph.toposort() )









    



[GpuFromHost(B), GpuFromHost(A), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0), HostFromGpu(GpuElemwise{Add}[(0, 0)].0)]



In [71]:

    
matadd( A_eg, B_eg)









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-71-dd6866f5ad02> in <module>()
----> 1 matadd( A_eg, B_eg)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    784                         s.storage[0] = s.type.filter(
    785                             arg, strict=s.strict,
--> 786                             allow_downcast=s.allow_downcast)
    787 
    788                     except Exception as e:

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/tensor/type.pyc in filter(self, data, strict, allow_downcast)
     84         if isinstance(data, Variable):
     85             raise TypeError(
---> 86                 'Expected an array-like object, but found a Variable: '
     87                 'maybe you are trying to call a function on a (possibly '
     88                 'shared) variable instead of a numeric array?')

TypeError: ('Bad input argument to theano function with name "<ipython-input-69-40293d053d65>:7"  at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')

The way to do it, to "force" on the GPU, is like this (cf. Speeding up your Neural Network with Theano and the GPU - Wild ML):



In [76]:

    
np.random.randn( *A_eg_CPU.shape )









    Out[76]:





array([[ 1.01645633,  0.49394088, -0.22904526],
       [-0.44323914, -0.27948502,  0.335645  ]])



In [78]:

    
C_out = theano.shared( np.random.randn( *A_eg_CPU.shape).astype('float32') )



In [80]:

    
C_out.type()









    Out[80]:





<CudaNdarrayType(float32, matrix)>



In [110]:

    
#A_in = shared( A_eg_CPU, "float32")
#A_in = shared( A_eg_CPU, "float32")

A_in = shared( A_eg_CPU.astype("float32"), "float32")
B_in = shared( B_eg_CPU.astype("float32"), "float32")
#C_out_GPU = A_in + B_in
C_out_GPU = sandbox.cuda.basic_ops.gpu_from_host(A_in+B_in)



In [111]:

    
matadd_GPU = theano.function( [], C_out_GPU)



In [112]:

    
C_out_GPU_result = matadd_GPU()



In [113]:

    
C_out_GPU_result









    Out[113]:





CudaNdarray([[ 11.  16.  11.]
 [ 16.   2.   9.]])

Notice how DIFFERENT this setup or syntax is: we have to set up tensor or matrix shared variables A_n, B_in, which are then used to define the theano function, theano.function. "By using shared variables we ensure that they are present in the GPU memory". cf. Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow



In [114]:

    
print( matadd_GPU.maker.fgraph.toposort() )









    



[GpuElemwise{add,no_inplace}(float32, float32)]



In [128]:

    
#if np.any([isinstance(C_out_GPU.op, tensor.Elemwise ) and 
if np.any([isinstance( C_out_GPU.op, T.Elemwise ) and 
           ('Gpu' not in type( C_out_GPU.op).__name__) for x in matadd_GPU.maker.fgraph.toposort()]) :
    print('Used the cpu')
else:
    print('Used the gpu')









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-128-eedabe693a3f> in <module>()
      1 #if np.any([isinstance(C_out_GPU.op, tensor.Elemwise ) and
      2 if np.any([isinstance( C_out_GPU.op, T.Elemwise ) and 
----> 3            ('Gpu' not in type( C_out_GPU.op).__name__) for x in matadd_GPU.maker.fgraph.toposort()]) :
      4     print('Used the cpu')
      5 else:

AttributeError: 'CudaNdarrayVariable' object has no attribute 'op'



In [ ]:



In [124]:

    
matadd_CPU( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") )









    Out[124]:





array([[ 11.,  16.,  11.],
       [ 16.,   2.,   9.]], dtype=float32)



In [55]:

    
type(A_eg)









    Out[55]:





theano.tensor.sharedvar.TensorSharedVariable



In [59]:

    
print( type( numpy.asarray(rng.rand(2000)) ) )
numpy.asarray(rng.rand(2000)).shape









    



<type 'numpy.ndarray'>






    Out[59]:





(2000,)

Bottom Line: there are 2 ways of doing linear algebra on the GPU

symbolic computation with the usual arguments

$$ A + B = C \in \text{Mat}_{\mathbb{R}}(M,N) $$

$ \forall \, A, B \in \text{Mat}_{\mathbb{R}}(M,N)$



In [132]:

    
A = T.matrix('A')
B = T.matrix('B')

C = sandbox.cuda.basic_ops.gpu_from_host( A + B ) # vs. 
# C = A + B  # this will result in an output array on the host, as opposed to CudaNdarray on device
matadd = function([A,B], C)



In [133]:

    
print( matadd.maker.fgraph.toposort() )









    



[GpuFromHost(B), GpuFromHost(A), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0)]



In [134]:

    
matadd( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") )









    Out[134]:





CudaNdarray([[ 11.  16.  11.]
 [ 16.   2.   9.]])

with shared variables



In [140]:

    
A_in = shared( A_eg_CPU.astype("float32"), "float32")  # initialize with the input values, A_eg_CPU, anyway
B_in = shared( B_eg_CPU.astype("float32"), "float32")  # initialize with the input values B_eg_CPU, anyway

# C_out = A_in + B_in # this version will output to the host as a numpy.ndarray
# indeed, reading the graph,
"""
[GpuElemwise{add,no_inplace}(float32, float32), HostFromGpu(GpuElemwise{add,no_inplace}.0)]
"""
# this version immediately below, in 1 line, will result in a CudaNdarray on device
C_out = sandbox.cuda.basic_ops.gpu_from_host(A_in+B_in)

matadd_GPU = theano.function( [], C_out)



In [141]:

    
print( matadd_GPU.maker.fgraph.toposort() )









    



[GpuElemwise{add,no_inplace}(float32, float32)]



In [142]:

    
C_out_result = matadd_GPU()



In [143]:

    
C_out_result









    Out[143]:





CudaNdarray([[ 11.  16.  11.]
 [ 16.   2.   9.]])

Scalar Multiplication (on the GPU)

cf. Scalar Multiplication of Linear Algebra Review, coursera, Machine Learning Intro by Ng



In [149]:

    
A_2 = np.array( [[4,5],[1,7] ])



In [145]:

    
a = T.scalar('a')

F = sandbox.cuda.basic_ops.gpu_from_host( a*A )
scalarmul = theano.function([a,A],F)



In [146]:

    
print( scalarmul.maker.fgraph.toposort() )









    



[GpuFromHost(A), GpuFromHost(a), GpuDimShuffle{x,x}(GpuFromHost.0), GpuElemwise{Mul}[(0, 1)](GpuDimShuffle{x,x}.0, GpuFromHost.0)]



In [150]:

    
scalarmul( np.float32( 2.), A_2.astype("float32"))









    Out[150]:





CudaNdarray([[  8.  10.]
 [  2.  14.]])

Composition; Confirming that you can do composition of scalar multiplication on a matrix (or ring) addition

Being able to do composition is very important in math



In [152]:

    
scalarmul( np.float32(2.), matadd( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") ) )









    Out[152]:





CudaNdarray([[ 22.  32.  22.]
 [ 32.   4.  18.]])



In [156]:

    
u = T.vector('u')
v = T.vector('v')

w = sandbox.cuda.basic_ops.gpu_from_host( u + v)
vecadd = theano.function( [u,v],w)

t = sandbox.cuda.basic_ops.gpu_from_host( a * u)
scalarmul_vec = theano.function([a,u], t)



In [157]:

    
print(vecadd.maker.fgraph.toposort())  
print(scalarmul_vec.maker.fgraph.toposort())









    



[GpuFromHost(v), GpuFromHost(u), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0)]
[GpuFromHost(u), GpuFromHost(a), GpuDimShuffle{x}(GpuFromHost.0), GpuElemwise{Mul}[(0, 1)](GpuDimShuffle{x}.0, GpuFromHost.0)]



In [162]:

    
u_eg = np.array( [4,6,7], dtype="float32")
v_eg = np.array( [2,1,0], dtype="float32")

print( u_eg.shape)









    



(3,)



In [161]:

    
scalarmul_vec( np.float32(0.5), u_eg )









    Out[161]:





CudaNdarray([ 2.   3.   3.5])



In [163]:

    
vecadd( scalarmul_vec( np.float32(0.5), u_eg ) , scalarmul_vec( np.float32(-3.), v_eg )  )









    Out[163]:





CudaNdarray([-4.   0.   3.5])

This was the computer equivalent to mathematical expression:

$$ \left[ \begin{matrix} 4 \\ 6 \\ 7 \end{matrix} \right] /2 - 3 * \left[ \begin{matrix} 2 \\ 1 \\ 0 \end{matrix} \right] $$

sAxy or A-V multiplication or so-called "Gemv", or Matrix Multiplication on a vector, or linear transformation on a R-module, or vector space

i.e.

$$ Av = B $$



In [164]:

    
B_out = sandbox.cuda.basic_ops.gpu_from_host( T.dot(A,v))
AVmul = theano.function([A,v], B_out)
print(AVmul.maker.fgraph.toposort())









    



[GpuFromHost(v), GpuFromHost(A), Shape_i{0}(A), GpuAllocEmpty(Shape_i{0}.0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuFromHost.0, GpuFromHost.0, TensorConstant{0.0})]



In [165]:

    
AVmul( np.array([[1,0,3],[2,1,5],[3,1,2]]).astype("float32"), np.array([1,6,2]).astype("float32"))









    Out[165]:





CudaNdarray([  7.  18.  13.])



In [168]:

    
AVmul( np.array([[1,0,0],[0,1,0],[0,0,1]]).astype("float32"), np.array([1,6,2]).astype("float32"))









    Out[168]:





CudaNdarray([ 1.  6.  2.])

AB or Gemm or Matrix Multiplication, i.e. Ring multiplication

i.e.
$$ A*B = C $$



In [166]:

    
C_f = sandbox.cuda.basic_ops.gpu_from_host( T.dot(A,B)) 
matmul = theano.function([A,B], C_f)
print( matmul.maker.fgraph.toposort())









    



[GpuFromHost(B), GpuFromHost(A), GpuDot22(GpuFromHost.0, GpuFromHost.0)]



In [167]:

    
matmul( np.array( [[1,3],[2,4],[0,5]]  ).astype("float32"), np.array([[1,0],[2,3]]).astype("float32")  )









    Out[167]:





CudaNdarray([[  7.   9.]
 [ 10.  12.]
 [ 10.  15.]])

Inverse and Transpose

cf. Inverse and Transpose



In [170]:

    
Ainverse = sandbox.cuda.basic_ops.gpu_from_host( T.inv(A))
Ainv = theano.function([A], Ainverse)
print(Ainv.maker.fgraph.toposort())









    



[GpuFromHost(A), GpuElemwise{Inv}[(0, 0)](GpuFromHost.0)]



In [172]:

    
Atranspose = sandbox.cuda.basic_ops.gpu_from_host( A.T)
AT = theano.function([A],Atranspose)
print(AT.maker.fgraph.toposort())









    



[GpuFromHost(A), GpuDimShuffle{1,0}(GpuFromHost.0)]



In [ ]:



In [ ]:

Summation, sum, mean, scan



In [ ]:

Linear Regression (again), via Coursera's Machine Learning Intro by Ng, Programming Exercise 1 for Week 2

Boilerplate, load sample data



In [9]:

    
linregdata = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data1.txt', header=None)



In [11]:

    
X_linreg_training = linregdata.as_matrix([0])  # pandas.DataFrame.as_matrix convert frame to its numpy-array representation
y_linreg_training = linregdata.as_matrix([1])
m_linreg_training = len(y_linreg_training)  # number of training examples  
print( X_linreg_training.shape, type(X_linreg_training)) 
print( y_linreg_training.shape, type(y_linreg_training)) 
print m_linreg_training









    



((97, 1), <type 'numpy.ndarray'>)
((97, 1), <type 'numpy.ndarray'>)
97

Try representing $\theta$, parameters or "weights", of size $|\theta|$ which should be equal to the number of features $n$ (or $d$).



In [109]:

    
# theta_linreg = T.vector('theta_linreg')
d = X_linreg_training.shape[1] # d = features

# Declare Theano symbolic variables
X = T.matrix('x')
y = T.vector('y')

Preprocess training data (due to numpy's treatment of arrays) (note, this is not needed, if you use pandas to choose which column(s) you want to make into a numpy array)



In [188]:

    
#X_linreg_training = X_linreg_training.reshape( m_linreg_training,1)
#y_linreg_training = y_linreg_training.reshape( m_linreg_training,1)



In [12]:

    
# Instead, the training data X and test data values y are going to be represented by Theano symbolic variable above
#X_linreg = theano.shared(X_linreg_training.astype("float32"),"float32")
#y_linreg = theano.shared(y_linreg_training.astype("float32"),"float32")



In [110]:

    
#theta_0 = np.zeros( ( d+1,1)); print(theta_0)
theta_0 = np.zeros(  d+1); print(theta_0)









    



[ 0.  0.]



In [111]:

    
theta = theano.shared( theta_0.astype("float32"), "theta")



In [89]:

    
alpha = np.float32(0.01) # learning rate gamma or alpha



In [112]:

    
# Construct Theano "expression graph"

predicted_vals = sandbox.cuda.basic_ops.gpu_from_host( T.dot(X,theta) )  # h_{\theta}
m = np.float32( y_linreg_training.shape[0] ) 
J_theta = sandbox.cuda.basic_ops.gpu_from_host( 
    T.dot( (T.dot(X,theta) - y).T, T.dot(X,theta) - y)  * np.float32( 0.5 ) * np.float32( 1./ m )   
    ) # cost function



In [113]:

    
update_theta = sandbox.cuda.basic_ops.gpu_from_host( 
        theta - alpha * T.grad( J_theta, theta) )



In [114]:

    
gradientDescent = theano.function( 
                            inputs=[X,y],
                            outputs=[predicted_vals,J_theta],  
                            updates=[(theta, update_theta)], 
                            name = "gradientDescent")



In [115]:

    
print( gradientDescent.maker.fgraph.toposort() )









    



[GpuFromHost(x), Shape_i{0}(x), GpuFromHost(y), GpuAllocEmpty(TensorConstant{1}), GpuDimShuffle{1,0}(GpuFromHost.0), GpuAllocEmpty(Shape_i{0}.0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuFromHost.0, theta, TensorConstant{0.0}), GpuElemwise{Sub}[(0, 1)](GpuGemv{inplace}.0, GpuFromHost.0), GpuDimShuffle{0}(GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{x,0}(GpuElemwise{Sub}[(0, 1)].0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuDimShuffle{x,0}.0, GpuDimShuffle{0}.0, TensorConstant{0.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{[ 0.00515464]}, GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{}(GpuGemv{inplace}.0), GpuGemv{inplace}(theta, TensorConstant{-0.019999999553}, GpuDimShuffle{1,0}.0, GpuElemwise{Mul}[(0, 1)].0, TensorConstant{1.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{0.00515463901684}, GpuDimShuffle{}.0)]



In [116]:

    
num_iters = 1500
J_History = []

Preprocess X to include intercepts



In [95]:

    
input_X_linreg = np.hstack( ( np.ones((m_linreg_training,1)), X_linreg_training ) ).astype("float32")



In [96]:

    
y_linreg_training_processed = y_linreg_training.reshape( m_linreg_training,).astype("float32")



In [117]:

    
J_History = [0 for iter in range(num_iters)]
for iter in range(num_iters):
    predicted_vals_out, J_out = \
        gradientDescent(input_X_linreg.astype("float32"), y_linreg_training_processed.astype("float32") ) 
    J_History[iter] = J_out



In [45]:

    
Deg = (np.random.randn(40,10).astype("float32"), np.random.randint(size=40,low=0,high=2).astype("float32") )



In [46]:

    
Deg[0].shape









    Out[46]:





(40, 10)



In [47]:

    
Deg[1].shape









    Out[47]:





(40,)



In [118]:

    
theta.get_value()









    Out[118]:





array([-3.63029242,  1.1663624 ], dtype=float32)



In [101]:

    
dir( J_History[0] )









    Out[101]:





['__add__',
 '__array__',
 '__class__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__getitem__',
 '__hash__',
 '__iadd__',
 '__idiv__',
 '__init__',
 '__len__',
 '__new__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_dev_data',
 '_set_shape_i',
 '_set_stride',
 '_strides',
 'base',
 'copy',
 'dtype',
 'exp',
 'gpudata',
 'is_c_contiguous',
 'mem_size',
 'ndim',
 'reduce_sum',
 'reshape',
 'shape',
 'size',
 'strides',
 'take',
 'view',
 'zeros']



In [121]:

    
J_History[-5].gpudata









    Out[121]:





47407335424



In [120]:

    
plt.plot( [ele.gpudata for ele in J_History])









    Out[120]:





[<matplotlib.lines.Line2D at 0x7ffa1c17a7d0>]

Denny Britz's way:

http://www.wildml.com/2015/09/speeding-up-your-neural-network-with-theano-and-the-gpu/

Speeding up your Neural Network with Theano and the GPU

and his jupyter notebook

https://github.com/dennybritz/nn-theano/blob/master/nn-theano-gpu.ipynb

nn-theano/nn-theano-gpu.ipynb



In [73]:

    
input_X_linreg.shape









    Out[73]:





(97, 2)



In [122]:

    
# GPU NOTE: Conversion to float32 to store them on the GPU!
X = theano.shared( input_X_linreg.astype('float32'), name='X' )
y = theano.shared( y_linreg_training.astype('float32'), name='y')



In [123]:

    
# GPU NOTE: Conversion to float32 to store them on the GPU!  
theta = theano.shared( np.vstack(theta_0).astype("float32"), name='theta')



In [136]:

    
# Construct Theano "expression graph"

predicted_vals = sandbox.cuda.basic_ops.gpu_from_host( 
    T.dot(X,theta) )  # h_{\theta}
m = np.float32( y_linreg_training.shape[0] )
# cost function J_theta, J_{\theta}
J_theta = sandbox.cuda.basic_ops.gpu_from_host( 
    (
        T.dot( (T.dot(X,theta) - y).T, T.dot(X,theta) - y) * np.float32(0.5) * np.float32( 1./m)  
    ).reshape([]) )   # cost function  # reshape is to force "broadcast" into 0-dim. scalar for cost function



In [137]:

    
update_theta = sandbox.cuda.basic_ops.gpu_from_host( 
        theta - alpha * T.grad( J_theta, theta) )



In [138]:

    
# Note that we removed the input values because we will always use the same shared variable
# GPU Note: Removed the input values to avoid copying data to the GPU.
gradientDescent = theano.function( 
                            inputs=[],
#                            outputs=[predicted_vals,J_theta],  
                            updates=[(theta, update_theta)], 
                            name = "gradientDescent")



In [139]:

    
print( gradientDescent.maker.fgraph.toposort() )









    



[Shape_i{1}(theta), GpuDimShuffle{1,0}(X), GpuGemm{no_inplace}(y, TensorConstant{1.0}, X, theta, TensorConstant{-1.0}), MakeVector{dtype='int64'}(Shape_i{1}.0, Shape_i{1}.0), GpuReshape{2}(CudaNdarrayConstant{1.0}, MakeVector{dtype='int64'}.0), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[ 0.00515464]]}, GpuReshape{2}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuElemwise{mul,no_inplace}.0), GpuDimShuffle{1,0}(GpuElemwise{mul,no_inplace}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuDimShuffle{1,0}.0), GpuGemm{inplace}(theta, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0}), GpuGemm{inplace}(GpuGemm{inplace}.0, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0})]



In [141]:

    
#J_History = [0 for iter in range(num_iters)]
for iter in range(num_iters):
    gradientDescent( )



In [78]:

    
print( np.vstack( theta_0).shape )
print( y_linreg_training.shape )









    



(2, 1)
(97, 1)



In [149]:

    
theta.get_value()









    Out[149]:





array([[-3.63076854],
       [ 1.16641033]], dtype=float32)



In [152]:

    
# Profiling
print( theano.config.profile )  # Do the vm/cvm linkers profile the execution time of Theano functions?
print( theano.config.profile_memory ) # Do the vm/cvm linkers profile the memory usage of Theano functions? It only works when profile=True.









    



False
False



In [154]:

    
theano.printing.debugprint(gradientDescent)









    



GpuGemm{inplace} [id A] ''   10
 |GpuGemm{inplace} [id B] ''   9
 | |theta [id C]
 | |TensorConstant{-0.00999999977648} [id D]
 | |GpuDimShuffle{1,0} [id E] ''   1
 | | |X [id F]
 | |GpuDot22 [id G] ''   8
 | | |GpuGemm{no_inplace} [id H] ''   2
 | | | |y [id I]
 | | | |TensorConstant{1.0} [id J]
 | | | |X [id F]
 | | | |theta [id C]
 | | | |TensorConstant{-1.0} [id K]
 | | |GpuDimShuffle{1,0} [id L] ''   7
 | |   |GpuElemwise{mul,no_inplace} [id M] ''   5
 | |     |CudaNdarrayConstant{[[ 0.00515464]]} [id N]
 | |     |GpuReshape{2} [id O] ''   4
 | |       |CudaNdarrayConstant{1.0} [id P]
 | |       |MakeVector{dtype='int64'} [id Q] ''   3
 | |         |Shape_i{1} [id R] ''   0
 | |         | |theta [id C]
 | |         |Shape_i{1} [id R] ''   0
 | |TensorConstant{1.0} [id J]
 |TensorConstant{-0.00999999977648} [id D]
 |GpuDimShuffle{1,0} [id E] ''   1
 |GpuDot22 [id S] ''   6
 | |GpuGemm{no_inplace} [id H] ''   2
 | |GpuElemwise{mul,no_inplace} [id M] ''   5
 |TensorConstant{1.0} [id J]



In [157]:

    
#print( gradientDescent.profile.print_summary() )
dir( gradientDescent.profile)









    Out[157]:





['__class__',
 '__delattr__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

Testing the Linear Regression with (Batch) Gradient Descent classes in `./ML/`



In [9]:

    
import sys
import os



In [10]:

    
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )



In [10]:

    
from linreg_gradDes import LinearReg, LinearReg_loaded
#from ML import LinearReg, LinearReg_loaded

Boilerplate for sample input data



In [11]:

    
linregdata1 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data1.txt', header=None)
linregdata1.as_matrix([0]).shape
linregdata1.as_matrix([1]).shape









    Out[11]:





(97, 1)



In [12]:

    
features = linregdata1.as_matrix([0]).shape[1]
numberoftraining = linregdata1.as_matrix([0]).shape[0]
LinReg_housing = LinearReg( features, numberoftraining , 0.01)



In [13]:

    
Xin   = LinReg_housing.preprocess_X( linregdata1.as_matrix([0]))
ytest = linregdata1.as_matrix([1]).flatten()



In [14]:

    
%time LinReg_housing.build_model( Xin, ytest )









    



CPU times: user 171 ms, sys: 28.9 ms, total: 200 ms
Wall time: 198 ms






    Out[14]:





array([-3.63029242,  1.1663624 ], dtype=float32)



In [19]:

    
LinRegloaded_housing = LinearReg_loaded( linregdata1.as_matrix([0]), linregdata1.as_matrix([1]), 
                                        features, numberoftraining )



In [20]:

    
%time LinRegloaded_housing.build_model()









    



CPU times: user 136 ms, sys: 16.4 ms, total: 152 ms
Wall time: 132 ms






    Out[20]:





array([[-3.63028979],
       [ 1.16636217]], dtype=float32)



In [53]:

    
print( LinReg_housing.gradientDescent.maker.fgraph.toposort() )
print( LinRegloaded_housing.gradientDescent.maker.fgraph.toposort() )









    



[GpuFromHost(X), Shape_i{0}(X), GpuFromHost(y), GpuAllocEmpty(TensorConstant{1}), GpuDimShuffle{1,0}(GpuFromHost.0), GpuAllocEmpty(Shape_i{0}.0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuFromHost.0, theta, TensorConstant{0.0}), GpuElemwise{Sub}[(0, 1)](GpuGemv{inplace}.0, GpuFromHost.0), GpuDimShuffle{0}(GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{x,0}(GpuElemwise{Sub}[(0, 1)].0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuDimShuffle{x,0}.0, GpuDimShuffle{0}.0, TensorConstant{0.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{[ 0.00515464]}, GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{}(GpuGemv{inplace}.0), GpuGemv{inplace}(theta, TensorConstant{-0.019999999553}, GpuDimShuffle{1,0}.0, GpuElemwise{Mul}[(0, 1)].0, TensorConstant{1.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{0.00515463901684}, GpuDimShuffle{}.0)]
[Shape_i{1}(theta), GpuDimShuffle{1,0}(X), GpuGemm{no_inplace}(y, TensorConstant{1.0}, X, theta, TensorConstant{-1.0}), MakeVector{dtype='int64'}(Shape_i{1}.0, Shape_i{1}.0), GpuReshape{2}(CudaNdarrayConstant{1.0}, MakeVector{dtype='int64'}.0), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[ 0.00515464]]}, GpuReshape{2}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuElemwise{mul,no_inplace}.0), GpuDimShuffle{1,0}(GpuElemwise{mul,no_inplace}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuDimShuffle{1,0}.0), GpuGemm{inplace}(theta, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0}), GpuGemm{inplace}(GpuGemm{inplace}.0, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0})]

Other (sample) datasets

Consider feature normalization



In [40]:

    
def featureNormalize(X):
    """
    FEATURENORMALIZE Normalizes the features in X  
    FEATURENORMALIZE(X) returns a normalized version of X where  
    the mean value of each feature is 0 and the standard deviation  
    is 1.  This is often a good preprocessing step to do when 
    working with learning algorithms.
    
    """
    # You need to set these values correctly  
    X_norm = (X-X.mean(axis=0))/X.std(axis=0)
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    
    return [X_norm, mu, sigma]



In [41]:

    
linregdata2 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data2.txt', header=None)



In [42]:

    
features = linregdata2.as_matrix().shape[1] - 1
numberoftraining = linregdata2.as_matrix().shape[0]
Xdat  = linregdata2.as_matrix( range(features) )
ytest = linregdata2.as_matrix( [features])



In [46]:

    
[Xnorm, mus,sigmas] = featureNormalize(Xdat)



In [48]:

    
LinReg_housing2 = LinearReg( features, numberoftraining, 0.01)
processed_X = LinReg_housing2.preprocess_X( Xnorm )



In [49]:

    
%time LinReg_housing2.build_model( processed_X, ytest.flatten(), 400)









    



CPU times: user 59.1 ms, sys: 8.88 ms, total: 67.9 ms
Wall time: 66.2 ms






    Out[49]:





array([ 334302.125    ,   99411.4609375,    3267.0065918], dtype=float32)



In [50]:

    
LinRegloaded_housing2 = LinearReg_loaded( Xnorm, ytest, 
                                        features, numberoftraining )



In [51]:

    
%time LinRegloaded_housing2.build_model(  400)









    



CPU times: user 48.9 ms, sys: 2.49 ms, total: 51.3 ms
Wall time: 45.4 ms






    Out[51]:





array([[ 334302.21875   ],
       [  99411.453125  ],
       [   3267.00976562]], dtype=float32)

Diabetes data from `sklearn`, sci-kit learn



In [ ]:



In [54]:

    
# Load the diabetes dataset
diabetes = sklearn.datasets.load_diabetes()



In [55]:

    
diabetes_X = diabetes.data
diabetes_Y = diabetes.target



In [61]:

    
#diabetes_X1 = diabetes_X[:,np.newaxis,2]
diabetes_X1 = diabetes_X[:,np.newaxis, 2].astype(theano.config.floatX)
#diabetes_Y  = diabetes_Y.reshape( diabetes_Y.shape[0], 1)
diabetes_Y = np.vstack( diabetes_Y.astype(theano.config.floatX) )



In [63]:

    
features1 = 1 
numberoftraining = diabetes_Y.shape[0]



In [67]:

    
LinReg_diabetes = LinearReg( features1, numberoftraining, 0.01)



In [70]:

    
processed_X = LinReg_diabetes.preprocess_X( diabetes_X1 )



In [71]:

    
%time LinReg_diabetes.build_model( processed_X, diabetes_Y.flatten(), 10000)









    



CPU times: user 753 ms, sys: 118 ms, total: 871 ms
Wall time: 867 ms






    Out[71]:





array([ 152.13273621,  192.24055481], dtype=float32)



In [74]:

    
LinRegloaded_diabetes = LinearReg_loaded( diabetes_X1, diabetes_Y, 
                                        features1, numberoftraining )



In [75]:

    
%time LinRegloaded_diabetes.build_model(  10000)









    



CPU times: user 707 ms, sys: 45.2 ms, total: 752 ms
Wall time: 718 ms






    Out[75]:





array([[ 152.13198853],
       [ 192.2406311 ]], dtype=float32)

Multiple number of features case:



In [76]:

    
features = diabetes_X.shape[1]



In [77]:

    
LinReg_diabetes = LinearReg( features, numberoftraining, 0.01)
processed_X = LinReg_diabetes.preprocess_X( diabetes_X )



In [78]:

    
%time LinReg_diabetes.build_model( processed_X, diabetes_Y.flatten(), 10000)









    



CPU times: user 855 ms, sys: 113 ms, total: 968 ms
Wall time: 964 ms






    Out[78]:





array([ 152.13273621,   40.02508163,   -5.81352949,  162.25823975,
        117.35097504,   38.3995285 ,   24.88706589, -100.40937042,
         99.55418396,  149.29826355,   92.1962738 ], dtype=float32)



In [79]:

    
LinRegloaded_diabetes = LinearReg_loaded( diabetes_X, diabetes_Y, 
                                        features, numberoftraining )



In [80]:

    
%time LinRegloaded_diabetes.build_model(  10000)









    



CPU times: user 702 ms, sys: 64.4 ms, total: 766 ms
Wall time: 728 ms






    Out[80]:





array([[ 152.13198853],
       [  40.02506256],
       [  -5.81354237],
       [ 162.25799561],
       [ 117.35108948],
       [  38.39954376],
       [  24.88703156],
       [-100.40942383],
       [  99.55430603],
       [ 149.29826355],
       [  92.1962738 ]], dtype=float32)

`ex2` Linear Regression, on d=2 features



In [5]:

    
data_ex1data2 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data2.txt', header=None)
X_ex1data2 = data_ex1data2.iloc[:,0:2]
y_ex1data2 = data_ex1data2.iloc[:,2]
m_ex1data2 = y_ex1data2.shape[0]
X_ex1data2=X_ex1data2.values.astype(np.float32)
y_ex1data2=y_ex1data2.values.reshape((m_ex1data2,1)).astype(np.float32)
print(type(X_ex1data2))
print(type(y_ex1data2))
print(X_ex1data2.shape)
print(y_ex1data2.shape)
print(m_ex1data2)
print(X_ex1data2[:5])
print(y_ex1data2[:5])









    



<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
(47, 2)
(47, 1)
47
[[  2.10400000e+03   3.00000000e+00]
 [  1.60000000e+03   3.00000000e+00]
 [  2.40000000e+03   3.00000000e+00]
 [  1.41600000e+03   2.00000000e+00]
 [  3.00000000e+03   4.00000000e+00]]
[[ 399900.]
 [ 329900.]
 [ 369000.]
 [ 232000.]
 [ 539900.]]



In [46]:

    
((X_ex1data2[:,1] - X_ex1data2[:,1].mean())/( X_ex1data2[:,1].std()) ).std()









    Out[46]:





0.99999988



In [49]:

    
# feature Normalize
#X_ex1data2_norm = sklearn.preprocessing.Normalizer.transform(X_ex1data2 )
X_ex1data2_norm = (X_ex1data2  - np.mean(X_ex1data2, axis=0)) / np.std(X_ex1data2, axis=0)
print(X_ex1data2_norm[:,0].mean())
print(X_ex1data2_norm[:,0].std())
print(X_ex1data2_norm[:,1].mean())
print(X_ex1data2_norm[:,1].std())









    



-7.10183e-08
1.0
2.69489e-08
1.0



In [51]:

    
# X_ex1data2_norm[:5];



In [61]:

    
X=T.matrix(dtype=theano.config.floatX)
y=T.matrix(dtype=theano.config.floatX)

Theta=theano.shared(np.zeros((2,1)).astype(theano.config.floatX))
b = theano.shared(np.zeros(1).astype(theano.config.floatX))



In [16]:

    
print(b.get_value().shape)









    



(1,)



In [62]:

    
yhat = T.dot( X, Theta) + b



In [63]:

    
# L2 norm
J = np.cast[theano.config.floatX](0.5)*T.mean( T.sqr( yhat-y))



In [64]:

    
alpha=0.01  # learning rate
# sandbox.cuda.basic_ops.gpu_from_host
updateThetab = [ Theta-np.float32(alpha)*T.grad(J,Theta), b-np.float32(alpha)*T.grad(J,b)]
gradientDescent_step = theano.function(inputs=[X,y], 
                                          outputs=J,
                                          updates = zip([Theta,b],updateThetab) )



In [66]:

    
num_iters =400
JList=[]
for iter in range(num_iters):
    err = gradientDescent_step(X_ex1data2_norm,y_ex1data2)
    JList.append(err)



In [67]:

    
# Final mode:
print(Theta.get_value())
print(b.get_value())









    



[[ 99411.44601356]
 [  3267.01771421]]
[ 334302.0699632]



In [72]:

    
# JList[-10:]
plt.plot(JList)
plt.show()

Multi-class Classification

cf. ex3, Programming Exercise 3: Multi-class Classification and Neural Networks, Machine Learning

1 Multi-class Classification



In [83]:

    
os.getcwd()









    Out[83]:





'/home/topolo/PropD/MLgrabbag'



In [86]:

    
os.listdir( './coursera_Ng/machine-learning-ex3/' )









    Out[86]:





['ex3.pdf', 'ex3']



In [87]:

    
os.listdir( './coursera_Ng/machine-learning-ex3/ex3' )









    Out[87]:





['ex3data1.mat',
 'submit.m',
 'sigmoid.m',
 'lrCostFunction.m',
 'predictOneVsAll.m',
 'oneVsAll.m',
 'predict.m',
 'ex3.m',
 'lib',
 'displayData.m',
 'ex3_nn.m',
 'fmincg.m',
 'ex3weights.mat']



In [90]:

    
# Load saved matrices from file 
multiclscls_data = scipy.io.loadmat('./coursera_Ng/machine-learning-ex3/ex3/ex3data1.mat')

import the classes from ML



In [9]:

    
import sys
import os



In [9]:

    
os.getcwd()









    Out[9]:





'/home/topolo/PropD/MLgrabbag'



In [10]:

    
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )



In [11]:

    
from gradDes import LogReg



In [12]:

    
# Test case for Cost function J_{\theta} with regularization

theta_t = np.vstack( np.array( [-2, -1, 1, 2]) )
X_t = np.array( [i/10. for i in range(1,16)]).reshape((3,5)).T
#X_t = np.hstack( ( np.ones((5,1)), X_t) ) # no need to preprocess the input data X with column of 1's
y_t = np.vstack( np.array( [1,0,1,0,1]))



In [13]:

    
MulClsCls_digits = LogReg( X_t, y_t, 3,5,0.01, 3.  )



In [14]:

    
MulClsCls_digits.calculate_cost()









    Out[14]:





CudaNdarray(0.125)



In [19]:

    
MulClsCls_digits.z.get_value()









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-f79030e398ab> in <module>()
----> 1 MulClsCls_digits.z.get_value()

AttributeError: 'CudaNdarrayVariable' object has no attribute 'get_value'



In [21]:

    
print( MulClsCls_digits.X.get_value() )
MulClsCls_digits.y.get_value()









    



[[ 1.          0.1         0.60000002  1.10000002]
 [ 1.          0.2         0.69999999  1.20000005]
 [ 1.          0.30000001  0.80000001  1.29999995]
 [ 1.          0.40000001  0.89999998  1.39999998]
 [ 1.          0.5         1.          1.5       ]]






    Out[21]:





array([[ 1.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 1.]], dtype=float32)



In [23]:

    
calc_z_test = theano.function([], MulClsCls_digits.z)



In [24]:

    
calc_z_test()









    Out[24]:





CudaNdarray([[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]])



In [25]:

    
MulClsCls_digits.theta.set_value( theta_t.astype('float32') )



In [27]:

    
calc_z_test()









    Out[27]:





CudaNdarray([[ 0.70000017]
 [ 0.9000001 ]
 [ 1.0999999 ]
 [ 1.29999983]
 [ 1.5       ]])



In [35]:

    
MulClsCls_digits.calculate_cost()









    Out[35]:





CudaNdarray(1.93287348747)



In [43]:

    
print( 1/(1+np.exp( np.dot( -np.hstack( ( np.ones((5,1)), X_t) ), theta_t) ) )  )
h_test = 1/(1+np.exp( np.dot( -np.hstack( ( np.ones((5,1)), X_t) ), theta_t) ) ) 
print( np.dot( (h_test - y_t).T, h_test- y_t) * 0.5/5 ) # non-regularized J_theta cost term
np.dot( theta_t[1:].T, theta_t[1:]) * 3 / (2.* 5)









    



[[ 0.66818777]
 [ 0.7109495 ]
 [ 0.75026011]
 [ 0.78583498]
 [ 0.81757448]]
[[ 0.13287343]]






    Out[43]:





array([[ 1.8]])



In [33]:

    
MulClsCls_digits.predict()









    Out[33]:





array([[ 0.66818786],
       [ 0.71094954],
       [ 0.75026011],
       [ 0.78583503],
       [ 0.81757444]], dtype=float32)



In [ ]:

    
MulClsCls_digit



In [44]:

    
theano.config.floatX









    Out[44]:





'float32'



In [ ]:



In [ ]:

Neural Networks

Model representation

cf. 2 Neural Networks, 2.1 Model representation, ex3.pdf



In [45]:

    
os.getcwd()









    Out[45]:





'/home/topolo/PropD/MLgrabbag'



In [46]:

    
os.listdir( './coursera_Ng/machine-learning-ex3/' )









    Out[46]:





['ex3.pdf', 'ex3']



In [47]:

    
os.listdir( './coursera_Ng/machine-learning-ex3/ex3/' )









    Out[47]:





['ex3data1.mat',
 'submit.m',
 'sigmoid.m',
 'token.mat',
 'lrCostFunction.m',
 'predictOneVsAll.m',
 'oneVsAll.m',
 'predict.m',
 'ex3.m',
 'lib',
 'displayData.m',
 'ex3_nn.m',
 'fmincg.m',
 'ex3weights.mat']

$ \Theta_1, \Theta_2 $



In [48]:

    
# Load saved matrices from file 
nn3_data = scipy.io.loadmat('./coursera_Ng/machine-learning-ex3/ex3/ex3weights.mat')



In [53]:

    
print( nn3_data.keys() )
print( type( nn3_data['Theta1']) )
print( type( nn3_data['Theta2']) )
print( nn3_data['Theta1'].shape )
print( nn3_data['Theta2'].shape )









    



['Theta2', '__version__', '__header__', 'Theta1', '__globals__']
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
(25, 401)
(10, 26)



In [54]:

    
Theta1[0]









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-54-ff0b751d0b88> in <module>()
----> 1 Theta1[0]

NameError: name 'Theta1' is not defined

Feedforward



In [61]:

    
%load_ext tikzmagic









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-61-445a5d6380b0> in <module>()
----> 1 get_ipython().magic(u'load_ext tikzmagic')

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
   2161         magic_name, _, magic_arg_s = arg_s.partition(' ')
   2162         magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2163         return self.run_line_magic(magic_name, magic_arg_s)
   2164 
   2165     #-------------------------------------------------------------------------

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
   2082                 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
   2083             with self.builtin_trap:
-> 2084                 result = fn(*args,**kwargs)
   2085             return result
   2086 

<decorator-gen-64> in load_ext(self, module_str)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
    194 
    195         if callable(arg):

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/magics/extension.pyc in load_ext(self, module_str)
     64         if not module_str:
     65             raise UsageError('Missing module name.')
---> 66         res = self.shell.extension_manager.load_extension(module_str)
     67 
     68         if res == 'already loaded':

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/extensions.pyc in load_extension(self, module_str)
     82             if module_str not in sys.modules:
     83                 with prepended_to_syspath(self.ipython_extension_dir):
---> 84                     __import__(module_str)
     85             mod = sys.modules[module_str]
     86             if self._call_load_ipython_extension(mod):

ImportError: No module named tikzmagic

$$ \begin{tikzpicture} \matrix (m) [matrix of math nodes, row sep=3em, column sep=4em, minimum width=2em] { \mathbb{R}^{s_l} & \mathbb{R}^{ s_l +1 } & \mathbb{R}^{s_{l+1} } & \mathbb{R}^{s_{l+1} } \\ a^{(l)} & (a_0^{(l)} = 1, a^{(l)} ) & z^{(l+1)} & g(z^{(l+1)}) = a^{(l+1)} \\ }; \path[->] (m-1-1) edge node [above] {$a_0^{(l)}=1$} (m-1-2) (m-1-2) edge node [above] {$\Theta^{(l)}$} (m-1-3) (m-1-3) edge node [above] {$g$} (m-1-4) ; \path[|->] (m-2-1) edge node [above] {$a_0^{(l)}=1$} (m-2-2) (m-2-2) edge node [above] {$\Theta^{(l)}$} (m-2-3) (m-2-3) edge node [above] {$g$} (m-2-4) ; \end{tikzpicture} $$



In [ ]:



In [66]:

    
np.random.seed(0)
s_l = 400 # (layer) size of layer l, i.e. number of nodes, units in layer l
s_lp1 = 25
al = theano.shared( np.random.randn(s_l+1,1).astype('float32'), name="al")
#alp1 = theano.shared( np.random.randn(s_lp1,1).astype('float32'), name="al")
#Thetal = theano.shared( np.random.randn( s_lp1,s_l+1).astype('float32') , name="Thetal")

# Feedforward, forward propagation
#z = T.dot( Thetal, al)
#g = T.nnet.sigmoid( z)









    



---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-66-ca1c7934296c> in <module>()
      2 s_l = 400 # (layer) size of layer l, i.e. number of nodes, units in layer l
      3 s_lp1 = 25
----> 4 al = theano.shared( np.random.randn(s_l+1,1).astype('float32'), name="al")
      5 #alp1 = theano.shared( np.random.randn(s_lp1,1).astype('float32'), name="al")
      6 #Thetal = theano.shared( np.random.randn( s_lp1,s_l+1).astype('float32') , name="Thetal")

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/sharedvalue.pyc in shared(value, name, strict, allow_downcast, **kwargs)
    245             try:
    246                 var = ctor(value, name=name, strict=strict,
--> 247                            allow_downcast=allow_downcast, **kwargs)
    248                 utils.add_tag_trace(var)
    249                 return var

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/var.pyc in float32_shared_constructor(value, name, strict, allow_downcast, borrow, broadcastable, target)
    238         # type.broadcastable is guaranteed to be a tuple, which this next
    239         # function requires
--> 240         deviceval = type_support_filter(value, type.broadcastable, False, None)
    241 
    242     try:

RuntimeError: Cuda error 'unspecified launch failure' while copying %lli data element to device memory



In [8]:

    
s_l = 25
s_lp1 = 10



In [19]:

    
rng = np.random.RandomState(99)
Theta_values = np.asarray( rng.uniform( 
    low=-np.sqrt( 6. / (s_l+ s_lp1)), 
    high=np.sqrt( 6./(s_l + s_lp1)), size=(s_lp1,s_l+1)), dtype=theano.config.floatX )
print( Theta_values.shape )
print( Theta_values.dtype )
#Theta_values *= np.float32(4)
Theta_values *= 4.

print( Theta_values.dtype)
Theta_values.shape









    



(10, 26)
float32
float32






    Out[19]:





(10, 26)



In [14]:

    
np.float32( 4)









    Out[14]:





(10, 26)

From Deep Learning Tutorials of LISA lab of University of Montreal; `logistic_sgd.py`, `mlp.py`



In [8]:

    
%env









    Out[8]:





{'BASH_FUNC_module()': '() {  eval `/usr/bin/modulecmd bash $*`\n}',
 'BASH_FUNC_scl()': '() {  local CMD=$1;\n if [ "$CMD" = "load" -o "$CMD" = "unload" ]; then\n eval "module $@";\n else\n /usr/bin/scl "$@";\n fi\n}',
 'CLICOLOR': '1',
 'CVS_RSH': 'ssh',
 'DBUS_SESSION_BUS_ADDRESS': 'unix:abstract=/tmp/dbus-N7ppIfAX8e,guid=e833f00fdfe69074bad66b6a58a4c651',
 'DESKTOP_SESSION': 'gnome',
 'DISPLAY': ':1',
 'GDMSESSION': 'gnome',
 'GDM_LANG': 'en_US.UTF-8',
 'GIT_PAGER': 'cat',
 'GJS_DEBUG_OUTPUT': 'stderr',
 'GJS_DEBUG_TOPICS': 'JS ERROR;JS LOG',
 'GNOME_DESKTOP_SESSION_ID': 'this-is-deprecated',
 'HISTCONTROL': 'ignoredups',
 'HISTSIZE': '1000',
 'HOME': '/home/topolo',
 'HOSTNAME': 'localhost.localdomain',
 'JPY_PARENT_PID': '3902',
 'LANG': 'en_US.UTF-8',
 'LD_LIBRARY_PATH': '/usr/local/lib:/usr/local/lib::/usr/local/cuda/lib64:/usr/local/lib64:/usr/local/cuda/lib64:/usr/local/lib64',
 'LESSOPEN': '||/usr/bin/lesspipe.sh %s',
 'LOADEDMODULES': '',
 'LOGNAME': 'topolo',
 'LS_COLORS': 'rs=0:di=38;5;33:ln=38;5;51:mh=00:pi=40;38;5;11:so=38;5;13:do=38;5;5:bd=48;5;232;38;5;11:cd=48;5;232;38;5;3:or=48;5;232;38;5;9:mi=01;05;37;41:su=48;5;196;38;5;15:sg=48;5;11;38;5;16:ca=48;5;196;38;5;226:tw=48;5;10;38;5;16:ow=48;5;10;38;5;21:st=48;5;21;38;5;15:ex=38;5;40:*.tar=38;5;9:*.tgz=38;5;9:*.arc=38;5;9:*.arj=38;5;9:*.taz=38;5;9:*.lha=38;5;9:*.lz4=38;5;9:*.lzh=38;5;9:*.lzma=38;5;9:*.tlz=38;5;9:*.txz=38;5;9:*.tzo=38;5;9:*.t7z=38;5;9:*.zip=38;5;9:*.z=38;5;9:*.Z=38;5;9:*.dz=38;5;9:*.gz=38;5;9:*.lrz=38;5;9:*.lz=38;5;9:*.lzo=38;5;9:*.xz=38;5;9:*.bz2=38;5;9:*.bz=38;5;9:*.tbz=38;5;9:*.tbz2=38;5;9:*.tz=38;5;9:*.deb=38;5;9:*.rpm=38;5;9:*.jar=38;5;9:*.war=38;5;9:*.ear=38;5;9:*.sar=38;5;9:*.rar=38;5;9:*.alz=38;5;9:*.ace=38;5;9:*.zoo=38;5;9:*.cpio=38;5;9:*.7z=38;5;9:*.rz=38;5;9:*.cab=38;5;9:*.jpg=38;5;13:*.jpeg=38;5;13:*.gif=38;5;13:*.bmp=38;5;13:*.pbm=38;5;13:*.pgm=38;5;13:*.ppm=38;5;13:*.tga=38;5;13:*.xbm=38;5;13:*.xpm=38;5;13:*.tif=38;5;13:*.tiff=38;5;13:*.png=38;5;13:*.svg=38;5;13:*.svgz=38;5;13:*.mng=38;5;13:*.pcx=38;5;13:*.mov=38;5;13:*.mpg=38;5;13:*.mpeg=38;5;13:*.m2v=38;5;13:*.mkv=38;5;13:*.webm=38;5;13:*.ogm=38;5;13:*.mp4=38;5;13:*.m4v=38;5;13:*.mp4v=38;5;13:*.vob=38;5;13:*.qt=38;5;13:*.nuv=38;5;13:*.wmv=38;5;13:*.asf=38;5;13:*.rm=38;5;13:*.rmvb=38;5;13:*.flc=38;5;13:*.avi=38;5;13:*.fli=38;5;13:*.flv=38;5;13:*.gl=38;5;13:*.dl=38;5;13:*.xcf=38;5;13:*.xwd=38;5;13:*.yuv=38;5;13:*.cgm=38;5;13:*.emf=38;5;13:*.ogv=38;5;13:*.ogx=38;5;13:*.aac=38;5;45:*.au=38;5;45:*.flac=38;5;45:*.m4a=38;5;45:*.mid=38;5;45:*.midi=38;5;45:*.mka=38;5;45:*.mp3=38;5;45:*.mpc=38;5;45:*.ogg=38;5;45:*.ra=38;5;45:*.wav=38;5;45:*.oga=38;5;45:*.opus=38;5;45:*.spx=38;5;45:*.xspf=38;5;45:',
 'MAIL': '/var/spool/mail/topolo',
 'MODULEPATH': '/etc/scl/modulefiles:/etc/scl/modulefiles:/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles',
 'MODULESHOME': '/usr/share/Modules',
 'OLDPWD': '/',
 'PAGER': 'cat',
 'PATH': '/usr/local/cuda-7.5/bin:/home/topolo/Public/anaconda2/bin:/usr/local/cuda-7.5/bin:/home/topolo/Public/anaconda2/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/home/topolo/.local/bin:/home/topolo/bin',
 'PWD': '/home/topolo',
 'QT_IM_MODULE': 'ibus',
 'SESSION_MANAGER': 'local/unix:@/tmp/.ICE-unix/2593,unix/unix:/tmp/.ICE-unix/2593',
 'SHELL': '/bin/bash',
 'SHLVL': '2',
 'SSH_ASKPASS': '/usr/libexec/openssh/gnome-ssh-askpass',
 'SSH_AUTH_SOCK': '/run/user/1001/keyring/ssh',
 'TERM': 'xterm-color',
 'THEANO_FLAGS': 'mode=FAST_RUN,floatX=float32,device=gpu0,lib.cnmem=0.80',
 'USER': 'topolo',
 'USERNAME': 'topolo',
 'VTE_VERSION': '4205',
 'WINDOWID': '29360134',
 'WINDOWPATH': '2',
 'XAUTHORITY': '/run/user/1001/gdm/Xauthority',
 'XDG_CURRENT_DESKTOP': 'GNOME',
 'XDG_MENU_PREFIX': 'gnome-',
 'XDG_RUNTIME_DIR': '/run/user/1001',
 'XDG_SEAT': 'seat0',
 'XDG_SESSION_DESKTOP': 'gnome',
 'XDG_SESSION_ID': '1',
 'XDG_SESSION_TYPE': 'x11',
 'XDG_VTNR': '2',
 'XMODIFIERS': '@im=ibus',
 '_': '/home/topolo/Public/anaconda2/bin/jupyter'}



In [10]:

    
os.getcwd()









    Out[10]:





'/home/topolo/PropD/MLgrabbag'



In [11]:

    
print( sys.path )









    



['', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/thermopy-0.5.4-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/pytools-2016.2.6-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/appdirs-1.4.0-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python27.zip', '/home/topolo/Public/anaconda2/lib/python2.7', '/home/topolo/Public/anaconda2/lib/python2.7/plat-linux2', '/home/topolo/Public/anaconda2/lib/python2.7/lib-tk', '/home/topolo/Public/anaconda2/lib/python2.7/lib-old', '/home/topolo/Public/anaconda2/lib/python2.7/lib-dynload', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/PyDispatcher-2.0.5-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/Sphinx-1.3.5-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/characteristic-14.3.0-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/cssselect-0.9.1-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/pyasn1_modules-0.0.5-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/service_identity-14.0.0-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/setuptools-20.3-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/extensions', '/home/topolo/.ipython']



In [12]:

    
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( '../DeepLearningTutorials/code/' )



In [14]:

    
#from logistic_sgd import LogisticRegression, load_data, sgd_optimization_mnist, predict
import logistic_sgd



In [10]:

    
MNIST_MTLdat = logistic_sgd.load_data("../DeepLearningTutorials/data/mnist.pkl.gz") # list of training data









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-1526aa8fd8a5> in <module>()
----> 1 MNIST_MTLdat = logistic_sgd.load_data("../DeepLearningTutorials/data/mnist.pkl.gz") # list of training data

NameError: name 'logistic_sgd' is not defined



In [36]:

    
print(len(MNIST_MTLdat))
print(type(MNIST_MTLdat))
for ele in MNIST_MTLdat: print type(ele), len(ele) # test_set_x, test_set_y, valid_set_x, valid_set_y, train_set_x,









    



3
<type 'list'>
<type 'tuple'> 2
<type 'tuple'> 2
<type 'tuple'> 2



In [37]:

    
print( MNIST_MTLdat[0][0].get_value().shape)
print( type(MNIST_MTLdat[0][1]))
print( MNIST_MTLdat[0][1].get_scalar_constant_value )









    



(50000, 784)
<class 'theano.tensor.var.TensorVariable'>
<bound method TensorVariable.get_scalar_constant_value of Elemwise{Cast{int32}}.0>



In [38]:

    
print( type( MNIST_MTLdat[1][1] ) )
MNIST_MTLdat[1][1].shape









    



<class 'theano.tensor.var.TensorVariable'>






    Out[38]:





Shape.0



In [39]:

    
dir(MNIST_MTLdat[0][1]) ;



In [25]:

    
import gzip
import six.moves.cPickle as pickle
with gzip.open("../DeepLearningTutorials/data/mnist.pkl.gz", 'rb') as f:
    try:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except:
        train_set, valid_set, test_set = pickle.load(f)



In [26]:

    
print( type( train_set[0] ))
print( train_set[0].shape )
print( type( train_set[1]))
print( train_set[1].shape )
print( type( valid_set[0] ))
print( valid_set[0].shape )
print( type( valid_set[1]))
print( valid_set[1].shape )
print( type( test_set[0] ))
print( test_set[0].shape )
print( type( test_set[1]))
print( test_set[1].shape )









    



<type 'numpy.ndarray'>
(50000, 784)
<type 'numpy.ndarray'>
(50000,)
<type 'numpy.ndarray'>
(10000, 784)
<type 'numpy.ndarray'>
(10000,)
<type 'numpy.ndarray'>
(10000, 784)
<type 'numpy.ndarray'>
(10000,)



In [15]:

    
X = train_set[0].T



In [17]:

    
pd.DataFrame(X.T).describe()









    Out[17]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      774
      775
      776
      777
      778
      779
      780
      781
      782
      783
    
  
  
    
      count
      50000.0
      50000.0
      50000.0
      50000.0
      50000.0
      50000.0
      50000.0
      50000.0
      50000.0
      50000.0
      ...
      50000.000000
      50000.000000
      50000.000000
      50000.000000
      50000.000000
      50000.000000
      50000.0
      50000.0
      50000.0
      50000.0
    
    
      mean
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.000739
      0.000354
      0.000204
      0.000090
      0.000071
      0.000009
      0.0
      0.0
      0.0
      0.0
    
    
      std
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.022778
      0.015422
      0.012079
      0.007217
      0.007181
      0.001483
      0.0
      0.0
      0.0
      0.0
    
    
      min
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.0
      0.0
      0.0
      0.0
    
    
      25%
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.0
      0.0
      0.0
      0.0
    
    
      50%
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.0
      0.0
      0.0
      0.0
    
    
      75%
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.0
      0.0
      0.0
      0.0
    
    
      max
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.992188
      0.992188
      0.988281
      0.988281
      0.992188
      0.242188
      0.0
      0.0
      0.0
      0.0
    
  

8 rows × 784 columns



In [18]:

    
28*28









    Out[18]:





784



In [94]:

    
X_i = theano.shared( X.astype("float32"))



In [95]:

    
m = X_i.get_value().shape[1]



In [45]:

    
a1 = T.stack( [ theano.shared( np.ones((1,m)).astype("float32") ) , X_i ] , axis=1 )



In [41]:

    
print( type(a1) )
#print( a1.get_scalar_constant_value() )
dir(a1)
a1.get_parents()









    



<class 'theano.tensor.var.TensorVariable'>






    Out[41]:





[Join(TensorConstant{2}, DimShuffle{0,1,x}.0, DimShuffle{0,1,x}.0)]



In [46]:

    
a1.ndim









    Out[46]:





3



In [96]:

    
a1_0 = theano.shared( np.ones((1,m)).astype("float32"),name='a1_0')



In [97]:

    
a1 = T.stack( [a1_0,X_i], axis=0)



In [98]:

    
d = X_i.get_value().shape[0]
s_2 = d/2
rng1 = np.random.RandomState(1234)
Theta1_values = np.asarray( rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d+1)),
                           dtype=theano.config.floatX)
Theta1 = theano.shared(value=Theta1_values, name="Theta",borrow=True)



In [99]:

    
#rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d+1))
z1 = T.dot( Theta1, a1)
a2 = T.tanh(z1)



In [100]:

    
passthru1 = theano.function( [], a2)



In [101]:

    
print(d)
passthru1()









    



784






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-101-eaf6b97a7ef3> in <module>()
      1 print(d)
----> 2 passthru1()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: GpuJoin: Wrong inputs for input 0 related to inputs 0.!
Apply node that caused the error: GpuJoin(TensorConstant{2}, GpuDimShuffle{0,1,x}.0, GpuDimShuffle{0,1,x}.0)
Toposort index: 9
Inputs types: [TensorType(int8, scalar), CudaNdarrayType(float32, (False, False, True)), CudaNdarrayType(float32, (False, False, True))]
Inputs shapes: [(), (1, 50000, 1), (784, 50000, 1)]
Inputs strides: [(), (0, 1, 0), (50000, 1, 0)]
Inputs values: [array(2, dtype=int8), 'not shown', 'not shown']
Outputs clients: [[GpuDimShuffle{1,0,2}(GpuJoin.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.



In [108]:

    
print(X.shape)
X_i = theano.shared( X.astype("float32"))
#m = X_i.get_value().shape[1]
m = X.shape[1]
print(m)
a1_0 = theano.shared( np.ones((1,m)).astype("float32"),name='a1_0')
print(a1_0.get_value().shape)
a1 = T.stack( [a1_0,X_i], axis=0)
addintercept = theano.function([],a1)









    



(784, 50000)
50000
(1, 50000)



In [109]:

    
addintercept()









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-109-f3e06b86efb8> in <module>()
----> 1 addintercept()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: GpuJoin: Wrong inputs for input 1 related to inputs 0.!
Apply node that caused the error: GpuJoin(TensorConstant{0}, GpuDimShuffle{x,0,1}.0, GpuDimShuffle{x,0,1}.0)
Toposort index: 2
Inputs types: [TensorType(int8, scalar), CudaNdarrayType(float32, (True, False, False)), CudaNdarrayType(float32, (True, False, False))]
Inputs shapes: [(), (1, 1, 50000), (1, 784, 50000)]
Inputs strides: [(), (0, 0, 1), (0, 50000, 1)]
Inputs values: [array(0, dtype=int8), 'not shown', 'not shown']
Outputs clients: [[HostFromGpu(GpuJoin.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.



In [131]:

    
d = X_i.get_value().shape[0]
print(d)
s_2 = d/2
print(s_2)
rng1 = np.random.RandomState(1234)
Theta1_values = np.asarray( rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d)),
                           dtype=theano.config.floatX)
Theta1 = theano.shared(value=Theta1_values, name="Theta1",borrow=True)
b_values = np.vstack( np.zeros(s_2) ).astype(theano.config.floatX)
b1 = theano.shared(value=b_values, name='b1',borrow=True)
a1_values=np.array( np.zeros( (d,m)), dtype=theano.config.floatX)
a1 = theano.shared(value=a1_values, name='a1', borrow=True)
lin_z2 = T.dot( Theta1, a1) + T.tile(b1,(1,m))
#lin_z2 = T.dot( Theta1, a1)



In [132]:

    
test_mult = theano.function([],lin_z2)

print( type(b_values))
b_values.dtype









    



<type 'numpy.ndarray'>






    Out[132]:





dtype('float32')



In [133]:

    
test_mult()









    Out[133]:





array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)



In [126]:

    
print( b1.get_value().shape )
T.tile( b1, (0,m))









    



(392, 1)






    Out[126]:





Reshape{2}.0

`NN.py`, load `NN.py` for `Layer` class for Neural Net for Multiple Layers



In [13]:

    
import sys
import os



In [14]:

    
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )



In [11]:

    
from NN import Layer, cost_functional, cost_functional_noreg, gradientDescent_step

Boilerplate sample data, from Coursera's Machine Learning Introduction



In [12]:

    
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')









    



Loading and Visualizing Data ...



In [14]:

    
ex4data1.keys()









    Out[14]:





['y', 'X', '__version__', '__header__', '__globals__']



In [16]:

    
print( ex4data1['X'].shape )
print( ex4data1['y'].shape )









    



(5000, 400)
(5000, 1)



In [13]:

    
test_rng = np.random.RandomState(1234)
#Theta1 = Layer( test_rng, 1, 400,25, 5000)



In [29]:

    
#help(Theta1.al.set_value); # Beginning with Theano 0.3.1, set_value will work in-place on the GPU, if ... source on CPU
Theta1.al.set_value( ex4data1['X'].T.astype(theano.config.floatX))



In [21]:

    
Theta1.alp1









    Out[21]:





Elemwise{tanh,no_inplace}.0



In [30]:

    
print( type( Theta1.alp1 ) )
Theta2 = Layer( test_rng, 2, 25,10,5000, al=Theta1.alp1 )









    



<class 'theano.tensor.var.TensorVariable'>



In [24]:

    
Theta2.alp1









    Out[24]:





Elemwise{tanh,no_inplace}.0



In [35]:

    
predicted = theano.function([],sandbox.cuda.basic_ops.gpu_from_host( Theta2.alp1 ) )



In [37]:

    
predicted().shape









    Out[37]:





(10, 5000)



In [39]:

    
print( ex4data1['y'].shape )
pd.DataFrame( ex4data1['y']).describe()









    



(5000, 1)






    Out[39]:






  
    
      
      0
    
  
  
    
      count
      5000.000000
    
    
      mean
      5.500000
    
    
      std
      2.872569
    
    
      min
      1.000000
    
    
      25%
      3.000000
    
    
      50%
      5.500000
    
    
      75%
      8.000000
    
    
      max
      10.000000



In [21]:

    
# recall that whereas the original labels (in the variable y) were 1, 2, ..., 10, for the purpose of training a 
# neural network, we need to recode the labels as vectors containing only values 0 or 1
K=10
m = ex4data1['y'].shape[0]
y_prob = [np.zeros(K) for row in ex4data1['y']]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_prob[i][ ex4data1['y'][i]-1] = 1
y_prob = np.array(y_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_prob.shape)



In [47]:

    
print( type(y_prob) )
type( np.asarray( y_prob, dtype=theano.config.floatX) )









    



<type 'numpy.ndarray'>






    Out[47]:





numpy.ndarray



In [48]:

    
help( T.nlinalg.trace )









    



Help on function trace in module theano.tensor.nlinalg:

trace(X)
    Returns the sum of diagonal elements of matrix X.
    
    Notes
    -----
    Works on GPU since 0.6rc4.



In [49]:

    
y_sh_var = theano.shared( np.asarray( y_prob,dtype=theano.config.floatX),name='y')



In [54]:

    
h_test = Theta2.alp1
J = sandbox.cuda.basic_ops.gpu_from_host(
        (-T.nlinalg.trace( T.dot( T.log( h_test ), y_sh_var.T)) - T.nlinalg.trace( 
        T.dot( T.log( np.float32(1.)-h_test),(np.float32(1.)- y_sh_var.T ) )))/np.float32(m)
    )



In [55]:

    
print(type(J))
test_cost_func = theano.function([],J)









    



<class 'theano.sandbox.cuda.var.CudaNdarrayVariable'>



In [56]:

    
test_cost_func()









    Out[56]:





CudaNdarray(nan)



In [58]:

    
J_test_build = sandbox.cuda.basic_ops.gpu_from_host( -T.nlinalg.trace( T.dot( T.log(h_test),y_sh_var.T) ) )
test_cost_build_func = theano.function([], J_test_build)



In [59]:

    
test_cost_build_func()









    Out[59]:





CudaNdarray(nan)

Sanity check using ex4.m, Exercise 4 or Programming Exercise 4 from Coursera's Machine Learning Introduction by Ng



In [14]:

    
Theta_testvals = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4weights.mat')



In [16]:

    
print( Theta_testvals.keys() )
print( Theta_testvals['Theta1'].shape )
print( Theta_testvals['Theta2'].shape )
Theta1_testval = Theta_testvals['Theta1'][:,1:]
b1_testval = Theta_testvals['Theta1'][:,0:1]
print( Theta1_testval.shape )
print( b1_testval.shape )
Theta2_testval = Theta_testvals['Theta2'][:,1:]
b2_testval = Theta_testvals['Theta2'][:,0:1]
print( Theta2_testval.shape )
print( b2_testval.shape )









    



['Theta2', '__version__', '__header__', 'Theta1', '__globals__']
(25, 401)
(10, 26)
(25, 400)
(25, 1)
(10, 25)
(10, 1)



In [15]:

    
Theta1 = Layer( test_rng, 1, 400,25, 5000, activation=T.nnet.sigmoid)



In [17]:

    
Theta1.Theta.set_value( Theta1_testval.astype("float32"))
Theta1.b.set_value( b1_testval.astype('float32') )
Theta1.al.set_value( ex4data1['X'].T.astype('float32'))

For $\Theta^{(2)}$, the key to connecting $\Theta^{(2)}$ with $\Theta^{(1)}$ is to set the argument in class Layer with al=Theta1.alp1,



In [18]:

    
Theta2 = Layer( test_rng, 2, 25,10,5000, al=Theta1.alp1 , activation=T.nnet.sigmoid)



In [19]:

    
Theta2.Theta.set_value( Theta2_testval.astype('float32'))
Theta2.b.set_value( b2_testval.astype('float32'))



In [107]:

    
h_test = Theta2.alp1
J = sandbox.cuda.basic_ops.gpu_from_host(
    T.mean( T.sum( 
        - y_sh_var * T.log( h_test ) - ( np.float32( 1) - y_sh_var) * T.log( np.float32(1) - h_test), axis =0), axis=0)
  )
#J = sandbox.cuda.basic_ops.gpu_from_host( 
#    T.log(h_test) * y_sh_var
#    )



In [108]:

    
test_cost_func = theano.function([],J)



In [109]:

    
test_cost_func()









    Out[109]:





CudaNdarray(0.287629187107)



In [80]:

    
print(type( y_sh_var) )
print( y_sh_var.get_value().shape )
print( type( h_test ))









    



<class 'theano.sandbox.cuda.var.CudaNdarraySharedVariable'>
(10, 5000)
<class 'theano.tensor.var.TensorVariable'>



In [94]:

    
checklayer2 = theano.function([], sandbox.cuda.basic_ops.gpu_from_host(Theta1.alp1))



In [102]:

    
checklayer2()









    Out[102]:





CudaNdarray([[ 0.49435964  0.49435964  0.49435964 ...,  0.49435964  0.49435964
   0.49435964]
 [ 0.47542453  0.47542453  0.47542453 ...,  0.47542453  0.47542453
   0.47542453]
 [ 0.52900642  0.52900642  0.52900642 ...,  0.52900642  0.52900642
   0.52900642]
 ..., 
 [ 0.45432255  0.45432255  0.45432255 ...,  0.45432255  0.45432255
   0.45432255]
 [ 0.33134761  0.33134761  0.33134761 ...,  0.33134761  0.33134761
   0.33134761]
 [ 0.41315612  0.41315612  0.41315612 ...,  0.41315612  0.41315612
   0.41315612]])



In [112]:

    
testreg = theano.function([], T.sum( Theta1.Theta * Theta1.Theta ) )



In [113]:

    
testreg()









    Out[113]:





array(392.58416748046875, dtype=float32)



In [114]:

    
range(1,3)









    Out[114]:





[1, 2]



In [115]:

    
Thetas_lst = [ Theta1.Theta, Theta2.Theta ]



In [118]:

    
T.sum( [ T.sum( theta*theta) for theta in Thetas_lst] )









    Out[118]:





Sum{acc_dtype=float64}.0



In [22]:

    
cost_func_test = cost_functional(3, 1, y_prob, Theta2.alp1, [Theta1.Theta, Theta2.Theta])



In [23]:

    
cost_test = theano.function([], cost_func_test)



In [24]:

    
cost_test() # (this value should be about 0.383770)









    Out[24]:





CudaNdarray(0.383769869804)



In [26]:

    
grad_test = T.grad( cost_func_test,[Theta1.Theta, Theta2.Theta])



In [27]:

    
grad_test_test = theano.function([], grad_test)



In [31]:

    
print( type(grad_test_test() ) )
print( len( grad_test_test() ))
print( type(grad_test_test()[0] ))
print( grad_test_test()[0].shape )
print( grad_test_test()[1].shape )









    



<type 'list'>
2
<type 'numpy.ndarray'>
(25, 400)
(10, 25)



In [41]:

    
print( range(6))
print( list( "Ernest") )
zip( range(6), list("Ernest"))
print( type(grad_test))









    



[0, 1, 2, 3, 4, 5]
['E', 'r', 'n', 'e', 's', 't']
<type 'list'>



In [40]:

    
print( grad_test_test.maker.fgraph.toposort() )









    



[Shape_i{1}(b1), Shape_i{0}(b1), GpuDimShuffle{1,0}(al), Shape_i{1}(y), Shape_i{1}(b2), Shape_i{0}(b2), GpuDimShuffle{1,0}(Theta2), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[  9.99999975e-05]]}, Theta2), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[  9.99999975e-05]]}, Theta1), GpuAlloc(b1, TensorConstant{1}, TensorConstant{5000}, Shape_i{0}.0, Shape_i{1}.0), InplaceDimShuffle{x,x}(Shape_i{1}.0), GpuAlloc(b2, TensorConstant{1}, TensorConstant{5000}, Shape_i{0}.0, Shape_i{1}.0), Elemwise{Mul}[(0, 1)](TensorConstant{5000}, Shape_i{1}.0), Elemwise{Mul}[(0, 1)](TensorConstant{5000}, Shape_i{1}.0), GpuDimShuffle{0,2,1,3}(GpuAlloc.0), Elemwise{Cast{float32}}(InplaceDimShuffle{x,x}.0), GpuDimShuffle{0,2,1,3}(GpuAlloc.0), MakeVector{dtype='int64'}(Shape_i{0}.0, Elemwise{Mul}[(0, 1)].0), MakeVector{dtype='int64'}(Shape_i{0}.0, Elemwise{Mul}[(0, 1)].0), GpuFromHost(Elemwise{Cast{float32}}.0), GpuReshape{2}(GpuDimShuffle{0,2,1,3}.0, MakeVector{dtype='int64'}.0), GpuReshape{2}(GpuDimShuffle{0,2,1,3}.0, MakeVector{dtype='int64'}.0), GpuElemwise{true_div,no_inplace}(CudaNdarrayConstant{[[-1.]]}, GpuFromHost.0), GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, al, TensorConstant{1.0}), GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0), GpuDimShuffle{1,0}(GpuElemwise{ScalarSigmoid}[(0, 0)].0), GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta2, GpuElemwise{ScalarSigmoid}[(0, 0)].0, TensorConstant{1.0}), GpuElemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3))) / i4) - (i5 * (i2 - i1) * scalar_sigmoid(i3)))}}[(0, 3)](CudaNdarrayConstant{[[-1.]]}, y, CudaNdarrayConstant{[[ 1.]]}, GpuGemm{inplace}.0, GpuFromHost.0, GpuElemwise{true_div,no_inplace}.0), GpuGemm{inplace}(GpuElemwise{mul,no_inplace}.0, TensorConstant{1.0}, GpuElemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3))) / i4) - (i5 * (i2 - i1) * scalar_sigmoid(i3)))}}[(0, 3)].0, GpuDimShuffle{1,0}.0, TensorConstant{2.0}), GpuDot22(GpuDimShuffle{1,0}.0, GpuElemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3))) / i4) - (i5 * (i2 - i1) * scalar_sigmoid(i3)))}}[(0, 3)].0), HostFromGpu(GpuGemm{inplace}.0), GpuElemwise{Composite{((i0 * i1) * (i2 - i1))}}[(0, 0)](GpuDot22.0, GpuElemwise{ScalarSigmoid}[(0, 0)].0, CudaNdarrayConstant{[[ 1.]]}), GpuGemm{inplace}(GpuElemwise{mul,no_inplace}.0, TensorConstant{1.0}, GpuElemwise{Composite{((i0 * i1) * (i2 - i1))}}[(0, 0)].0, GpuDimShuffle{1,0}.0, TensorConstant{2.0}), HostFromGpu(GpuGemm{inplace}.0)]



In [43]:

    
0.01 * grad_test









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-c3291a50120e> in <module>()
----> 1 0.01 * grad_test

TypeError: can't multiply sequence by non-int of type 'float'



In [44]:

    
test_update = [(Theta,sandbox.cuda.basic_ops.gpu_from_host( Theta - np.float32(0.01)*T.grad(cost_func_test, Theta)+0.0001*Theta ) ) for Theta in [Theta1.Theta, Theta2.Theta] ]



In [46]:

    
test_gradDes_step = theano.function( inputs=[], updates= test_update )



In [47]:

    
test_gradDes_step()









    Out[47]:





[]



In [52]:

    
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )









    



[[ -1.05634514e-08   2.19436180e-09  -6.92295362e-06 ...,  -1.30543685e-05
   -5.04227410e-06   2.80491941e-09]
 [  7.66243780e-09  -9.75969350e-09   1.04755338e-06 ...,  -5.60192129e-05
    2.00961935e-07   3.54457574e-09]
 [ -8.77740458e-09   8.16117751e-09  -1.47759499e-06 ...,  -1.20964221e-04
   -2.33693959e-06  -7.50741602e-09]
 ..., 
 [ -8.89359164e-09  -9.82064385e-09  -7.78459707e-06 ...,   2.35335647e-05
   -3.25518340e-06   9.02587516e-09]
 [  3.05208303e-10   2.56086108e-09  -2.11960196e-06 ...,  -8.61849287e-04
    9.43547930e-05   3.83799614e-09]
 [  8.85963747e-09  -6.57579602e-10  -8.81727192e-06 ...,  -1.80388656e-06
   -8.14549094e-06   8.79540707e-09]]
[[-1.21257138 -0.10188229 -2.36874819 -1.0578922  -2.20846629  0.56389523
   1.21117842  2.21053886  0.4446061  -1.18257177  1.04299855 -1.60575604
   1.30433381  1.37189186  1.74843192 -0.23368138 -1.52030313  1.15336025
   0.10369149 -0.37211585 -0.61536551 -0.12569839 -2.27216721 -0.71843761
  -1.29703891]
 [ 0.61565566 -1.26563799  1.85764742 -0.91862833 -0.05503076 -0.38593763
   1.2953428  -1.56859624 -0.97036505 -2.18357611 -2.85063267 -2.07754731
   1.63180149  0.34905949  1.82808101 -2.44199824 -0.85639215 -0.29828632
  -2.07969451 -1.2934581   0.8999148   0.28309527  2.31204581 -2.46469688
   1.45671725]
 [-1.94558346  2.01381588 -3.12348628 -0.23620027  1.38695455  0.9099192
  -1.54790509 -0.79839182 -0.65606695  0.73545998 -2.58620143  0.47215798
   0.55355227  2.51281595 -2.41699743 -1.63915682  1.20285499 -1.20258307
  -1.83465064 -1.88032556 -0.34059626  0.23694935 -1.06149018  1.02769864
  -0.47695836]
 [ 0.46304011  0.58498383 -0.16503577  1.93284273 -0.2296816  -1.84750748
   0.49016848  1.07157159 -3.31940198  1.54129529  0.37375814 -0.86493742
  -2.583004    0.97072506 -0.51027173 -0.68435007 -1.64730716  0.21155307
  -0.27425268  1.72617733  1.32432389 -2.64011979 -0.08056725 -2.03531981
  -1.46138978]
 [-2.04503059  2.05719876  1.95121229  0.17639595 -2.16163683 -0.40398875
   1.8017633  -1.56294954 -0.2525554   0.23588987  0.71664256  1.07700384
  -0.3546088  -1.67760444 -0.12940609 -0.67495829  1.14078426  1.32445085
   3.21191907 -2.15911388 -2.60191083 -3.22298121 -1.89632535 -0.87497073
   2.51064777]
 [ 0.43445611 -0.93170726  0.18392649 -0.36082     0.61964542  0.38628966
  -2.65177917  2.29734659 -2.08839846 -1.86401701  1.06068861  0.77570206
   2.13490796 -1.14985681 -0.52086854  0.99753791 -1.48324752 -2.31418347
   0.29520378 -0.38708907 -2.20630646  0.3070533  -1.17658365 -1.63479984
  -0.82476246]
 [ 1.21576905 -1.50111604 -2.03216481 -1.52382553 -2.43757415 -2.37595034
  -1.40001822 -0.88744533 -0.63285488  1.50465775 -1.58092761  0.58605266
  -0.77548492  0.942671    2.10941553  0.54484761  0.43778127 -1.28037572
  -0.0436146   1.47765326 -1.13288772 -0.72854507  0.04735166  1.65762866
   1.68558455]
 [-0.72256583 -3.15261006  0.36581546  0.19813281 -0.73067629  1.65280986
  -2.30059648 -1.87487686  0.98105556 -1.58841705  1.35448146  2.17917943
  -1.99260521 -2.00392246 -0.38865316 -2.34017301 -2.91749477  0.99408847
  -2.70504951 -1.27153015  1.86110783 -1.20531952 -0.38018176  0.70879132
  -2.11035943]
 [ 0.53607166  1.30320907 -1.03383625 -4.03126812  0.58179194 -2.65745735
   0.80388159 -1.09253371  2.49935699  0.36204222  0.66201895 -0.92170537
  -0.83132339 -2.0022192  -2.94928217  0.64570653 -1.10126281  0.74517834
   0.5851267  -1.99566114  0.62597275  1.80614579 -0.22312002 -1.40457022
  -2.13213754]
 [-1.43959904 -1.2182219   0.71100444  0.45221624 -0.35957071  0.62291443
  -0.67012274 -0.7069872   0.06312034 -1.2321192  -1.74663413 -2.71989202
  -2.21460128 -1.69325113 -0.90936852  0.87861484  1.18677163 -1.87060738
   0.39800486  1.72131801 -1.36948287  0.85815626 -0.24782105  1.2802242
  -1.3276583 ]]



In [53]:

    
test_gradDes_step()









    Out[53]:





[]



In [54]:

    
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )









    



[[ -1.05644871e-08   2.19457674e-09  -6.92367394e-06 ...,  -1.30557455e-05
   -5.04279706e-06   2.80519430e-09]
 [  7.66318919e-09  -9.76065007e-09   1.04766264e-06 ...,  -5.60250264e-05
    2.00982896e-07   3.54492324e-09]
 [ -8.77826434e-09   8.16197776e-09  -1.47774847e-06 ...,  -1.20976787e-04
   -2.33718265e-06  -7.50815143e-09]
 ..., 
 [ -8.89446294e-09  -9.82160664e-09  -7.78540652e-06 ...,   2.35360112e-05
   -3.25552196e-06   9.02675978e-09]
 [  3.05238224e-10   2.56111221e-09  -2.11982092e-06 ...,  -8.61938810e-04
    9.43645937e-05   3.83837229e-09]
 [  8.86050611e-09  -6.57644050e-10  -8.81818869e-06 ...,  -1.80411416e-06
   -8.14633404e-06   8.79626860e-09]]
[[-1.21269774 -0.10189326 -2.36899543 -1.05800319 -2.20869637  0.56395203
   1.21130395  2.21076775  0.44465062 -1.18269479  1.04310584 -1.60592437
   1.30446815  1.37203324  1.74861288 -0.2337063  -1.52046156  1.15347874
   0.10370216 -0.3721545  -0.61543089 -0.12571317 -2.27240396 -0.71851313
  -1.29717469]
 [ 0.61571926 -1.2657696   1.85784066 -0.91872346 -0.05503564 -0.38597718
   1.29547703 -1.56875956 -0.9704659  -2.18380332 -2.8509295  -2.07776403
   1.63197136  0.34909612  1.82827091 -2.44225264 -0.8564809  -0.29831624
  -2.07991028 -1.29359245  0.90000927  0.28312474  2.31228638 -2.46495295
   1.45686901]
 [-1.9457854   2.01402569 -3.12381077 -0.23622425  1.38709962  0.91001403
  -1.54806602 -0.79847473 -0.65613556  0.73553669 -2.58646989  0.47220758
   0.55360955  2.51307726 -2.41724944 -1.63932729  1.20297968 -1.20270777
  -1.83484173 -1.88052094 -0.34063154  0.23697387 -1.06160128  1.02780485
  -0.47700837]
 [ 0.46308801  0.58504438 -0.16505313  1.93304372 -0.22970556 -1.8477
   0.49021927  1.07168269 -3.31974745  1.54145551  0.37379676 -0.86502802
  -2.58327293  0.97082567 -0.51032478 -0.68442118 -1.64747822  0.21157469
  -0.27428094  1.7263571   1.32446122 -2.64039493 -0.08057581 -2.03553128
  -1.46154177]
 [-2.04524326  2.05741334  1.95141554  0.17641492 -2.16186166 -0.40403011
   1.80195129 -1.56311166 -0.25258079  0.23591475  0.7167182   1.07711673
  -0.35464483 -1.67777836 -0.12941965 -0.6750282   1.14090323  1.32458937
   3.21225333 -2.15933871 -2.60218096 -3.22331572 -1.89652169 -0.8750608
   2.51090932]
 [ 0.43450102 -0.93180406  0.18394522 -0.36085787  0.61970943  0.3863298
  -2.65205503  2.29758549 -2.08861589 -1.86421084  1.06079876  0.7757827
   2.13512993 -1.14997661 -0.52092284  0.99764156 -1.48340154 -2.31442451
   0.2952342  -0.3871294  -2.20653629  0.30708465 -1.17670619 -1.63497019
  -0.82484829]
 [ 1.21589518 -1.5012722  -2.03237605 -1.52398372 -2.43782783 -2.37619758
  -1.40016377 -0.88753754 -0.63292104  1.50481379 -1.58109224  0.58611315
  -0.77556568  0.94276875  2.10963464  0.54490387  0.43782642 -1.28050911
  -0.04361925  1.47780681 -1.13300586 -0.72862113  0.04735615  1.65780067
   1.68575966]
 [-0.72264111 -3.1529386   0.3658531   0.19815198 -0.73075318  1.65298057
  -2.30083585 -1.87507212  0.98115718 -1.58858275  1.35462141  2.17940545
  -1.99281275 -2.00413132 -0.38869336 -2.34041619 -2.917799    0.99419045
  -2.70533133 -1.27166259  1.86130106 -1.20544505 -0.38022164  0.70886451
  -2.11057878]
 [ 0.53612489  1.30334234 -1.03394532 -4.03168917  0.58184916 -2.65773559
   0.80396318 -1.09264815  2.49961329  0.3620764   0.66208446 -0.92180538
  -0.83141011 -2.00242877 -2.94958925  0.64577097 -1.10137868  0.74525356
   0.5851863  -1.99586976  0.62603438  1.80633044 -0.2231469  -1.40471911
  -2.13235974]
 [-1.43974864 -1.21834874  0.71107888  0.4522633  -0.35960764  0.62297934
  -0.67019254 -0.70706058  0.06312715 -1.23224759 -1.74681592 -2.72017503
  -2.21483088 -1.69342732 -0.90946311  0.87870657  1.18689513 -1.87080216
   0.39804676  1.7214973  -1.36962521  0.85824573 -0.24784632  1.28035724
  -1.32779622]]



In [23]:

    
gradDes_test_res = gradientDescent_step(cost_func_test, [Theta1.Theta, Theta2.Theta], 0.01, 0.00001 )



In [25]:

    
print( type(gradDes_test_res) )
gradDes_step_test = gradDes_test_res[1]









    



<type 'tuple'>



In [26]:

    
gradDes_step_test()









    Out[26]:





[]



In [27]:

    
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )









    



[[ -1.05625011e-08   2.19416441e-09  -6.92233061e-06 ...,  -1.30531944e-05
   -5.04182026e-06   2.80466694e-09]
 [  7.66174768e-09  -9.75881509e-09   1.04745914e-06 ...,  -5.60141707e-05
    2.00943859e-07   3.54425689e-09]
 [ -8.77661499e-09   8.16044299e-09  -1.47746198e-06 ...,  -1.20953329e-04
   -2.33672927e-06  -7.50674101e-09]
 ..., 
 [ -8.89279139e-09  -9.81976012e-09  -7.78389676e-06 ...,   2.35314474e-05
   -3.25489032e-06   9.02506336e-09]
 [  3.05180825e-10   2.56063060e-09  -2.11941119e-06 ...,  -8.61771696e-04
    9.43463019e-05   3.83765109e-09]
 [  8.85883988e-09  -6.57520427e-10  -8.81647793e-06 ...,  -1.80372422e-06
   -8.14475789e-06   8.79461570e-09]]
[[-1.21246231 -0.10187311 -2.3685348  -1.05779707 -2.20826769  0.5638445
   1.21106946  2.21034002  0.44456607 -1.18246531  1.04290473 -1.60561156
   1.3042165   1.37176836  1.74827456 -0.23366036 -1.5201664   1.15325642
   0.10368215 -0.37208235 -0.61531013 -0.12568706 -2.27196264 -0.718373
  -1.29692221]
 [ 0.61560023 -1.26552403  1.85748029 -0.91854566 -0.05502581 -0.38590291
   1.29522622 -1.5684551  -0.97027773 -2.18337965 -2.85037613 -2.07736039
   1.63165462  0.34902808  1.8279165  -2.44177866 -0.85631508 -0.29825947
  -2.07950735 -1.29334176  0.8998338   0.28306979  2.31183767 -2.46447515
   1.45658612]
 [-1.94540834  2.01363468 -3.12320518 -0.23617902  1.38682961  0.90983731
  -1.54776573 -0.79831994 -0.65600789  0.73539382 -2.58596873  0.47211552
   0.55350244  2.51258993 -2.41677999 -1.63900936  1.20274675 -1.20247483
  -1.83448553 -1.8801564  -0.34056559  0.23692803 -1.06139469  1.02760613
  -0.47691542]
 [ 0.46299845  0.58493114 -0.16502091  1.93266881 -0.22966093 -1.84734118
   0.49012437  1.07147515 -3.31910324  1.54115653  0.37372452 -0.86485958
  -2.58277154  0.97063768 -0.51022583 -0.6842885  -1.64715886  0.21153402
  -0.27422801  1.72602201  1.32420468 -2.63988233 -0.08056    -2.03513646
  -1.46125829]
 [-2.04484653  2.05701351  1.95103669  0.17638008 -2.16144228 -0.40395239
   1.80160105 -1.56280887 -0.25253269  0.23586865  0.71657807  1.0769068
  -0.35457689 -1.67745352 -0.12939446 -0.67489755  1.14068162  1.32433164
   3.21163011 -2.15891957 -2.6016767  -3.22269106 -1.89615464 -0.874892
   2.51042175]
 [ 0.43441701 -0.93162346  0.18390995 -0.36078751  0.61958963  0.38625491
  -2.65154052  2.29713964 -2.08821058 -1.86384928  1.06059313  0.77563226
   2.1347158  -1.14975333 -0.52082163  0.99744815 -1.48311412 -2.3139751
   0.29517719 -0.38705423 -2.20610809  0.30702567 -1.17647779 -1.63465273
  -0.82468826]
 [ 1.21565962 -1.50098097 -2.03198195 -1.52368844 -2.4373548  -2.37573647
  -1.39989233 -0.88736546 -0.6327979   1.50452232 -1.58078539  0.58599991
  -0.77541512  0.94258612  2.10922575  0.54479861  0.43774188 -1.28026056
  -0.04361067  1.47752023 -1.1327858  -0.7284795   0.0473474   1.65747941
   1.68543291]
 [-0.7225008  -3.15232635  0.36578253  0.19811498 -0.73061055  1.65266109
  -2.30038953 -1.87470806  0.98096728 -1.588274    1.35435963  2.17898345
  -1.9924258  -2.00374198 -0.3886182  -2.33996224 -2.91723228  0.993999
  -2.70480609 -1.27141571  1.86094034 -1.20521104 -0.38014755  0.70872754
  -2.11016965]
 [ 0.53602344  1.30309176 -1.03374326 -4.03090572  0.5817396  -2.65721822
   0.80380923 -1.09243536  2.49913216  0.36200964  0.66195935 -0.92162246
  -0.83124858 -2.00203896 -2.94901681  0.64564842 -1.10116363  0.74511129
   0.58507401 -1.99548161  0.62591642  1.8059833  -0.22309995 -1.40444386
  -2.13194585]
 [-1.43946946 -1.21811223  0.71094042  0.45217556 -0.35953835  0.62285841
  -0.67006248 -0.7069236   0.06311466 -1.23200822 -1.74647701 -2.71964717
  -2.21440196 -1.69309878 -0.90928668  0.87853581  1.18666482 -1.87043905
   0.39796904  1.72116315 -1.36935961  0.85807902 -0.24779876  1.28010905
  -1.32753885]]



In [28]:

    
gradDes_step_test()









    Out[28]:





[]



In [29]:

    
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )









    



[[ -1.05625855e-08   2.19418195e-09  -6.92242747e-06 ...,  -1.30533963e-05
   -5.04188938e-06   2.80468937e-09]
 [  7.66180897e-09  -9.75889325e-09   1.04747403e-06 ...,  -5.60149419e-05
    2.00946744e-07   3.54428531e-09]
 [ -8.77668516e-09   8.16050871e-09  -1.47748244e-06 ...,  -1.20955003e-04
   -2.33676201e-06  -7.50680140e-09]
 ..., 
 [ -8.89286245e-09  -9.81983916e-09  -7.78400590e-06 ...,   2.35317766e-05
   -3.25493579e-06   9.02513619e-09]
 [  3.05183268e-10   2.56065102e-09  -2.11943939e-06 ...,  -8.61783628e-04
    9.43476116e-05   3.83768217e-09]
 [  8.85891094e-09  -6.57525701e-10  -8.81660071e-06 ...,  -1.80378925e-06
   -8.14486793e-06   8.79468587e-09]]
[[-1.21247959 -0.10187493 -2.36856866 -1.05781281 -2.20829916  0.56385064
   1.21108603  2.21037006  0.44457057 -1.18248188  1.04291832 -1.6056354
   1.30423355  1.37178624  1.74829817 -0.23366424 -1.52018797  1.15327108
   0.10368349 -0.37208751 -0.61532009 -0.12569052 -2.27199483 -0.71838391
  -1.29694128]
 [ 0.61560839 -1.26554167  1.85750628 -0.91855812 -0.05502573 -0.38590774
   1.29524386 -1.56847727 -0.97029126 -2.18341041 -2.85041642 -2.07738996
   1.63167763  0.34903327  1.82794189 -2.44181347 -0.85632676 -0.29826254
  -2.07953596 -1.29335976  0.89984727  0.28307381  2.3118701  -2.46450949
   1.45660675]
 [-1.94543517  2.01366329 -3.12324858 -0.23618175  1.38684976  0.9098503
  -1.54778731 -0.7983309  -0.65601742  0.73540437 -2.5860045   0.47212264
   0.55350989  2.51262522 -2.41681457 -1.63903248  1.20276332 -1.20249116
  -1.83451152 -1.8801825  -0.34057021  0.23693123 -1.06141019  1.02761996
  -0.47692251]
 [ 0.46300465  0.58493906 -0.16502343  1.93269575 -0.22966421 -1.84736741
   0.49013108  1.07148981 -3.31914997  1.54117799  0.37372953 -0.86487234
  -2.58280778  0.97065091 -0.51023299 -0.68429804 -1.64718163  0.2115366
  -0.27423158  1.72604644  1.3242228  -2.63991976 -0.0805613  -2.03516483
  -1.4612788 ]
 [-2.04487514  2.05704284  1.95106435  0.17638318 -2.16147256 -0.4039574
   1.8016268  -1.56283033 -0.25253534  0.23587231  0.71658915  1.07692266
  -0.354581   -1.67747641 -0.12939636 -0.67490661  1.14069796  1.32435095
   3.21167541 -2.15895009 -2.60171247 -3.2227354  -1.89618027 -0.87490332
   2.51045728]
 [ 0.43442282 -0.93163645  0.18391213 -0.36079288  0.61959785  0.38626033
  -2.65157771  2.29717159 -2.08824015 -1.86387539  1.06060791  0.77564305
   2.1347456  -1.14976966 -0.52082902  0.99746203 -1.48313475 -2.31400776
   0.29518107 -0.38705969 -2.20613933  0.30702943 -1.17649448 -1.63467586
  -0.82469988]
 [ 1.21567631 -1.50100219 -2.03201032 -1.52370954 -2.4373889  -2.37576985
  -1.39991188 -0.8873778  -0.63280708  1.50454307 -1.58080781  0.58600765
  -0.77542609  0.94259894  2.10925508  0.54480588  0.43774763 -1.2802788
  -0.0436114   1.47754073 -1.13280201 -0.72849     0.04734763  1.65750217
   1.6854564 ]
 [-0.72251105 -3.15237093  0.36578727  0.19811635 -0.73062164  1.65268302
  -2.30042195 -1.87473452  0.98098069 -1.58829641  1.35437787  2.17901349
  -1.99245393 -2.00377035 -0.38862342 -2.33999467 -2.917274    0.99401158
  -2.70484447 -1.27143371  1.86096621 -1.20522809 -0.38015315  0.70873696
  -2.11019921]
 [ 0.53602844  1.30310774 -1.03375936 -4.0309639   0.58174449 -2.65725732
   0.80381852 -1.09245145  2.49916363  0.36201128  0.66196531 -0.92163956
  -0.8312605  -2.00206828 -2.94905853  0.6456548  -1.10118032  0.74511951
   0.58508086 -1.9955107   0.62592179  1.8060056  -0.22310673 -1.40446639
  -2.13197637]
 [-1.43948936 -1.2181294   0.71095073  0.45218194 -0.35954288  0.62286729
  -0.67007202 -0.70693338  0.06311581 -1.23202562 -1.74650168 -2.71968532
  -2.21443224 -1.69312251 -0.90929943  0.8785485   1.18668139 -1.87046552
   0.39797512  1.72118759 -1.36937869  0.85809124 -0.24780172  1.28012693
  -1.32755733]]



In [30]:

    
y_prob.shape









    Out[30]:





(10, 5000)



In [31]:

    
ex4data1['y'].shape









    Out[31]:





(5000, 1)



In [51]:

    
pd.DataFrame( ex4data1['y']).describe()









    Out[51]:






  
    
      
      0
    
  
  
    
      count
      5000.000000
    
    
      mean
      5.500000
    
    
      std
      2.872569
    
    
      min
      1.000000
    
    
      25%
      3.000000
    
    
      50%
      5.500000
    
    
      75%
      8.000000
    
    
      max
      10.000000



In [39]:

    
print( Theta2.alp1.shape )
print( Theta2.alp1.shape.ndim )
# Theta2.alp1.shape.get_scalar_constant_value()
predicted_logreg = theano.function([],Theta2.alp1)









    



Shape.0
1



In [42]:

    
pd.DataFrame( predicted_logreg().T ).describe()









    Out[42]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
    
  
  
    
      count
      5000.000000
      5.000000e+03
      5.000000e+03
      5.000000e+03
      5000.000000
      5.000000e+03
      5000.000000
      5000.000000
      5000.000000
      5000.000000
    
    
      mean
      0.100628
      1.003652e-01
      1.003967e-01
      1.004417e-01
      0.100253
      1.005637e-01
      0.100360
      0.100543
      0.100846
      0.100248
    
    
      std
      0.280952
      2.710659e-01
      2.669757e-01
      2.724109e-01
      0.267206
      2.798822e-01
      0.275913
      0.264708
      0.264970
      0.284639
    
    
      min
      0.000010
      4.299332e-07
      9.454787e-07
      2.587024e-07
      0.000002
      7.190226e-07
      0.000002
      0.000003
      0.000008
      0.000001
    
    
      25%
      0.000301
      8.055457e-04
      7.226729e-04
      1.883787e-04
      0.000919
      2.311849e-04
      0.000240
      0.001162
      0.000871
      0.000253
    
    
      50%
      0.001198
      4.066701e-03
      4.143638e-03
      1.152211e-03
      0.003805
      1.742870e-03
      0.002029
      0.004935
      0.004056
      0.001377
    
    
      75%
      0.006197
      1.748446e-02
      1.921718e-02
      1.193071e-02
      0.017761
      9.229897e-03
      0.011941
      0.020477
      0.018032
      0.006297
    
    
      max
      0.993053
      9.996013e-01
      9.982013e-01
      9.986625e-01
      0.999188
      9.985297e-01
      0.999378
      0.998737
      0.996482
      0.998724



In [46]:

    
pd.DataFrame(predicted_logreg().T).describe().iloc[1:-1,:].plot()









    Out[46]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fc5ee56a110>



In [49]:

    
print( np.argmax( predicted_logreg(), axis=0).shape )
np.vstack( np.argmax( predicted_logreg(),axis=0) ).shape









    



(5000,)






    Out[49]:





(5000, 1)



In [52]:

    
pd.DataFrame( np.vstack( np.argmax(predicted_logreg(),axis=0)) + 1).describe()









    Out[52]:






  
    
      
      0
    
  
  
    
      count
      5000.000000
    
    
      mean
      5.504000
    
    
      std
      2.874933
    
    
      min
      1.000000
    
    
      25%
      3.000000
    
    
      50%
      6.000000
    
    
      75%
      8.000000
    
    
      max
      10.000000



In [55]:

    
res = np.float32( ( np.vstack( np.argmax( predicted_logreg(),axis=0)) + 1 ) == ex4data1['y'] )
pd.DataFrame(res).describe()









    Out[55]:






  
    
      
      0
    
  
  
    
      count
      5000.000000
    
    
      mean
      0.975200
    
    
      std
      0.155534
    
    
      min
      0.000000
    
    
      25%
      1.000000
    
    
      50%
      1.000000
    
    
      75%
      1.000000
    
    
      max
      1.000000



In [56]:

    
range(1,3)









    Out[56]:





[1, 2]



In [57]:

    
predicted_logreg().shape









    Out[57]:





(10, 5000)



In [64]:

    
print(y_prob.shape); print( np.argmax( y_prob,axis=0 ).shape)









    



(10, 5000)
(5000,)

Summary for Neural Net with Multiple Layers for logistic regression (but can be extended to linear regression)

Load boilerplate training data:



In [9]:

    
sys.path.append( os.getcwd() + '/ML' )



In [10]:

    
from NN import Layer, cost_functional, cost_functional_noreg, gradientDescent_step, MLP



In [11]:

    
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')

# recall that whereas the original labels (in the variable y) were 1, 2, ..., 10, for the purpose of training a 
# neural network, we need to recode the labels as vectors containing only values 0 or 1
K=10
m = ex4data1['y'].shape[0]
y_prob = [np.zeros(K) for row in ex4data1['y']]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_prob[i][ ex4data1['y'][i]-1] = 1
y_prob = np.array(y_prob).T.astype(theano.config.floatX)  # size dims. (K,m)

print(ex4data1['X'].T.shape)
print(y_prob.shape)









    



Loading and Visualizing Data ... 

(400, 5000)
(10, 5000)



In [12]:

    
digitsMLP = MLP(3,[400,25,10], 5000, ex4data1['X'].T, y_prob, T.nnet.sigmoid, 1., 0.1, 0.0000)



In [17]:

    
digitsMLP.train_model(100000)



In [18]:

    
digitsMLP.accuracy_log_reg()









    



Accuracy score : 0.991200 






    Out[18]:





0.99119999999999997



In [19]:

    
print( digitsMLP.Thetas[0].Theta.get_value() )
digitsMLP.Thetas[1].Theta.get_value()









    



[[-0.03968292  0.01570808 -0.00801071 ...,  0.05324144 -0.03536524
  -0.02519361]
 [ 0.02765682  0.02882705 -0.06191737 ..., -0.01663327  0.03339296
  -0.02493248]
 [ 0.01494016 -0.03611031  0.04792143 ...,  0.0510212  -0.00466521
  -0.00434607]
 ..., 
 [ 0.03901267 -0.03169481  0.06044593 ..., -0.0364962  -0.03772442
   0.01351396]
 [ 0.03186403  0.05424397  0.02720479 ...,  0.01087107 -0.01080637
   0.01786175]
 [ 0.06252811 -0.04008792  0.06331227 ..., -0.04469054  0.02635518
   0.04384876]]






    Out[19]:





array([[ 1.20451474,  0.83254075,  0.3577919 ,  2.09141254,  0.30367732,
         1.70919907, -0.68671805,  1.94035482,  0.6540705 , -2.20026374,
         1.0235399 , -1.10299885,  0.87818861,  2.8505857 , -2.22271323,
        -2.00406885, -2.60233808, -0.85654968, -2.76958466, -1.48524833,
        -3.17246437, -0.06278938,  0.616503  ,  3.52648425,  0.98629874],
       [ 2.60660052, -0.10535918,  1.22893441,  2.44127226, -3.89616203,
        -3.1201973 , -2.51445174,  1.34601641,  3.96324277, -2.53724122,
        -3.20322871,  0.95207882, -2.91599441, -2.66748142,  1.32648623,
         0.81661856,  1.32140303, -0.87079829, -0.53782797, -1.90582633,
         3.33778262, -0.50736022, -0.66713619, -1.36594701,  2.1127429 ],
       [-1.43893147,  1.59595168,  0.58790714,  3.8221848 ,  4.65350819,
        -2.22888541, -1.92654026, -1.48021591, -3.24884081, -2.0549016 ,
         2.35445762,  3.15103745, -1.86801529,  2.50599551, -0.51754326,
        -0.61664522, -1.13638568, -3.49592352, -0.14462674,  0.52218926,
         0.94804603,  0.48102653, -2.2381053 , -2.80585504, -2.67067432],
       [-0.97880727, -3.42777109, -3.0412941 ,  0.07771082,  0.56903011,
        -0.83924389,  1.42225981,  2.75040054, -1.39653993,  1.39573848,
         1.54833257, -2.28482318, -1.19908345, -1.23934305,  0.1012596 ,
         0.41159233, -4.30142689, -1.29635179,  2.10533142, -0.36743957,
         1.919065  , -5.38604879, -0.68395364,  1.60128868, -0.92604429],
       [-2.21723104,  0.94654542,  2.71510959, -3.15821409,  1.2421279 ,
         1.54322195,  2.38756323, -0.35632336,  1.59020221, -2.20207787,
        -2.31908298, -0.23842566,  2.842659  , -1.16831934, -1.10033369,
         2.3584559 , -2.86785245,  2.43999338,  2.25221205, -2.86637926,
        -3.29994965,  0.2062892 , -2.27796054, -1.90024221,  5.06210709],
       [-0.41967511,  2.81876731, -3.19176626, -2.13696814, -1.76408064,
         1.12407899,  3.01087523,  2.11157274, -1.18705463,  1.32382548,
        -0.06723422,  0.01781415,  2.63719058, -2.33022046, -1.4483918 ,
        -2.91259265,  1.15207839, -1.17236662, -2.97872806, -0.56889361,
        -1.30872059, -1.56629968,  1.08266664, -1.98741114, -0.89212441],
       [-0.6513139 , -0.67716223, -3.03075981,  2.60289288, -2.83682179,
        -1.9186002 , -2.81462193, -1.34299827, -2.06861496, -2.43334413,
         2.26945639, -0.2679739 , -0.33434728, -1.78550529, -0.60505742,
         1.65669274,  0.72454143,  2.54930949,  1.80095565, -0.6691587 ,
        -2.79404688,  0.49601215,  3.74682832,  1.82545471,  0.06220638],
       [-0.31317323, -2.17267513,  3.26154256, -0.96487314, -3.19127011,
         4.00027752,  1.55789506, -3.2021575 , -3.13566351,  2.24683571,
        -1.71402073, -1.77391756, -3.39949989,  1.01905394, -0.05652857,
         1.37401104, -0.78254008, -2.96524358, -2.75739622, -1.48058116,
        -1.42676282, -1.46033561,  2.61433244, -2.85393929, -3.30281448],
       [ 2.51441741, -2.35930371, -2.45756555, -2.2026875 ,  3.0209465 ,
         1.47433209,  1.19607747, -2.47334051, -0.01969391,  3.09615278,
        -2.00905395,  0.31061357, -2.11481953, -1.13638878, -2.71792459,
        -1.84998   ,  2.16689348, -3.52886462,  0.64545244, -2.02436543,
        -0.21113941,  4.98415565, -3.85935426,  1.63133836, -1.07641482],
       [-1.74175525, -2.37122393,  1.55441034,  0.23550114, -1.96717358,
        -1.83210754, -3.25745535, -1.22520483, -0.62219352,  2.43008757,
        -0.22606266, -0.01252492,  1.41700852,  0.24551152, -0.73388177,
        -3.98788357,  1.05824101,  3.25433969,  0.86931092, -0.00776808,
         0.05484827, -0.38054886,  0.17874011, -1.70390856, -0.95664531]], dtype=float32)



In [20]:

    
digitsMLP.predicted_vals_logreg()









    Out[20]:





array([[  1.69714658e-05,   6.81859674e-05,   4.40727854e-06, ...,
          5.32231061e-03,   2.53793423e-05,   1.24584597e-06],
       [  1.01746270e-03,   4.22267512e-05,   4.14312631e-03, ...,
          2.78708263e-04,   5.46442550e-07,   2.42858031e-03],
       [  1.28256984e-03,   2.17894791e-03,   8.93946737e-04, ...,
          7.34846219e-02,   1.61182688e-04,   1.05979457e-03],
       ..., 
       [  5.44993207e-04,   4.99824178e-04,   2.40481794e-02, ...,
          1.57466289e-04,   6.36213226e-03,   3.31715518e-03],
       [  8.89896415e-04,   6.08195027e-04,   1.27942930e-03, ...,
          8.00565720e-01,   9.88285422e-01,   7.53709316e-01],
       [  9.96679068e-01,   9.98024583e-01,   9.72293377e-01, ...,
          6.17099488e-07,   1.53781421e-05,   1.62253886e-01]], dtype=float32)



In [21]:

    
testL1a2 = theano.function([], digitsMLP.Thetas[0].alp1 )
print( testL1a2() )
testL2a2 = theano.function([], digitsMLP.Thetas[1].al )
print( testL2a2() )









    



[[  1.68952462e-03   9.66312247e-04   3.05149867e-03 ...,   6.01312637e-01
    7.69796073e-02   1.04411095e-02]
 [  5.63030466e-02   3.84460762e-02   3.80754247e-02 ...,   5.98353744e-01
    2.61052395e-04   2.27537050e-04]
 [  9.90030646e-01   9.98632133e-01   9.82155979e-01 ...,   1.93933040e-01
    4.72371355e-02   2.82941740e-02]
 ..., 
 [  3.31877563e-05   3.73444600e-05   8.24407116e-03 ...,   2.87410337e-03
    5.70651256e-02   5.32165647e-01]
 [  3.52760107e-04   9.42657294e-04   9.13141354e-04 ...,   6.50500178e-01
    9.91682410e-01   3.51258606e-01]
 [  5.73539697e-02   9.71803325e-04   3.29945865e-03 ...,   6.90432638e-02
    1.61013941e-05   1.03116455e-03]]
[[  1.68952462e-03   9.66312247e-04   3.05149867e-03 ...,   6.01312637e-01
    7.69796073e-02   1.04411095e-02]
 [  5.63030466e-02   3.84460762e-02   3.80754247e-02 ...,   5.98353744e-01
    2.61052395e-04   2.27537050e-04]
 [  9.90030646e-01   9.98632133e-01   9.82155979e-01 ...,   1.93933040e-01
    4.72371355e-02   2.82941740e-02]
 ..., 
 [  3.31877563e-05   3.73444600e-05   8.24407116e-03 ...,   2.87410337e-03
    5.70651256e-02   5.32165647e-01]
 [  3.52760107e-04   9.42657294e-04   9.13141354e-04 ...,   6.50500178e-01
    9.91682410e-01   3.51258606e-01]
 [  5.73539697e-02   9.71803325e-04   3.29945865e-03 ...,   6.90432638e-02
    1.61013941e-05   1.03116455e-03]]



In [33]:

    
[1,2,3,4,5] + [8,1,5]









    Out[33]:





[1, 2, 3, 4, 5, 8, 1, 5]



In [22]:

    
print( digitsMLP.y.shape )
y_cls_test = np.vstack( np.argmax( digitsMLP.y, axis=0) )
print( y_cls_test.shape )
pd.DataFrame( y_cls_test ).describe()









    



(10, 5000)
(5000, 1)






    Out[22]:






  
    
      
      0
    
  
  
    
      count
      5000.000000
    
    
      mean
      4.500000
    
    
      std
      2.872569
    
    
      min
      0.000000
    
    
      25%
      2.000000
    
    
      50%
      4.500000
    
    
      75%
      7.000000
    
    
      max
      9.000000



In [23]:

    
pred_y_cls_test = np.vstack( np.argmax( digitsMLP.predicted_vals_logreg() , axis=0))
print( pred_y_cls_test.shape )
pd.DataFrame( pred_y_cls_test ).describe()









    



(5000, 1)






    Out[23]:






  
    
      
      0
    
  
  
    
      count
      5000.000000
    
    
      mean
      4.499000
    
    
      std
      2.879315
    
    
      min
      0.000000
    
    
      25%
      2.000000
    
    
      50%
      5.000000
    
    
      75%
      7.000000
    
    
      max
      9.000000



In [24]:

    
np.mean( pred_y_cls_test == y_cls_test )









    Out[24]:





0.99119999999999997

Testing on MNIST, from University of Montreal, Deep Learning Tutorial, data



In [27]:

    
K=10
m = len(train_set[1])
y_train_prob = [np.zeros(K) for row in train_set[1]]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_train_prob[i][ train_set[1][i]] = 1
y_train_prob = np.array(y_train_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_train_prob.shape )









    



(10, 50000)



In [28]:

    
print( pd.DataFrame( y_train_prob).describe() )









    



           0          1          2          3          4          5      \
count  10.000000  10.000000  10.000000  10.000000  10.000000  10.000000   
mean    0.100000   0.100000   0.100000   0.100000   0.100000   0.100000   
std     0.316228   0.316228   0.316228   0.316228   0.316228   0.316228   
min     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
25%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
50%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
75%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
max     1.000000   1.000000   1.000000   1.000000   1.000000   1.000000   

           6          7          8          9        ...          49990  \
count  10.000000  10.000000  10.000000  10.000000    ...      10.000000   
mean    0.100000   0.100000   0.100000   0.100000    ...       0.100000   
std     0.316228   0.316228   0.316228   0.316228    ...       0.316228   
min     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
25%     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
50%     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
75%     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
max     1.000000   1.000000   1.000000   1.000000    ...       1.000000   

           49991      49992      49993      49994      49995      49996  \
count  10.000000  10.000000  10.000000  10.000000  10.000000  10.000000   
mean    0.100000   0.100000   0.100000   0.100000   0.100000   0.100000   
std     0.316228   0.316228   0.316228   0.316228   0.316228   0.316228   
min     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
25%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
50%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
75%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
max     1.000000   1.000000   1.000000   1.000000   1.000000   1.000000   

           49997      49998      49999  
count  10.000000  10.000000  10.000000  
mean    0.100000   0.100000   0.100000  
std     0.316228   0.316228   0.316228  
min     0.000000   0.000000   0.000000  
25%     0.000000   0.000000   0.000000  
50%     0.000000   0.000000   0.000000  
75%     0.000000   0.000000   0.000000  
max     1.000000   1.000000   1.000000  

[8 rows x 50000 columns]



In [29]:

    
m,d= train_set[0].shape
MNIST_MTL = MLP(3,[d,25,10], m, train_set[0].T, y_train_prob, T.nnet.sigmoid, 1., 0.1, 0.00001)



In [31]:

    
MNIST_MTL.accuracy_log_reg()









    



Accuracy score : 0.095720 






    Out[31]:





0.09572



In [32]:

    
print( MNIST_MTL.Thetas[0].Theta.get_value() )
MNIST_MTL.Thetas[1].Theta.get_value()









    



[[-0.21252947  0.08412755 -0.04290283 ...,  0.07965862 -0.23788518
   0.22836781]
 [ 0.21750133 -0.28106511  0.2671701  ...,  0.05657074 -0.29780233
  -0.34037825]
 [-0.02203849  0.19995408  0.29139307 ..., -0.0802632   0.14923781
  -0.01326215]
 ..., 
 [-0.09192752  0.2681919   0.26859927 ...,  0.06894684  0.12018485
  -0.32358104]
 [-0.13953739  0.02424429 -0.20282558 ...,  0.13586117 -0.29190949
   0.01420764]
 [ 0.14536618 -0.06120829 -0.19793963 ...,  0.02141157 -0.26165357
  -0.22347094]]






    Out[32]:





array([[ 0.39743981,  0.47411704, -0.06570012, -1.42164946, -1.16744244,
        -0.44029245,  1.55344975,  0.42985651,  0.42142299,  0.34747389,
        -0.81492281, -1.56219399,  0.82000202, -0.40216985,  1.47232902,
         0.35999769,  0.68932515,  0.82125103, -0.06185548, -0.27060255,
        -0.25692338, -0.86920112, -1.25981688,  1.49232507,  0.52447045],
       [-0.45812011, -1.22509992, -1.14734924,  0.8219015 ,  0.67774659,
        -0.54277843, -0.51398909, -1.19371367,  1.41380441,  0.30840605,
        -0.11502755,  0.03701442, -0.82230645,  0.57699567,  1.3465414 ,
         0.46789521,  0.72777843, -0.74098152,  1.17320132,  0.3995336 ,
         0.79102081, -0.1648806 , -0.07733183, -1.60657287, -0.64839828],
       [-0.26341805,  1.1494323 , -0.1576034 , -0.51010013, -1.4537679 ,
        -0.32284725, -0.11438043, -0.16837011, -0.92213452, -1.61426508,
        -1.34188533, -0.43147799, -0.61895543,  1.38522243, -0.58895016,
        -0.7618435 ,  0.89010906, -0.43544725,  0.9864412 ,  1.43864715,
        -1.04564011,  1.22792351,  1.51681113,  1.48080194,  0.77915668],
       [ 0.20965257,  0.96099746, -1.38709569, -0.71545523,  1.10635912,
        -1.29045534, -1.57748663, -0.95929617, -0.24077024, -0.35265234,
        -0.75665629, -1.07101023, -0.00545067, -0.99316341,  1.4602592 ,
         0.7212196 , -0.10515147,  0.49578574,  1.45931995,  1.07267094,
        -0.01084974,  1.01463413, -0.73990732, -0.75446784, -1.58813906],
       [ 1.26138949, -0.47427896, -0.4497835 , -0.19745249, -0.21095291,
        -0.08235157,  1.11536026,  0.04610212,  1.11256003, -0.46868172,
         0.98976576,  0.5969364 ,  1.2798928 , -0.48845506, -1.03150034,
         0.96056491,  0.30198944,  1.47925031,  0.36784145,  0.33264968,
         0.26245806,  0.48660216,  0.96751767, -1.27767527,  0.42533398],
       [-1.35573721, -0.85225189,  0.39260316,  1.22144449, -0.31289348,
        -0.9116798 , -1.23236263,  0.81047082,  0.68190771,  0.23868939,
         0.48585162,  0.1120138 , -0.15501086,  0.46083274,  1.03472054,
         1.32915258, -1.44029725, -0.71790475,  1.60148978,  0.20379403,
         1.15068221, -1.49505317, -0.42173779, -0.72793669,  0.96851325],
       [ 1.43791103, -0.69099313, -0.79151177,  0.21312031,  1.36474013,
        -1.19343758, -0.45168665,  0.60482723,  0.41439977, -1.3197763 ,
        -1.09207129,  0.23710582,  0.06775524, -0.72686183,  0.96668959,
         0.09902351,  0.4042387 , -0.44409871, -0.26330748,  1.35316038,
        -1.48465347,  1.03650403,  0.71476686,  1.63380992,  0.13157721],
       [-0.90288603,  1.17905378,  1.22328877,  1.41004956,  0.29629964,
        -0.9095031 , -0.23757875,  1.6151613 , -1.52695167, -1.07278395,
         1.56079459,  0.52057123, -1.35828531, -1.01550984,  0.75505394,
        -0.20916736, -0.56811482, -0.02100989,  1.45307636, -1.61759782,
        -0.51052654, -1.48488081,  1.15462708, -1.02267063, -1.04683352],
       [ 1.52449572,  1.1317147 ,  0.56625313,  1.33766675, -0.3755441 ,
         0.9018296 ,  0.18880704, -0.07619639, -0.89904916,  1.51346755,
        -0.34904653, -0.45106241,  1.63571787,  0.12569541,  1.17675114,
        -0.60786849, -0.92819321,  0.43607047,  1.23182929, -0.37070179,
         0.63889045, -1.0101124 , -1.28258407,  1.46705651, -0.69088858],
       [-1.50364363,  1.22934222,  0.83844817,  0.65174961, -1.37190723,
        -0.32444778, -0.26416573,  0.29394153, -0.66541278,  0.23971787,
         0.32632565, -1.46561682, -0.02179751, -1.46791589, -0.15427259,
        -0.18021606, -0.39853364, -0.67619216, -0.01778612, -0.78865045,
        -1.53309178,  0.3225404 ,  1.01434851,  0.08351628,  1.15306926]], dtype=float32)



In [33]:

    
MNIST_MTL.predicted_vals_logreg()









    Out[33]:





array([[ 0.77596611,  0.75060475,  0.423906  , ...,  0.60422331,
         0.40988356,  0.31822193],
       [ 0.34422615,  0.67563272,  0.273395  , ...,  0.20291083,
         0.3232716 ,  0.4690749 ],
       [ 0.93080932,  0.28231877,  0.59183002, ...,  0.95939362,
         0.69641775,  0.79752219],
       ..., 
       [ 0.00841192,  0.01366395,  0.53732932, ...,  0.08155674,
         0.07537536,  0.07692103],
       [ 0.90377617,  0.96592015,  0.98436731, ...,  0.98901641,
         0.98044145,  0.9684335 ],
       [ 0.0747021 ,  0.01449165,  0.20927182, ...,  0.16606021,
         0.27188161,  0.05889074]], dtype=float32)



In [34]:

    
MNIST_MTL.train_model(100000)



In [35]:

    
MNIST_MTL.accuracy_log_reg()









    



Accuracy score : 0.980420 






    Out[35]:





0.98041999999999996



In [36]:

    
print( MNIST_MTL.Thetas[0].Theta.get_value() )
MNIST_MTL.Thetas[1].Theta.get_value()









    



[[-0.47362512  0.18747789 -0.09561399 ...,  0.17751911 -0.53015506
   0.50892061]
 [ 0.48472929 -0.62637842  0.5954116  ...,  0.12606943 -0.66368026
  -0.7585848 ]
 [-0.04911496  0.44561294  0.64935881 ..., -0.1788629   0.33259752
  -0.02955511]
 ..., 
 [-0.20487256  0.59768224  0.59859228 ...,  0.15364726  0.26784071
  -0.72111851]
 [-0.31096086  0.05403042 -0.45201683 ...,  0.30278146 -0.65051121
   0.03166183]
 [ 0.3239423  -0.13641159 -0.44111899 ...,  0.0477186  -0.58310646
  -0.49803132]]






    Out[36]:





array([[  7.11062133e-01,   4.57554549e-01,   9.75120664e-02,
         -5.07937050e+00,  -3.83276868e+00,   5.47499716e-01,
          7.71603918e+00,   1.36168015e+00,   3.17900872e+00,
         -2.18454540e-01,  -2.86413026e+00,  -4.37900066e+00,
         -2.17216992e+00,  -6.29720163e+00,   3.04203176e+00,
         -5.22348732e-02,   1.34919095e+00,   4.03648329e+00,
         -4.14651203e+00,  -5.77749729e+00,  -2.90662193e+00,
         -3.27267551e+00,  -4.66104698e+00,   5.65531969e+00,
          4.85670775e-01],
       [ -9.64691222e-01,  -6.14411163e+00,  -7.89063311e+00,
          2.64467096e+00,   1.46627915e+00,  -3.22474170e+00,
         -1.01905584e+00,  -1.88596475e+00,   7.01221895e+00,
          1.92705405e+00,   2.55174327e+00,   1.04782128e+00,
         -5.82779360e+00,   3.49170709e+00,   6.75916958e+00,
          2.16688132e+00,   5.75238085e+00,  -3.99549007e+00,
          1.14223826e+00,  -3.51164603e+00,   1.34314454e+00,
          3.65567946e+00,   1.13587379e-01,  -5.55841112e+00,
         -5.92364883e+00],
       [ -6.39417708e-01,   3.38985538e+00,  -3.20516133e+00,
         -5.39822042e-01,  -5.74882793e+00,  -5.57149768e-01,
          4.90440190e-01,  -2.76023102e+00,  -4.99793005e+00,
         -6.13943529e+00,  -4.86437082e+00,   1.80150628e+00,
         -2.43367410e+00,   5.27764797e+00,  -6.62898493e+00,
          4.37618375e-01,   4.27750015e+00,  -2.12696409e+00,
          4.19394159e+00,   5.73637605e-01,   1.46028674e+00,
         -1.85208154e+00,   3.50097370e+00,   6.81654167e+00,
          3.72835803e+00],
       [  5.72197735e-01,   1.68034172e+00,  -3.00833821e+00,
         -4.48841381e+00,   5.16429281e+00,  -4.16888475e+00,
         -6.96294880e+00,  -1.28992057e+00,  -2.63191819e-01,
         -1.80943739e+00,  -6.06960821e+00,  -6.07639885e+00,
         -1.68026042e+00,  -2.49091673e+00,   2.72927237e+00,
          4.20622349e+00,  -6.85564899e+00,   8.40938282e+00,
          3.71062899e+00,   4.54962540e+00,  -1.98130476e+00,
          4.28268671e+00,  -3.92813444e+00,  -4.68170261e+00,
         -3.60260940e+00],
       [  2.54607844e+00,  -7.96052980e+00,  -2.66434860e+00,
         -2.93493319e+00,  -2.69420886e+00,   2.50106549e+00,
          3.34071350e+00,  -2.57470822e+00,   4.58119583e+00,
         -6.90563297e+00,   3.39330125e+00,   2.23719907e+00,
          3.48375678e+00,  -5.02125692e+00,  -8.19848251e+00,
         -1.71341038e+00,  -5.05596447e+00,   6.00544357e+00,
         -6.56114340e-01,  -1.66288626e+00,   6.16928673e+00,
         -2.33877707e+00,   3.89905548e+00,  -5.27366161e+00,
          4.28496420e-01],
       [ -3.05176616e+00,  -1.78837430e+00,   7.71145201e+00,
          4.71982098e+00,  -3.77251506e+00,  -1.15936108e+01,
         -1.03705215e+01,   6.77015543e+00,   1.57322168e-01,
         -8.24020922e-01,   1.67905545e+00,  -1.78405321e+00,
         -7.02612519e-01,   1.29097593e+00,   4.61267138e+00,
          7.27420330e+00,  -6.49334812e+00,  -9.20294094e+00,
          4.45572805e+00,   6.66842163e-01,   5.05022955e+00,
         -4.36325932e+00,  -5.60956860e+00,  -2.40490198e+00,
          4.14503574e+00],
       [  3.16660190e+00,  -4.45554256e+00,  -8.53040874e-01,
          1.87503129e-01,   3.07649398e+00,  -4.22100449e+00,
         -3.50551677e+00,   1.83885932e+00,   2.24337983e+00,
         -4.28148890e+00,  -4.76952076e+00,   2.70211458e+00,
          3.59806252e+00,  -7.04305601e+00,   3.61042929e+00,
         -5.86637259e+00,   4.11456347e+00,  -3.98823214e+00,
         -4.80216026e+00,   4.52647972e+00,  -7.22660494e+00,
          1.45237148e-01,  -3.31796020e-01,   6.53981590e+00,
         -1.41842246e+00],
       [ -1.80336964e+00,   8.29802036e-01,   3.64742970e+00,
         -2.13532329e+00,   2.97970319e+00,  -2.54826999e+00,
          5.18451631e-01,   7.25896454e+00,  -3.89744210e+00,
         -3.08217072e+00,   7.56499863e+00,   4.09100199e+00,
         -7.79509687e+00,  -3.42566800e+00,   1.08090663e+00,
         -2.89872932e+00,  -2.74361014e+00,  -1.76804709e+00,
          6.18264246e+00,  -5.52989483e+00,  -2.50305200e+00,
         -5.23402739e+00,   6.55207300e+00,  -5.23101330e+00,
         -6.57023430e+00],
       [  3.09581041e+00,  -2.72198647e-01,   1.52220082e+00,
          6.30821896e+00,  -3.80636024e+00,   2.36185217e+00,
         -1.18582726e+00,  -5.26250982e+00,  -6.28981304e+00,
          3.22874022e+00,  -6.88107872e+00,   2.49213648e+00,
          4.94375658e+00,  -1.33158898e+00,  -9.53457475e-01,
         -9.96722794e+00,  -7.79621029e+00,   2.17832088e+00,
         -4.02353048e+00,  -2.39235783e+00,   2.79603124e+00,
         -3.93508315e+00,  -4.85469151e+00,   4.19522619e+00,
         -4.63762474e+00],
       [ -3.33755684e+00,   4.57075739e+00,  -4.37777138e+00,
          2.42956066e+00,  -3.74254751e+00,  -8.17862332e-01,
          2.52893835e-01,  -5.16166925e+00,  -4.43093204e+00,
          8.01136875e+00,  -5.95063388e-01,  -8.98904037e+00,
          1.42401373e+00,  -7.24769258e+00,  -1.48296952e+00,
         -8.67474616e-01,  -2.32061291e+00,  -2.56787968e+00,
         -1.69821453e+00,  -6.43771887e+00,  -5.80188227e+00,
          4.79170799e+00,  -7.73105025e-01,  -6.07473543e-04,
          6.16166925e+00]], dtype=float32)



In [37]:

    
MNIST_MTL.predicted_vals_logreg()









    Out[37]:





array([[  4.52048823e-07,   1.00000000e+00,   7.51884666e-10, ...,
          6.04108479e-08,   1.88120055e-06,   1.62847478e-08],
       [  5.50135458e-11,   3.89633271e-07,   3.38999451e-09, ...,
          7.60669491e-06,   4.72258321e-09,   7.85911197e-06],
       [  2.77830568e-05,   2.95011669e-05,   3.45770331e-06, ...,
          8.52322955e-06,   7.60071725e-02,   1.38454227e-04],
       ..., 
       [  1.52943358e-05,   1.54980810e-08,   9.39907186e-05, ...,
          7.08678338e-10,   7.70168582e-11,   4.95301133e-09],
       [  9.09049849e-11,   7.11831039e-13,   5.01017041e-07, ...,
          9.96697187e-01,   1.42343296e-02,   9.99997377e-01],
       [  1.09123675e-05,   1.50772337e-06,   9.65651736e-08, ...,
          1.19623294e-07,   9.40610245e-02,   1.57795876e-06]], dtype=float32)

Save the mode; cf. Getting Started, DeepLearning 0.1 documentation, Loading and Saving Models



In [38]:

    
import cPickle



In [40]:

    
save_file = open('./saved_models/MNIST_MTL_log_reg','wb')



In [41]:

    
for Thet in MNIST_MTL.Thetas:
    cPickle.dump( Thet.Theta.get_value(borrow=True), save_file,-1) # the -1 is for HIGHEST priority
    cPickle.dump( Thet.b.get_value(borrow=True), save_file,-1)



In [42]:

    
save_file.close()



In [50]:

    
MNIST_MTL.Thetas[0].al.set_value( valid_set[0].T.astype(theano.config.floatX) )



In [47]:

    
K=10
m = len(valid_set[1])
y_valid_prob = [np.zeros(K) for row in valid_set[1]]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_valid_prob[i][ valid_set[1][i]] = 1
y_valid_prob = np.array(y_valid_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_valid_prob.shape )









    



(10, 10000)



In [48]:

    
MNIST_MTL.y = y_valid_prob



In [51]:

    
MNIST_MTL.predicted_vals_logreg()









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-51-ec623bb772a5> in <module>()
----> 1 MNIST_MTL.predicted_vals_logreg()

/home/topolo/PropD/MLgrabbag/ML/NN.py in predicted_vals_logreg(self)
    311         def predicted_vals_logreg(self):
    312                 predict_vals_func = predicted_logreg( self.Thetas[-1].alp1 )
--> 313                 return predict_vals_func() # do the actual prediction on actual values, with the inputted X and trained Thetas,b's
    314 
    315 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: dimension mismatch in args to gemm (25,784)x(784,10000)->(25,50000)
Apply node that caused the error: GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, al, TensorConstant{1.0})
Toposort index: 14
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(25, 50000), (), (25, 784), (784, 10000), ()]
Inputs strides: [(50000, 1), (), (784, 1), (10000, 1), ()]
Inputs values: ['not shown', array(1.0, dtype=float32), 'not shown', 'not shown', array(1.0, dtype=float32)]
Outputs clients: [[GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.



In [53]:

    
theano.function([], MNIST_MTL.Thetas[0].alp1)()









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-53-10646c6daf13> in <module>()
----> 1 theano.function([], MNIST_MTL.Thetas[0].alp1)()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: dimension mismatch in args to gemm (25,784)x(784,10000)->(25,50000)
Apply node that caused the error: GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, al, TensorConstant{1.0})
Toposort index: 7
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(25, 50000), (), (25, 784), (784, 10000), ()]
Inputs strides: [(50000, 1), (), (784, 1), (10000, 1), ()]
Inputs values: ['not shown', array(1.0, dtype=float32), 'not shown', 'not shown', array(1.0, dtype=float32)]
Outputs clients: [[GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.



In [56]:

    
Layer1 = MNIST_MTL.Thetas[0]
Layer2 = MNIST_MTL.Thetas[1]
m = valid_set[0].shape[0]
print(m)



In [60]:

    
a2 = T.nnet.sigmoid( T.dot( Layer1.Theta, Layer1.al) + T.tile( Layer1.b, (1,m)) )
a3 = T.nnet.sigmoid( T.dot( Layer2.Theta, a2) + T.tile( Layer2.b, (1,m)) )
valid_pred = theano.function([], a3)()
print( valid_pred.shape)









    



(10, 10000)



In [61]:

    
pd.DataFrame( valid_pred.T).describe()









    Out[61]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
    
  
  
    
      count
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
    
    
      mean
      9.992079e-02
      1.074899e-01
      9.912494e-02
      1.052671e-01
      9.976897e-02
      9.037534e-02
      9.769981e-02
      1.085759e-01
      1.005443e-01
      9.500753e-02
    
    
      std
      2.961449e-01
      3.058113e-01
      2.902825e-01
      2.958724e-01
      2.931867e-01
      2.795904e-01
      2.927824e-01
      3.047731e-01
      2.924316e-01
      2.828876e-01
    
    
      min
      9.005615e-17
      1.986378e-17
      2.439026e-13
      6.265842e-16
      1.214437e-18
      6.500246e-18
      9.674661e-19
      1.735837e-17
      3.318569e-18
      6.359484e-18
    
    
      25%
      1.320481e-09
      1.788181e-09
      4.365240e-07
      1.482097e-07
      1.504599e-09
      4.772580e-09
      1.436151e-10
      3.032865e-09
      3.319625e-08
      1.242549e-08
    
    
      50%
      9.058250e-08
      1.760941e-07
      1.530497e-05
      8.275158e-06
      3.291525e-07
      4.075375e-07
      2.823017e-08
      4.088125e-07
      3.484185e-06
      1.585515e-06
    
    
      75%
      1.096723e-05
      3.165664e-05
      5.320824e-04
      4.356618e-04
      3.269091e-05
      3.865018e-05
      6.124497e-06
      8.373335e-05
      2.405906e-04
      2.055749e-04
    
    
      max
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      9.999994e-01



In [62]:

    
np.mean( np.vstack( np.argmax( valid_pred,axis=0)) == np.vstack( valid_set[1] ) )









    Out[62]:





0.95340000000000003



In [63]:

    
X_in = T.matrix()



In [64]:

    
X_in.set_value( valid_set[0].T.astype(theano.config.floatX))









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-64-922d1dc9b484> in <module>()
----> 1 X_in.set_value( valid_set[0].T.astype(theano.config.floatX))

AttributeError: 'TensorVariable' object has no attribute 'set_value'



In [68]:

    
a2_giv = T.nnet.sigmoid( T.dot( Layer1.Theta, X_in) + T.tile(Layer1.b, (1,m)))
a3_giv = T.nnet.sigmoid( T.dot( Layer2.Theta, a2_giv) + T.tile( Layer2.b, (1,m)) )
valid_pred_givens = theano.function([], outputs=a3_giv, givens={ X_in: valid_set[0].T.astype(theano.config.floatX)} )



In [72]:

    
print( valid_pred_givens().shape )
pd.DataFrame( valid_pred_givens().T).describe()









    



(10, 10000)






    Out[72]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
    
  
  
    
      count
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
      1.000000e+04
    
    
      mean
      9.992079e-02
      1.074899e-01
      9.912494e-02
      1.052671e-01
      9.976897e-02
      9.037534e-02
      9.769981e-02
      1.085759e-01
      1.005443e-01
      9.500753e-02
    
    
      std
      2.961449e-01
      3.058113e-01
      2.902825e-01
      2.958724e-01
      2.931867e-01
      2.795904e-01
      2.927824e-01
      3.047731e-01
      2.924316e-01
      2.828876e-01
    
    
      min
      9.005615e-17
      1.986378e-17
      2.439026e-13
      6.265842e-16
      1.214437e-18
      6.500246e-18
      9.674661e-19
      1.735837e-17
      3.318569e-18
      6.359484e-18
    
    
      25%
      1.320481e-09
      1.788181e-09
      4.365240e-07
      1.482097e-07
      1.504599e-09
      4.772580e-09
      1.436151e-10
      3.032865e-09
      3.319625e-08
      1.242549e-08
    
    
      50%
      9.058250e-08
      1.760941e-07
      1.530497e-05
      8.275158e-06
      3.291525e-07
      4.075375e-07
      2.823017e-08
      4.088125e-07
      3.484185e-06
      1.585515e-06
    
    
      75%
      1.096723e-05
      3.165664e-05
      5.320824e-04
      4.356618e-04
      3.269091e-05
      3.865018e-05
      6.124497e-06
      8.373335e-05
      2.405906e-04
      2.055749e-04
    
    
      max
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      1.000000e+00
      9.999994e-01



In [78]:

    
np.mean( np.vstack( np.argmax( valid_pred_givens(),axis=0)) == np.vstack( valid_set[1] ) )









    Out[78]:





0.95340000000000003



In [79]:

    
test_pred_givens = theano.function([], outputs=a3_giv, givens={ X_in: test_set[0].T.astype(theano.config.floatX)} )



In [80]:

    
np.mean( np.vstack( np.argmax( test_pred_givens(),axis=0)) == np.vstack( test_set[1] ) )









    Out[80]:





0.94979999999999998



In [81]:

    
range(1,3)









    Out[81]:





[1, 2]



In [82]:

    
range(3)









    Out[82]:





[0, 1, 2]



In [83]:

    
range(1,3-1)









    Out[83]:





[1]

cf. Glass Classification



In [11]:

    
gls_data = pd.read_csv( "./kaggle/glass.csv")



In [12]:

    
gls_data.describe()



In [16]:

    
gls_data.get_values().shape









    Out[16]:





(214, 10)



In [12]:

    
X_gls = gls_data.get_values()[:,:-1]
print(X_gls.shape)
y_gls = gls_data.get_values()[:,-1]
print(y_gls.shape)
print( y_gls[:10])
X_gls_train = gls_data.get_values()[:-14,:-1]
print(X_gls_train.shape)
y_gls_train = gls_data.get_values()[:-14,-1]
print(y_gls_train.shape)









    



(214, 9)
(214,)
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
(200, 9)
(200,)



In [29]:

    
K=7
m = len(y_gls_train)
y_gls_train_prob = [np.zeros(K) for row in y_gls_train]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_gls_train_prob[i][ y_gls_train[i]-1] = 1
y_gls_train_prob = np.array(y_gls_train_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_gls_train_prob.shape )









    



(7, 200)






    



/home/topolo/Public/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:5: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future



In [42]:

    
gls_MLP = MLP( 3, [9,8,7],200, X_gls_train.T, y_gls_train_prob, T.nnet.sigmoid, 0.01,0.05,0.0001 )



In [43]:

    
gls_MLP.accuracy_log_reg()









    



Accuracy score : 0.045000 






    Out[43]:





0.044999999999999998



In [44]:

    
gls_MLP.train_model(10000)



In [45]:

    
gls_MLP.accuracy_log_reg()









    



Accuracy score : 0.380000 






    Out[45]:





0.38



In [46]:

    
gls_MLP.predicted_vals_logreg()









    Out[46]:





array([[ 0.34978667,  0.34978667,  0.34978667, ...,  0.34978667,
         0.34978667,  0.34978667],
       [ 0.3797904 ,  0.3797904 ,  0.3797904 , ...,  0.3797904 ,
         0.3797904 ,  0.3797904 ],
       [ 0.08406211,  0.08406211,  0.08406211, ...,  0.08406211,
         0.08406211,  0.08406211],
       ..., 
       [ 0.06395678,  0.06395678,  0.06395678, ...,  0.06395678,
         0.06395678,  0.06395678],
       [ 0.04376425,  0.04376425,  0.04376425, ...,  0.04376425,
         0.04376425,  0.04376425],
       [ 0.07399232,  0.07399232,  0.07399232, ...,  0.07399232,
         0.07399232,  0.07399232]], dtype=float32)



In [47]:

    
gls_MLP.train_model(10000)
gls_MLP.accuracy_log_reg()









    



Accuracy score : 0.380000 






    Out[47]:





0.38



In [ ]:

    
ga



In [48]:

    
X_gls_test = gls_data.get_values()[-14:,:-1]
print( X_gls_test.shape )
y_gls_test = gls_data.get_values()[-14:,-1]
print( y_gls_test.shape)









    



(14, 9)
(14,)



In [49]:

    
gls_predict_on_test = gls_MLP.predict_on( 14, X_gls_test.T )



In [51]:

    
np.mean( np.vstack( np.argmax( gls_predict_on_test(), axis=0) ) == (y_gls_test-1) )









    Out[51]:





0.0



In [52]:

    
y_gls_test









    Out[52]:





array([ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.])



In [53]:

    
np.vstack( np.argmax( gls_predict_on_test(), axis=0))









    Out[53]:





array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1]])



In [17]:

    
X_sym = T.matrix()



In [33]:

    
rng = np.random.RandomState(1234)
Thetab1 = Layer( rng, 1, 4,3,2, al = X_sym, activation=T.nnet.sigmoid)



In [34]:

    
Thetab1.alp1
Thetab1.Theta.get_value().shape









    Out[34]:





(3, 4)



In [35]:

    
Thetab2 = Layer( rng, 2, 3,2,2, al=Thetab1.alp1, activation=T.nnet.sigmoid)



In [36]:

    
Thetab2.al = Thetab1.alp1



In [30]:

    
X_sym.shape[0]









    Out[30]:





Subtensor{int64}.0



In [31]:

    
T.tile( Thetab1.b, (1, X_sym.shape[0]))









    Out[31]:





Reshape{2}.0



In [42]:

    
test12comp = theano.function( [], outputs=Thetab2.alp1, givens={ X_sym : X42test} )



In [40]:

    
X42test = np.array([1,2,3,4,5,6,7,8]).reshape((4,2)).astype(theano.config.floatX)



In [43]:

    
test12comp()









    Out[43]:





array([[ 0.91000074,  0.91101253],
       [ 0.02431746,  0.02417665]], dtype=float32)



In [44]:

    
X43test = np.array(range(1,13)).reshape((4,3)).astype(theano.config.floatX)



In [45]:

    
X43test









    Out[45]:





array([[  1.,   2.,   3.],
       [  4.,   5.,   6.],
       [  7.,   8.,   9.],
       [ 10.,  11.,  12.]], dtype=float32)



In [46]:

    
test43comp = theano.function( [], outputs=Thetab2.alp1, givens={ X_sym : X43test} )



In [47]:

    
test43comp()









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-47-4a3d91ae9a55> in <module>()
----> 1 test43comp()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: dimension mismatch in args to gemm (3,4)x(4,3)->(3,2)
Apply node that caused the error: GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, <CudaNdarrayType(float32, matrix)>, TensorConstant{1.0})
Toposort index: 14
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(3, 2), (), (3, 4), (4, 3), ()]
Inputs strides: [(2, 1), (), (4, 1), (3, 1), ()]
Inputs values: ['not shown', array(1.0, dtype=float32), 'not shown', 'not shown', array(1.0, dtype=float32)]
Outputs clients: [[GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.



In [50]:

    
print( type(Thetab1.al ))









    



<class 'theano.tensor.var.TensorVariable'>



In [52]:

    
lin_zlp1 = T.dot(Thetab1.Theta, Thetab1.al)+T.tile( Thetab1.b, (1,Thetab1.al.shape[1]))
a1p1 = Thetab1.g( lin_zlp1 )



In [51]:

    
Thetab1.al = X_sym



In [53]:

    
Thetab2.al = a1p1



In [54]:

    
lin_z2p1 = T.dot(Thetab2.Theta, Thetab2.al)+T.tile( Thetab2.b, (1, Thetab2.al.shape[1]))
a2p1 = Thetab2.g( lin_z2p1 )



In [55]:

    
test_gen_conn = theano.function([], outputs=a2p1, givens={ Thetab1.al : X42test })



In [56]:

    
test_gen_conn()









    Out[56]:





array([[ 0.91000074,  0.91101253],
       [ 0.02431746,  0.02417665]], dtype=float32)



In [57]:

    
test_gen_conn = theano.function([], outputs=a2p1, givens={ Thetab1.al : X43test })



In [58]:

    
test_gen_conn()









    Out[58]:





array([[ 0.91144621,  0.91158789,  0.91166627],
       [ 0.02425005,  0.02417867,  0.02417858]], dtype=float32)

GPU test



In [59]:

    
test_gen_conn = theano.function([], outputs=sandbox.cuda.basic_ops.gpu_from_host(a2p1), givens={ Thetab1.al : X42test })



In [60]:

    
test_gen_conn()









    Out[60]:





CudaNdarray([[ 0.91000074  0.91101253]
 [ 0.02431746  0.02417665]])



In [61]:

    
test_gen_conn = theano.function([], outputs=sandbox.cuda.basic_ops.gpu_from_host(a2p1), givens={ Thetab1.al : X43test })



In [62]:

    
test_gen_conn()









    Out[62]:





CudaNdarray([[ 0.91144621  0.91158789  0.91166627]
 [ 0.02425005  0.02417867  0.02417858]])

Summary for Neural Net with Multiple Layers for logistic regression (but can be extended to linear regression)



In [9]:

    
sys.path.append( os.getcwd() + '/ML' )



In [10]:

    
from NN import MLP



In [11]:

    
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')

# recall that whereas the original labels (in the variable y) were 1, 2, ..., 10, for the purpose of training a 
# neural network, we need to recode the labels as vectors containing only values 0 or 1
K=10
m = ex4data1['y'].shape[0]
y_prob = [np.zeros(K) for row in ex4data1['y']]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_prob[i][ ex4data1['y'][i]-1] = 1
y_prob = np.array(y_prob).T.astype(theano.config.floatX)  # size dims. (K,m)

print(ex4data1['X'].T.shape)
print(y_prob.shape)









    



Loading and Visualizing Data ... 

(400, 5000)
(10, 5000)



In [12]:

    
digitsMLP = MLP( 3, [400,25,10], ex4data1['X'].T, y_prob, T.nnet.sigmoid, 1.)



In [13]:

    
digitsMLP.build_update(ex4data1['X'].T, y_prob, 0.01, 0.00001)



In [14]:

    
digitsMLP.predicted_vals_logreg()









    Out[14]:





array([[ 0.01459562,  0.00558456,  0.02797613, ...,  0.0674273 ,
         0.04817105,  0.03059   ],
       [ 0.99074477,  0.97213686,  0.98990673, ...,  0.94329911,
         0.99409556,  0.98447394],
       [ 0.02927557,  0.05798027,  0.07752991, ...,  0.36027411,
         0.1559844 ,  0.26209033],
       ..., 
       [ 0.00369688,  0.01589782,  0.0115205 , ...,  0.0152018 ,
         0.00421828,  0.00280912],
       [ 0.78314799,  0.61225456,  0.71571481, ...,  0.80646819,
         0.94149739,  0.52025074],
       [ 0.96498191,  0.98687011,  0.78228015, ...,  0.95690244,
         0.63841748,  0.404479  ]], dtype=float32)



In [15]:

    
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)









    



Accuracy score : 0.134200 






    Out[15]:





0.13420000000000001



In [16]:

    
digitsMLP.train_model(10000)



In [17]:

    
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)









    



Accuracy score : 0.894600 






    Out[17]:





0.89459999999999995



In [18]:

    
digitsMLP.train_model(50000)



In [19]:

    
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)









    



Accuracy score : 0.956600 






    Out[19]:





0.95660000000000001

Testing on University of Montreal LISA lab MNIST data



In [20]:

    
import gzip
import six.moves.cPickle as pickle
with gzip.open("../DeepLearningTutorials/data/mnist.pkl.gz", 'rb') as f:
    try:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except:
        train_set, valid_set, test_set = pickle.load(f)



In [21]:

    
K=10
m = len(train_set[1])
y_train_prob = [np.zeros(K) for row in train_set[1]]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_train_prob[i][ train_set[1][i]] = 1
y_train_prob = np.array(y_train_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_train_prob.shape )









    



(10, 50000)



In [22]:

    
MNIST_MLP = MLP( 3,[784,49,10], train_set[0].T, y_train_prob, T.nnet.sigmoid, 1.)



In [23]:

    
MNIST_MLP.build_update( train_set[0].T, y_train_prob, 0.01, 0.0001)



In [24]:

    
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)









    



Accuracy score : 0.098660 






    Out[24]:





0.098659999999999998



In [25]:

    
MNIST_MLP.train_model(50000)



In [26]:

    
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)









    



Accuracy score : 0.862140 






    Out[26]:





0.86214000000000002



In [30]:

    
%time MNIST_MLP.train_model(100000)









    



CPU times: user 3min 38s, sys: 8min 22s, total: 12min
Wall time: 12min



In [31]:

    
MNIST_MLP.accuracy_logreg( train_set[0].T,y_train_prob)









    



Accuracy score : 0.826500 






    Out[31]:





0.82650000000000001



In [32]:

    
m = len(valid_set[1])
y_valid_prob = [np.zeros(K) for row in valid_set[1]]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_valid_prob[i][ valid_set[1][i]] = 1
y_valid_prob = np.array(y_valid_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_valid_prob.shape )









    



(10, 10000)



In [33]:

    
m = len(test_set[1])
y_test_prob = [np.zeros(K) for row in test_set[1]]  # list of 5000 numpy arrays of size dims. (10,)
for i in range( m):
        y_test_prob[i][ test_set[1][i]] = 1
y_test_prob = np.array(y_test_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_test_prob.shape )









    



(10, 10000)



In [34]:

    
MNIST_MLP.accuracy_logreg( valid_set[0].T,y_valid_prob)









    



Accuracy score : 0.814200 






    Out[34]:





0.81420000000000003



In [35]:

    
MNIST_MLP.accuracy_logreg( test_set[0].T,y_test_prob)









    



Accuracy score : 0.805000 






    Out[35]:





0.80500000000000005



In [40]:

    
MNIST_d = train_set[0].T.shape[0]
print(MNIST_d)
MNIST_MLP = MLP( 3,[MNIST_d,25,10], train_set[0].T, y_train_prob, T.nnet.sigmoid, 1.)
MNIST_MLP.build_update( train_set[0].T, y_train_prob, 0.1, 0.00001)



In [41]:

    
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)









    



Accuracy score : 0.095720 






    Out[41]:





0.09572



In [42]:

    
MNIST_MLP.train_model(150000)



In [43]:

    
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)









    



Accuracy score : 0.986520 






    Out[43]:





0.98651999999999995



In [44]:

    
MNIST_MLP.accuracy_logreg( valid_set[0].T, y_valid_prob)









    



Accuracy score : 0.950600 






    Out[44]:





0.9506



In [45]:

    
MNIST_MLP.accuracy_logreg( test_set[0].T, y_test_prob)









    



Accuracy score : 0.945200 






    Out[45]:





0.94520000000000004



In [ ]:

	RI	Na	Mg	Al	Si	K	Ca	Ba	Fe	Type
count	214.000000	214.000000	214.000000	214.000000	214.000000	214.000000	214.000000	214.000000	214.000000	214.000000
mean	1.518365	13.407850	2.684533	1.444907	72.650935	0.497056	8.956963	0.175047	0.057009	2.780374
std	0.003037	0.816604	1.442408	0.499270	0.774546	0.652192	1.423153	0.497219	0.097439	2.103739
min	1.511150	10.730000	0.000000	0.290000	69.810000	0.000000	5.430000	0.000000	0.000000	1.000000
25%	1.516523	12.907500	2.115000	1.190000	72.280000	0.122500	8.240000	0.000000	0.000000	1.000000
50%	1.517680	13.300000	3.480000	1.360000	72.790000	0.555000	8.600000	0.000000	0.000000	2.000000
75%	1.519157	13.825000	3.600000	1.630000	73.087500	0.610000	9.172500	0.000000	0.100000	3.000000
max	1.533930	17.380000	4.490000	3.500000	75.410000	6.210000	16.190000	3.150000	0.510000	7.000000

	0	1	2	3	4	5	6	7	8	9	...	774	775	776	777	778	779	780	781	782	783
count	50000.0	50000.0	50000.0	50000.0	50000.0	50000.0	50000.0	50000.0	50000.0	50000.0	...	50000.000000	50000.000000	50000.000000	50000.000000	50000.000000	50000.000000	50000.0	50000.0	50000.0	50000.0
mean	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.000739	0.000354	0.000204	0.000090	0.000071	0.000009	0.0	0.0	0.0	0.0
std	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.022778	0.015422	0.012079	0.007217	0.007181	0.001483	0.0	0.0	0.0	0.0
min	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.0	0.0	0.0	0.0
25%	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.0	0.0	0.0	0.0
50%	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.0	0.0	0.0	0.0
75%	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.0	0.0	0.0	0.0
max	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.992188	0.992188	0.988281	0.988281	0.992188	0.242188	0.0	0.0	0.0	0.0

	0
count	5000.000000
mean	5.500000
std	2.872569
min	1.000000
25%	3.000000
50%	5.500000
75%	8.000000
max	10.000000

	0	1	2	3	4	5	6	7	8	9
count	5000.000000	5.000000e+03	5.000000e+03	5.000000e+03	5000.000000	5.000000e+03	5000.000000	5000.000000	5000.000000	5000.000000
mean	0.100628	1.003652e-01	1.003967e-01	1.004417e-01	0.100253	1.005637e-01	0.100360	0.100543	0.100846	0.100248
std	0.280952	2.710659e-01	2.669757e-01	2.724109e-01	0.267206	2.798822e-01	0.275913	0.264708	0.264970	0.284639
min	0.000010	4.299332e-07	9.454787e-07	2.587024e-07	0.000002	7.190226e-07	0.000002	0.000003	0.000008	0.000001
25%	0.000301	8.055457e-04	7.226729e-04	1.883787e-04	0.000919	2.311849e-04	0.000240	0.001162	0.000871	0.000253
50%	0.001198	4.066701e-03	4.143638e-03	1.152211e-03	0.003805	1.742870e-03	0.002029	0.004935	0.004056	0.001377
75%	0.006197	1.748446e-02	1.921718e-02	1.193071e-02	0.017761	9.229897e-03	0.011941	0.020477	0.018032	0.006297
max	0.993053	9.996013e-01	9.982013e-01	9.986625e-01	0.999188	9.985297e-01	0.999378	0.998737	0.996482	0.998724

	0	1	2	3	4	5	6	7	8	9
count	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04	1.000000e+04
mean	9.992079e-02	1.074899e-01	9.912494e-02	1.052671e-01	9.976897e-02	9.037534e-02	9.769981e-02	1.085759e-01	1.005443e-01	9.500753e-02
std	2.961449e-01	3.058113e-01	2.902825e-01	2.958724e-01	2.931867e-01	2.795904e-01	2.927824e-01	3.047731e-01	2.924316e-01	2.828876e-01
min	9.005615e-17	1.986378e-17	2.439026e-13	6.265842e-16	1.214437e-18	6.500246e-18	9.674661e-19	1.735837e-17	3.318569e-18	6.359484e-18
25%	1.320481e-09	1.788181e-09	4.365240e-07	1.482097e-07	1.504599e-09	4.772580e-09	1.436151e-10	3.032865e-09	3.319625e-08	1.242549e-08
50%	9.058250e-08	1.760941e-07	1.530497e-05	8.275158e-06	3.291525e-07	4.075375e-07	2.823017e-08	4.088125e-07	3.484185e-06	1.585515e-06
75%	1.096723e-05	3.165664e-05	5.320824e-04	4.356618e-04	3.269091e-05	3.865018e-05	6.124497e-06	8.373335e-05	2.405906e-04	2.055749e-04
max	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	1.000000e+00	9.999994e-01

More theano setup in jupyter notebook boilerplate

sample data boilerplate

Linear regression

input, output variables $x$, $y$ for Theano

Parameters (for a linear slope)

hypothesis function $h_{\theta}$

Cost function $J(\theta)$

Linear Algebra and theano

Matrix addition

Scalar Multiplication (on the GPU)

Composition; Confirming that you can do composition of scalar multiplication on a matrix (or ring) addition

sAxy or A-V multiplication or so-called "Gemv", or Matrix Multiplication on a vector, or linear transformation on a R-module, or vector space

AB or Gemm or Matrix Multiplication, i.e. Ring multiplication

Inverse and Transpose

Summation, sum, mean, scan

Linear Regression (again), via Coursera's Machine Learning Intro by Ng, Programming Exercise 1 for Week 2

Boilerplate, load sample data

Testing the Linear Regression with (Batch) Gradient Descent classes in ./ML/

Other (sample) datasets

Diabetes data from sklearn, sci-kit learn

ex2 Linear Regression, on d=2 features

Multi-class Classification

1 Multi-class Classification

Neural Networks

Model representation

Feedforward

From Deep Learning Tutorials of LISA lab of University of Montreal; logistic_sgd.py, mlp.py

NN.py, load NN.py for Layer class for Neural Net for Multiple Layers

Summary for Neural Net with Multiple Layers for logistic regression (but can be extended to linear regression)

Testing on MNIST, from University of Montreal, Deep Learning Tutorial, data

Summary for Neural Net with Multiple Layers for logistic regression (but can be extended to linear regression)

Testing on University of Montreal LISA lab MNIST data

More `theano` setup in `jupyter` notebook boilerplate

Linear Algebra and `theano`

Testing the Linear Regression with (Batch) Gradient Descent classes in `./ML/`

Diabetes data from `sklearn`, sci-kit learn

`ex2` Linear Regression, on d=2 features

From Deep Learning Tutorials of LISA lab of University of Montreal; `logistic_sgd.py`, `mlp.py`

`NN.py`, load `NN.py` for `Layer` class for Neural Net for Multiple Layers