In [71]:
%matplotlib inline

In [1]:
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets

In [2]:
import pandas as pd

In [3]:
import theano

I accomplished the above (importing theano with the GPU enabled) by launching the notebook with this command at the command prompt:

THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32' jupyter notebook

In [4]:
#import theano
from theano import function, config, sandbox, shared 
import theano.tensor as T
import numpy as np
import scipy
import time

More Theano setup boilerplate in the Jupyter notebook:


In [6]:
print( theano.config.device )
print( theano.config.lib.cnmem)  # cf. http://deeplearning.net/software/theano/library/config.html
print( theano.config.print_active_device)  # Print the active device when the GPU device is initialized.


gpu0
0.7
True

In [7]:
import os, sys
os.getcwd()
os.listdir( os.getcwd() )


Out[7]:
['LogReg-sklearn.ipynb',
 'Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow_files',
 'LICENSE',
 'theano.pdf',
 'deeplearning.pdf',
 'sklearn_ML.ipynb',
 'LaTeXandpdfs',
 'supervised-theano.ipynb',
 'sanity_check_theano_uses_gpu.ipynb',
 '.git',
 'README.md',
 'Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow.html',
 '.ipynb_checkpoints',
 'theano_ML.ipynb',
 'deep-learning--ud730',
 'tf_sanitycheck.ipynb',
 'saved_models',
 'FedoraNVidiaInstallTips',
 'tf',
 'kaggle',
 'sampleinputdataX.ipynb',
 'ML',
 'Data',
 'simple_logreg.py',
 'Speeding up your Neural Network with Theano and the GPU \xe2\x80\x93 WildML_files',
 'coursera_Ng',
 'MorseTheory.ipynb',
 'tutorial_theano.ipynb',
 'best_model.pkl',
 'gpu_test.py',
 'Speeding up your Neural Network with Theano and the GPU \xe2\x80\x93 WildML.html']

In [ ]:


In [8]:
%run gpu_test.py THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32,lib.cnmem=0.85' # note lib.cnmem option for CNMeM; N.B. %run passes this THEANO_FLAGS string to the script as a command-line argument, so the flags that actually take effect are the ones set in the environment when the notebook was launched


[GpuElemwise{exp,no_inplace}(<CudaNdarrayType(float32, vector)>), HostFromGpu(GpuElemwise{exp,no_inplace}.0)]
Looping 1000 times took 0.211632 seconds
Result is [ 1.23178029  1.61879349  1.52278066 ...,  2.20771813  2.29967761
  1.62323296]
Used the gpu

In [ ]:

sample data boilerplate


In [9]:
# Load the diabetes dataset
diabetes = sklearn.datasets.load_diabetes()

In [10]:
diabetes_X = diabetes.data
diabetes_Y = diabetes.target

In [11]:
#diabetes_X1 = diabetes_X[:,np.newaxis,2]
diabetes_X1 = diabetes_X[:,np.newaxis, 2].astype(theano.config.floatX)
#diabetes_Y  = diabetes_Y.reshape( diabetes_Y.shape[0], 1)
diabetes_Y = diabetes_Y.astype(theano.config.floatX)

Linear regression

Train on $m$ input data points


In [12]:
m_lin = diabetes_X1.shape[0]

Input and output variables $x$, $y$ for Theano


In [28]:
#x1 = T.vector('x1')  # X1, input data, with only 1 feature, i.e. X \in \mathbb{R}^N, d=1 
#ylin = T.vector('ylin') # target variable for linear regression, so that Y \in \mathbb{R}

x1 = T.scalar('x1')  # X1, input data, with only 1 feature, i.e. X \in \mathbb{R}^N, d=1 
ylin = T.scalar('ylin') # target variable for linear regression, so that Y \in \mathbb{R}

Parameters (for a linear slope)

$$ (\theta^0, \theta^1) \in \mathbb{R}^2 $$

In [29]:
thet0_init_val = np.random.randn()
thet1_init_val = np.random.randn()

In [30]:
thet0 = theano.shared( value=thet0_init_val, name='thet0', borrow=True)  # \theta^0
thet1 = theano.shared( thet1_init_val, name='thet1', borrow=True)   # \theta^1

hypothesis function $h_{\theta}$

$$ h_{\theta}(x) = \theta_1 x + \theta_0 $$

In [31]:
#h_thet = T.dot( thet1, x1) + thet0
# whereas, Newmu uses
h_thet = thet1 * x1 + thet0

Cost function $J(\theta)$


In [32]:
# roshansanthosh uses 
#Jthet = T.sum( T.pow(h_thet-ylin,2))/(2*m_lin)

# whereas, Newmu uses
# Jthet = T.mean( T.sqr( thet_1*x1 + thet_0 - ylin ))

Jthet = T.mean( T.pow( h_thet-ylin,2))/2
#Jthet = sandbox.cuda.basic_ops.gpu_from_host( T.mean( 
#        sandbox.cuda.basic_ops.gpu_from_host( T.pow( h_thet-ylin,2))))/2
$$ \text{grad}_{\theta}J(\theta) = ( \text{grad}_{\theta^0} J , \text{grad}_{\theta^1} J ) $$

In [33]:
grad_thet0 = T.grad(Jthet, thet0)
grad_thet1 = T.grad(Jthet, thet1)

In [19]:
# so-called "learning rate"
gamma = 0.01

Note that "updates (iterable over pairs (shared_variable, new_expression) List, tuple or dict.) – expressions for new SharedVariable values" cf. Theano doc


In [34]:
train_lin = theano.function(inputs = [x1,ylin], outputs=Jthet, 
                        updates=[[thet1,thet1-gamma*grad_thet1],[thet0,thet0-gamma*grad_thet0]])

In [35]:
test_lin = theano.function([x1],h_thet)

In [100]:
#X1_lin_in = shared( diabetes_X1 ,'float32')
#Y_lin_out = shared( diabetes_Y, 'float32')

In [23]:
training_steps = 1000 # 10000

In [36]:
sh_diabetes_X1 = shared( diabetes_X1 , borrow=True)
sh_diabetes_Y  = shared( diabetes_Y, borrow=True)

In [37]:
"""
for i in range(training_steps):
    for x,y in zip( diabetes_X1, diabetes_Y):
        Jthet_val = train_lin( x, y )
        """

for i in range(training_steps):
#    for x,y in zip( sh_diabetes_X1, sh_diabetes_Y) :
#        Jthet_val = train_lin( x,y)
    Jthet_val = train_lin( sh_diabetes_X1, sh_diabetes_Y)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-37-b6192b29b89d> in <module>()
      8 #    for x,y in zip( sh_diabetes_X1, sh_diabetes_Y) :
      9 #        Jthet_val = train_lin( x,y)
---> 10     Jthet_val = train_lin( sh_diabetes_X1, sh_diabetes_Y)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    784                         s.storage[0] = s.type.filter(
    785                             arg, strict=s.strict,
--> 786                             allow_downcast=s.allow_downcast)
    787 
    788                     except Exception as e:

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/tensor/type.pyc in filter(self, data, strict, allow_downcast)
     84         if isinstance(data, Variable):
     85             raise TypeError(
---> 86                 'Expected an array-like object, but found a Variable: '
     87                 'maybe you are trying to call a function on a (possibly '
     88                 'shared) variable instead of a numeric array?')

TypeError: ('Bad input argument to theano function with name "<ipython-input-34-92f3da11396a>:2"  at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')
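
The error message has the diagnosis: train_lin was compiled with inputs=[x1, ylin], so it must be called with plain numeric (numpy) values, never shared variables. A minimal fix sketch, keeping the T.scalar inputs above (float() simply unwraps the length-1 rows of diabetes_X1):

In [ ]:
# fix sketch: feed plain numeric values, one data point at a time
for i in range(training_steps):
    for x, y in zip(diabetes_X1, diabetes_Y):
        Jthet_val = train_lin(float(x), float(y))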

In [27]:
print(Jthet_val)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-49f3322b82c4> in <module>()
----> 1 print(Jthet_val)

NameError: name 'Jthet_val' is not defined
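
(The NameError follows from the previous cell failing: train_lin never ran successfully, so Jthet_val was never assigned.)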

In [129]:
print( thet0.get_value() ); print( thet1.get_value() )


151.746080721
942.701275851

In [134]:
test_lin_out = np.array( [ test_lin( x ) for x in diabetes_X1 ] )

In [137]:
plt.plot(diabetes_X1,diabetes_Y,'ro')
plt.plot(diabetes_X1,test_lin_out)


Out[137]:
[<matplotlib.lines.Line2D at 0x7f4037fd3790>]

In [38]:
if any([x.op.__class__.__name__ in ['GpuGemm','GpuGemv'] for x in train_lin.maker.fgraph.toposort()]):
    print("Used the gpu")
else:
    print(train_lin.maker.fgraph.toposort())


[Elemwise{Composite{(((i0 * i1) + i2) - i3)}}(thet1, x1, thet0, ylin), Elemwise{Composite{(i0 - (i1 * i2))}}[(0, 0)](thet0, TensorConstant{0.00999999977648}, Elemwise{Composite{(((i0 * i1) + i2) - i3)}}.0), Elemwise{Composite{(i0 - (i1 * i2 * i3))}}[(0, 0)](thet1, TensorConstant{0.00999999977648}, Elemwise{Composite{(((i0 * i1) + i2) - i3)}}.0, x1), Elemwise{Composite{(i0 * sqr(i1))}}[(0, 1)](TensorConstant{0.5}, Elemwise{Composite{(((i0 * i1) + i2) - i3)}}.0)]

In [39]:
if np.any([isinstance(x.op,T.Elemwise) for x in train_lin.maker.fgraph.toposort()]):
    print("Used the cpu")


Used the cpu
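
This graph stayed on the CPU because the inputs x1, ylin are plain host scalars fed in at call time; compare the shared-variable approach further below, which keeps the data resident in GPU memory.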

In [ ]:

Linear Algebra and theano

cf. Week 1, Linear Algebra Review, Coursera, Machine Learning with Ng

I'll take this opportunity to provide a dictionary between the syntax of linear algebra math and theano (with numpy arrays as the inputs).

Essentially, what I did was take Coursera's Week 1, Linear Algebra Review, and translate the math into theano, in particular running theano on the GPU.

Another reference I used was

https://simplyml.com/linear-algebra-shootout-numpy-vs-theano-vs-tensorflow-2/

Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow by Charanpal Dhanjal - 14/07/16


In [69]:
A = T.matrix('A')
B = T.matrix('B')
#matadd = function([A,B], A+B)
#matadd = function([A,B],sandbox.cuda.basic_ops.gpu_from_host(A+B) )
# Note: we are just defining the expressions, nothing is evaluated here!  
C = sandbox.cuda.basic_ops.gpu_from_host(A+B)
matadd = function([A,B], C)

In [121]:
#A = T.dmatrix('A')
#B = T.dmatrix('B')

A = T.matrix('A')
B = T.matrix('B')

C_out = A + B
matadd_CPU = function([A,B], C_out)

In [73]:
A_eg = shared( np.array([[8,6,9],[10,1,10]]), 'float32')
B_eg = shared( np.array([[3,10,2],[6,1,-1]]), 'float32')

In [85]:
A_eg_CPU = np.array([[8,6,9],[10,1,10]])
B_eg_CPU = np.array([[3,10,2],[6,1,-1]])

In [96]:
print(A_eg_CPU)
print( type( A_eg_CPU ))
print( A_eg_CPU.shape)
print( B_eg_CPU.shape)


[[ 8  6  9]
 [10  1 10]]
<type 'numpy.ndarray'>
(2, 3)
(2, 3)

In [70]:
print( matadd.maker.fgraph.toposort() )


[GpuFromHost(B), GpuFromHost(A), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0)]

In [122]:
print( matadd_CPU.maker.fgraph.toposort() )


[GpuFromHost(B), GpuFromHost(A), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0), HostFromGpu(GpuElemwise{Add}[(0, 0)].0)]

In [71]:
matadd( A_eg, B_eg)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-71-dd6866f5ad02> in <module>()
----> 1 matadd( A_eg, B_eg)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    784                         s.storage[0] = s.type.filter(
    785                             arg, strict=s.strict,
--> 786                             allow_downcast=s.allow_downcast)
    787 
    788                     except Exception as e:

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/tensor/type.pyc in filter(self, data, strict, allow_downcast)
     84         if isinstance(data, Variable):
     85             raise TypeError(
---> 86                 'Expected an array-like object, but found a Variable: '
     87                 'maybe you are trying to call a function on a (possibly '
     88                 'shared) variable instead of a numeric array?')

TypeError: ('Bad input argument to theano function with name "<ipython-input-69-40293d053d65>:7"  at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')
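
Same TypeError as with train_lin above: matadd was compiled with inputs=[A, B], so it expects numeric arrays, but A_eg and B_eg are shared variables.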

The way to do it, i.e. to "force" the computation onto the GPU, is like this (cf. Speeding up your Neural Network with Theano and the GPU - Wild ML):


In [76]:
np.random.randn( *A_eg_CPU.shape )


Out[76]:
array([[ 1.01645633,  0.49394088, -0.22904526],
       [-0.44323914, -0.27948502,  0.335645  ]])

In [78]:
C_out = theano.shared( np.random.randn( *A_eg_CPU.shape).astype('float32') )

In [80]:
C_out.type()


Out[80]:
<CudaNdarrayType(float32, matrix)>

In [110]:
#A_in = shared( A_eg_CPU, "float32")
#A_in = shared( A_eg_CPU, "float32")

A_in = shared( A_eg_CPU.astype("float32"), "float32")
B_in = shared( B_eg_CPU.astype("float32"), "float32")
#C_out_GPU = A_in + B_in
C_out_GPU = sandbox.cuda.basic_ops.gpu_from_host(A_in+B_in)

In [111]:
matadd_GPU = theano.function( [], C_out_GPU)

In [112]:
C_out_GPU_result = matadd_GPU()

In [113]:
C_out_GPU_result


Out[113]:
CudaNdarray([[ 11.  16.  11.]
 [ 16.   2.   9.]])

Notice how DIFFERENT this setup or syntax is: we have to set up tensor or matrix shared variables A_in, B_in, which are then used to define the theano function, theano.function. "By using shared variables we ensure that they are present in the GPU memory", cf. Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow


In [114]:
print( matadd_GPU.maker.fgraph.toposort() )


[GpuElemwise{add,no_inplace}(float32, float32)]

In [128]:
#if np.any([isinstance(C_out_GPU.op, tensor.Elemwise ) and 
if np.any([isinstance( C_out_GPU.op, T.Elemwise ) and 
           ('Gpu' not in type( C_out_GPU.op).__name__) for x in matadd_GPU.maker.fgraph.toposort()]) :
    print('Used the cpu')
else:
    print('Used the gpu')


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-128-eedabe693a3f> in <module>()
      1 #if np.any([isinstance(C_out_GPU.op, tensor.Elemwise ) and
      2 if np.any([isinstance( C_out_GPU.op, T.Elemwise ) and 
----> 3            ('Gpu' not in type( C_out_GPU.op).__name__) for x in matadd_GPU.maker.fgraph.toposort()]) :
      4     print('Used the cpu')
      5 else:

AttributeError: 'CudaNdarrayVariable' object has no attribute 'op'
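
The bug here is in the check itself: the comprehension inspects C_out_GPU.op (and C_out_GPU, the result of gpu_from_host, is a CudaNdarrayVariable, which has no op attribute) rather than each node x of the compiled graph. A corrected sketch, in the same idiom as the earlier checks:

In [ ]:
# corrected check: inspect each Apply node x of the compiled graph
if np.any([isinstance(x.op, T.Elemwise) and
           ('Gpu' not in type(x.op).__name__)
           for x in matadd_GPU.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')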

In [ ]:


In [124]:
matadd_CPU( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") )


Out[124]:
array([[ 11.,  16.,  11.],
       [ 16.,   2.,   9.]], dtype=float32)

In [55]:
type(A_eg)


Out[55]:
theano.tensor.sharedvar.TensorSharedVariable

In [59]:
print( type( numpy.asarray(rng.rand(2000)) ) )
numpy.asarray(rng.rand(2000)).shape


<type 'numpy.ndarray'>
Out[59]:
(2000,)

Bottom Line: there are 2 ways of doing linear algebra on the GPU

  1. symbolic computation with the usual arguments
$$ A + B = C \in \text{Mat}_{\mathbb{R}}(M,N) $$

$ \forall \, A, B \in \text{Mat}_{\mathbb{R}}(M,N)$


In [132]:
A = T.matrix('A')
B = T.matrix('B')

C = sandbox.cuda.basic_ops.gpu_from_host( A + B ) # vs. 
# C = A + B  # this will result in an output array on the host, as opposed to CudaNdarray on device
matadd = function([A,B], C)

In [133]:
print( matadd.maker.fgraph.toposort() )


[GpuFromHost(B), GpuFromHost(A), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0)]

In [134]:
matadd( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") )


Out[134]:
CudaNdarray([[ 11.  16.  11.]
 [ 16.   2.   9.]])
  2. with shared variables

In [140]:
A_in = shared( A_eg_CPU.astype("float32"), "float32")  # initialize with the input values, A_eg_CPU, anyway
B_in = shared( B_eg_CPU.astype("float32"), "float32")  # initialize with the input values B_eg_CPU, anyway

# C_out = A_in + B_in # this version will output to the host as a numpy.ndarray
# indeed, reading the graph,
"""
[GpuElemwise{add,no_inplace}(float32, float32), HostFromGpu(GpuElemwise{add,no_inplace}.0)]
"""
# this version immediately below, in 1 line, will result in a CudaNdarray on device
C_out = sandbox.cuda.basic_ops.gpu_from_host(A_in+B_in)

matadd_GPU = theano.function( [], C_out)

In [141]:
print( matadd_GPU.maker.fgraph.toposort() )


[GpuElemwise{add,no_inplace}(float32, float32)]

In [142]:
C_out_result = matadd_GPU()

In [143]:
C_out_result


Out[143]:
CudaNdarray([[ 11.  16.  11.]
 [ 16.   2.   9.]])

In [149]:
A_2 = np.array( [[4,5],[1,7] ])

In [145]:
a = T.scalar('a')

F = sandbox.cuda.basic_ops.gpu_from_host( a*A )
scalarmul = theano.function([a,A],F)

In [146]:
print( scalarmul.maker.fgraph.toposort() )


[GpuFromHost(A), GpuFromHost(a), GpuDimShuffle{x,x}(GpuFromHost.0), GpuElemwise{Mul}[(0, 1)](GpuDimShuffle{x,x}.0, GpuFromHost.0)]

In [150]:
scalarmul( np.float32( 2.), A_2.astype("float32"))


Out[150]:
CudaNdarray([[  8.  10.]
 [  2.  14.]])

Composition: confirming that you can compose scalar multiplication with matrix (i.e. ring) addition

Being able to compose operations is very important in math


In [152]:
scalarmul( np.float32(2.), matadd( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") ) )


Out[152]:
CudaNdarray([[ 22.  32.  22.]
 [ 32.   4.  18.]])

In [156]:
u = T.vector('u')
v = T.vector('v')

w = sandbox.cuda.basic_ops.gpu_from_host( u + v)
vecadd = theano.function( [u,v],w)

t = sandbox.cuda.basic_ops.gpu_from_host( a * u)
scalarmul_vec = theano.function([a,u], t)

In [157]:
print(vecadd.maker.fgraph.toposort())  
print(scalarmul_vec.maker.fgraph.toposort())


[GpuFromHost(v), GpuFromHost(u), GpuElemwise{Add}[(0, 0)](GpuFromHost.0, GpuFromHost.0)]
[GpuFromHost(u), GpuFromHost(a), GpuDimShuffle{x}(GpuFromHost.0), GpuElemwise{Mul}[(0, 1)](GpuDimShuffle{x}.0, GpuFromHost.0)]

In [162]:
u_eg = np.array( [4,6,7], dtype="float32")
v_eg = np.array( [2,1,0], dtype="float32")

print( u_eg.shape)


(3,)

In [161]:
scalarmul_vec( np.float32(0.5), u_eg )


Out[161]:
CudaNdarray([ 2.   3.   3.5])

In [163]:
vecadd( scalarmul_vec( np.float32(0.5), u_eg ) , scalarmul_vec( np.float32(-3.), v_eg )  )


Out[163]:
CudaNdarray([-4.   0.   3.5])

This was the computational equivalent of the mathematical expression:

$$ \left[ \begin{matrix} 4 \\ 6 \\ 7 \end{matrix} \right] /2 - 3 * \left[ \begin{matrix} 2 \\ 1 \\ 0 \end{matrix} \right] $$

sAxy, or A-v multiplication, so-called "Gemv": matrix multiplication on a vector, i.e. a linear transformation on an R-module (or vector space)

i.e.

$$ Av = B $$

In [164]:
B_out = sandbox.cuda.basic_ops.gpu_from_host( T.dot(A,v))
AVmul = theano.function([A,v], B_out)
print(AVmul.maker.fgraph.toposort())


[GpuFromHost(v), GpuFromHost(A), Shape_i{0}(A), GpuAllocEmpty(Shape_i{0}.0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuFromHost.0, GpuFromHost.0, TensorConstant{0.0})]

In [165]:
AVmul( np.array([[1,0,3],[2,1,5],[3,1,2]]).astype("float32"), np.array([1,6,2]).astype("float32"))


Out[165]:
CudaNdarray([  7.  18.  13.])

In [168]:
AVmul( np.array([[1,0,0],[0,1,0],[0,0,1]]).astype("float32"), np.array([1,6,2]).astype("float32"))


Out[168]:
CudaNdarray([ 1.  6.  2.])

AB, or "Gemm": matrix multiplication, i.e. ring multiplication

i.e.
$$ A*B = C $$


In [166]:
C_f = sandbox.cuda.basic_ops.gpu_from_host( T.dot(A,B)) 
matmul = theano.function([A,B], C_f)
print( matmul.maker.fgraph.toposort())


[GpuFromHost(B), GpuFromHost(A), GpuDot22(GpuFromHost.0, GpuFromHost.0)]

In [167]:
matmul( np.array( [[1,3],[2,4],[0,5]]  ).astype("float32"), np.array([[1,0],[2,3]]).astype("float32")  )


Out[167]:
CudaNdarray([[  7.   9.]
 [ 10.  12.]
 [ 10.  15.]])

Inverse and Transpose

cf. Inverse and Transpose, Week 1 Linear Algebra Review, Coursera's Machine Learning with Ng


In [170]:
Ainverse = sandbox.cuda.basic_ops.gpu_from_host( T.inv(A))  # NB: T.inv is the elementwise reciprocal 1/x, not the matrix inverse (the toposort below shows GpuElemwise{Inv}); for the matrix inverse see T.nlinalg.matrix_inverse
Ainv = theano.function([A], Ainverse)
print(Ainv.maker.fgraph.toposort())


[GpuFromHost(A), GpuElemwise{Inv}[(0, 0)](GpuFromHost.0)]

In [172]:
Atranspose = sandbox.cuda.basic_ops.gpu_from_host( A.T)
AT = theano.function([A],Atranspose)
print(AT.maker.fgraph.toposort())


[GpuFromHost(A), GpuDimShuffle{1,0}(GpuFromHost.0)]

In [ ]:


In [ ]:

Summation, sum, mean, scan


In [ ]:
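
A minimal sketch of the three reductions named in the heading, in the same GPU idiom as above: T.sum and T.mean are symbolic reductions (as in numpy), and theano.scan expresses a symbolic loop, here a cumulative sum. (An assumption: the gpu_from_host idiom used above also applies to the 0-dim. outputs of T.sum and T.mean, as with the 0-dim. cost function further below.)

In [ ]:
# sketch: summation, mean, and scan (symbolic loop) in theano
v = T.vector('v')

vsum  = theano.function([v], sandbox.cuda.basic_ops.gpu_from_host(T.sum(v)))
vmean = theano.function([v], sandbox.cuda.basic_ops.gpu_from_host(T.mean(v)))

# theano.scan: accumulate a running sum over the elements of v
results, updates = theano.scan(fn=lambda x_t, acc: acc + x_t,
                               sequences=v,
                               outputs_info=np.float32(0.))
cumsum = theano.function([v], results, updates=updates)

v_eg2 = np.array([1., 2., 3.], dtype='float32')
# vsum(v_eg2) -> 6.0, vmean(v_eg2) -> 2.0, cumsum(v_eg2) -> [1., 3., 6.]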

Linear Regression (again), via Coursera's Machine Learning Intro by Ng, Programming Exercise 1 for Week 2

Boilerplate, load sample data


In [9]:
linregdata = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data1.txt', header=None)

In [11]:
X_linreg_training = linregdata.as_matrix([0])  # pandas.DataFrame.as_matrix converts the frame to its numpy-array representation
y_linreg_training = linregdata.as_matrix([1])
m_linreg_training = len(y_linreg_training)  # number of training examples  
print( X_linreg_training.shape, type(X_linreg_training)) 
print( y_linreg_training.shape, type(y_linreg_training)) 
print m_linreg_training


((97, 1), <type 'numpy.ndarray'>)
((97, 1), <type 'numpy.ndarray'>)
97

Try representing $\theta$, the parameters or "weights", of size $|\theta|$, which should equal the number of features $n$ (or $d$).


In [109]:
# theta_linreg = T.vector('theta_linreg')
d = X_linreg_training.shape[1] # d = features

# Declare Theano symbolic variables
X = T.matrix('x')
y = T.vector('y')

Preprocess the training data (due to numpy's treatment of arrays). (Note: this isn't needed if you use pandas to choose which column(s) to turn into a numpy array.)


In [188]:
#X_linreg_training = X_linreg_training.reshape( m_linreg_training,1)
#y_linreg_training = y_linreg_training.reshape( m_linreg_training,1)

In [12]:
# Instead, the training data X and test data values y are going to be represented by Theano symbolic variable above
#X_linreg = theano.shared(X_linreg_training.astype("float32"),"float32")
#y_linreg = theano.shared(y_linreg_training.astype("float32"),"float32")

In [110]:
#theta_0 = np.zeros( ( d+1,1)); print(theta_0)
theta_0 = np.zeros(  d+1); print(theta_0)


[ 0.  0.]

In [111]:
theta = theano.shared( theta_0.astype("float32"), "theta")

In [89]:
alpha = np.float32(0.01) # learning rate gamma or alpha

In [112]:
# Construct Theano "expression graph"

predicted_vals = sandbox.cuda.basic_ops.gpu_from_host( T.dot(X,theta) )  # h_{\theta}
m = np.float32( y_linreg_training.shape[0] ) 
J_theta = sandbox.cuda.basic_ops.gpu_from_host( 
    T.dot( (T.dot(X,theta) - y).T, T.dot(X,theta) - y)  * np.float32( 0.5 ) * np.float32( 1./ m )   
    ) # cost function

In [113]:
update_theta = sandbox.cuda.basic_ops.gpu_from_host( 
        theta - alpha * T.grad( J_theta, theta) )

In [114]:
gradientDescent = theano.function( 
                            inputs=[X,y],
                            outputs=[predicted_vals,J_theta],  
                            updates=[(theta, update_theta)], 
                            name = "gradientDescent")

In [115]:
print( gradientDescent.maker.fgraph.toposort() )


[GpuFromHost(x), Shape_i{0}(x), GpuFromHost(y), GpuAllocEmpty(TensorConstant{1}), GpuDimShuffle{1,0}(GpuFromHost.0), GpuAllocEmpty(Shape_i{0}.0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuFromHost.0, theta, TensorConstant{0.0}), GpuElemwise{Sub}[(0, 1)](GpuGemv{inplace}.0, GpuFromHost.0), GpuDimShuffle{0}(GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{x,0}(GpuElemwise{Sub}[(0, 1)].0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuDimShuffle{x,0}.0, GpuDimShuffle{0}.0, TensorConstant{0.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{[ 0.00515464]}, GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{}(GpuGemv{inplace}.0), GpuGemv{inplace}(theta, TensorConstant{-0.019999999553}, GpuDimShuffle{1,0}.0, GpuElemwise{Mul}[(0, 1)].0, TensorConstant{1.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{0.00515463901684}, GpuDimShuffle{}.0)]

In [116]:
num_iters = 1500
J_History = []

Preprocess X to include the intercept term


In [95]:
input_X_linreg = np.hstack( ( np.ones((m_linreg_training,1)), X_linreg_training ) ).astype("float32")

In [96]:
y_linreg_training_processed = y_linreg_training.reshape( m_linreg_training,).astype("float32")

In [117]:
J_History = [0 for iter in range(num_iters)]
for iter in range(num_iters):
    predicted_vals_out, J_out = \
        gradientDescent(input_X_linreg.astype("float32"), y_linreg_training_processed.astype("float32") ) 
    J_History[iter] = J_out

In [45]:
Deg = (np.random.randn(40,10).astype("float32"), np.random.randint(size=40,low=0,high=2).astype("float32") )

In [46]:
Deg[0].shape


Out[46]:
(40, 10)

In [47]:
Deg[1].shape


Out[47]:
(40,)

In [118]:
theta.get_value()


Out[118]:
array([-3.63029242,  1.1663624 ], dtype=float32)

In [101]:
dir( J_History[0] )


Out[101]:
['__add__',
 '__array__',
 '__class__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__getitem__',
 '__hash__',
 '__iadd__',
 '__idiv__',
 '__init__',
 '__len__',
 '__new__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_dev_data',
 '_set_shape_i',
 '_set_stride',
 '_strides',
 'base',
 'copy',
 'dtype',
 'exp',
 'gpudata',
 'is_c_contiguous',
 'mem_size',
 'ndim',
 'reduce_sum',
 'reshape',
 'shape',
 'size',
 'strides',
 'take',
 'view',
 'zeros']

In [121]:
J_History[-5].gpudata


Out[121]:
47407335424

In [120]:
plt.plot( [ele.gpudata for ele in J_History])  # NB: gpudata is the device memory address (an int), not the cost value; use np.asarray(ele) to pull the value off the GPU


Out[120]:
[<matplotlib.lines.Line2D at 0x7ffa1c17a7d0>]

In [73]:
input_X_linreg.shape


Out[73]:
(97, 2)

In [122]:
# GPU NOTE: Conversion to float32 to store them on the GPU!
X = theano.shared( input_X_linreg.astype('float32'), name='X' )
y = theano.shared( y_linreg_training.astype('float32'), name='y')

In [123]:
# GPU NOTE: Conversion to float32 to store them on the GPU!  
theta = theano.shared( np.vstack(theta_0).astype("float32"), name='theta')

In [136]:
# Construct Theano "expression graph"

predicted_vals = sandbox.cuda.basic_ops.gpu_from_host( 
    T.dot(X,theta) )  # h_{\theta}
m = np.float32( y_linreg_training.shape[0] )
# cost function J_theta, J_{\theta}
J_theta = sandbox.cuda.basic_ops.gpu_from_host( 
    (
        T.dot( (T.dot(X,theta) - y).T, T.dot(X,theta) - y) * np.float32(0.5) * np.float32( 1./m)  
    ).reshape([]) )   # cost function  # reshape is to force "broadcast" into 0-dim. scalar for cost function

In [137]:
update_theta = sandbox.cuda.basic_ops.gpu_from_host( 
        theta - alpha * T.grad( J_theta, theta) )

In [138]:
# Note that we removed the input values because we will always use the same shared variable
# GPU Note: Removed the input values to avoid copying data to the GPU.
gradientDescent = theano.function( 
                            inputs=[],
#                            outputs=[predicted_vals,J_theta],  
                            updates=[(theta, update_theta)], 
                            name = "gradientDescent")

In [139]:
print( gradientDescent.maker.fgraph.toposort() )


[Shape_i{1}(theta), GpuDimShuffle{1,0}(X), GpuGemm{no_inplace}(y, TensorConstant{1.0}, X, theta, TensorConstant{-1.0}), MakeVector{dtype='int64'}(Shape_i{1}.0, Shape_i{1}.0), GpuReshape{2}(CudaNdarrayConstant{1.0}, MakeVector{dtype='int64'}.0), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[ 0.00515464]]}, GpuReshape{2}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuElemwise{mul,no_inplace}.0), GpuDimShuffle{1,0}(GpuElemwise{mul,no_inplace}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuDimShuffle{1,0}.0), GpuGemm{inplace}(theta, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0}), GpuGemm{inplace}(GpuGemm{inplace}.0, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0})]

In [141]:
#J_History = [0 for iter in range(num_iters)]
for iter in range(num_iters):
    gradientDescent( )

In [78]:
print( np.vstack( theta_0).shape )
print( y_linreg_training.shape )


(2, 1)
(97, 1)

In [149]:
theta.get_value()


Out[149]:
array([[-3.63076854],
       [ 1.16641033]], dtype=float32)

In [152]:
# Profiling
print( theano.config.profile )  # Do the vm/cvm linkers profile the execution time of Theano functions?
print( theano.config.profile_memory ) # Do the vm/cvm linkers profile the memory usage of Theano functions? It only works when profile=True.


False
False

In [154]:
theano.printing.debugprint(gradientDescent)


GpuGemm{inplace} [id A] ''   10
 |GpuGemm{inplace} [id B] ''   9
 | |theta [id C]
 | |TensorConstant{-0.00999999977648} [id D]
 | |GpuDimShuffle{1,0} [id E] ''   1
 | | |X [id F]
 | |GpuDot22 [id G] ''   8
 | | |GpuGemm{no_inplace} [id H] ''   2
 | | | |y [id I]
 | | | |TensorConstant{1.0} [id J]
 | | | |X [id F]
 | | | |theta [id C]
 | | | |TensorConstant{-1.0} [id K]
 | | |GpuDimShuffle{1,0} [id L] ''   7
 | |   |GpuElemwise{mul,no_inplace} [id M] ''   5
 | |     |CudaNdarrayConstant{[[ 0.00515464]]} [id N]
 | |     |GpuReshape{2} [id O] ''   4
 | |       |CudaNdarrayConstant{1.0} [id P]
 | |       |MakeVector{dtype='int64'} [id Q] ''   3
 | |         |Shape_i{1} [id R] ''   0
 | |         | |theta [id C]
 | |         |Shape_i{1} [id R] ''   0
 | |TensorConstant{1.0} [id J]
 |TensorConstant{-0.00999999977648} [id D]
 |GpuDimShuffle{1,0} [id E] ''   1
 |GpuDot22 [id S] ''   6
 | |GpuGemm{no_inplace} [id H] ''   2
 | |GpuElemwise{mul,no_inplace} [id M] ''   5
 |TensorConstant{1.0} [id J]

In [157]:
#print( gradientDescent.profile.print_summary() )
dir( gradientDescent.profile)


Out[157]:
['__class__',
 '__delattr__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

Testing the Linear Regression with (Batch) Gradient Descent classes in ./ML/


In [9]:
import sys
import os

In [10]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )

In [10]:
from linreg_gradDes import LinearReg, LinearReg_loaded
#from ML import LinearReg, LinearReg_loaded

Boilerplate for sample input data


In [11]:
linregdata1 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data1.txt', header=None)
linregdata1.as_matrix([0]).shape
linregdata1.as_matrix([1]).shape


Out[11]:
(97, 1)

In [12]:
features = linregdata1.as_matrix([0]).shape[1]
numberoftraining = linregdata1.as_matrix([0]).shape[0]
LinReg_housing = LinearReg( features, numberoftraining , 0.01)

In [13]:
Xin   = LinReg_housing.preprocess_X( linregdata1.as_matrix([0]))
ytest = linregdata1.as_matrix([1]).flatten()

In [14]:
%time LinReg_housing.build_model( Xin, ytest )


CPU times: user 171 ms, sys: 28.9 ms, total: 200 ms
Wall time: 198 ms
Out[14]:
array([-3.63029242,  1.1663624 ], dtype=float32)

In [19]:
LinRegloaded_housing = LinearReg_loaded( linregdata1.as_matrix([0]), linregdata1.as_matrix([1]), 
                                        features, numberoftraining )

In [20]:
%time LinRegloaded_housing.build_model()


CPU times: user 136 ms, sys: 16.4 ms, total: 152 ms
Wall time: 132 ms
Out[20]:
array([[-3.63028979],
       [ 1.16636217]], dtype=float32)

In [53]:
print( LinReg_housing.gradientDescent.maker.fgraph.toposort() )
print( LinRegloaded_housing.gradientDescent.maker.fgraph.toposort() )


[GpuFromHost(X), Shape_i{0}(X), GpuFromHost(y), GpuAllocEmpty(TensorConstant{1}), GpuDimShuffle{1,0}(GpuFromHost.0), GpuAllocEmpty(Shape_i{0}.0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuFromHost.0, theta, TensorConstant{0.0}), GpuElemwise{Sub}[(0, 1)](GpuGemv{inplace}.0, GpuFromHost.0), GpuDimShuffle{0}(GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{x,0}(GpuElemwise{Sub}[(0, 1)].0), GpuGemv{inplace}(GpuAllocEmpty.0, TensorConstant{1.0}, GpuDimShuffle{x,0}.0, GpuDimShuffle{0}.0, TensorConstant{0.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{[ 0.00515464]}, GpuElemwise{Sub}[(0, 1)].0), GpuDimShuffle{}(GpuGemv{inplace}.0), GpuGemv{inplace}(theta, TensorConstant{-0.019999999553}, GpuDimShuffle{1,0}.0, GpuElemwise{Mul}[(0, 1)].0, TensorConstant{1.0}), GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{0.00515463901684}, GpuDimShuffle{}.0)]
[Shape_i{1}(theta), GpuDimShuffle{1,0}(X), GpuGemm{no_inplace}(y, TensorConstant{1.0}, X, theta, TensorConstant{-1.0}), MakeVector{dtype='int64'}(Shape_i{1}.0, Shape_i{1}.0), GpuReshape{2}(CudaNdarrayConstant{1.0}, MakeVector{dtype='int64'}.0), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[ 0.00515464]]}, GpuReshape{2}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuElemwise{mul,no_inplace}.0), GpuDimShuffle{1,0}(GpuElemwise{mul,no_inplace}.0), GpuDot22(GpuGemm{no_inplace}.0, GpuDimShuffle{1,0}.0), GpuGemm{inplace}(theta, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0}), GpuGemm{inplace}(GpuGemm{inplace}.0, TensorConstant{-0.00999999977648}, GpuDimShuffle{1,0}.0, GpuDot22.0, TensorConstant{1.0})]

Other (sample) datasets

Consider feature normalization


In [40]:
def featureNormalize(X):
    """
    FEATURENORMALIZE Normalizes the features in X  
    FEATURENORMALIZE(X) returns a normalized version of X where  
    the mean value of each feature is 0 and the standard deviation  
    is 1.  This is often a good preprocessing step to do when 
    working with learning algorithms.
    
    """
    # You need to set these values correctly  
    X_norm = (X-X.mean(axis=0))/X.std(axis=0)
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    
    return [X_norm, mu, sigma]

In [41]:
linregdata2 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data2.txt', header=None)

In [42]:
features = linregdata2.as_matrix().shape[1] - 1
numberoftraining = linregdata2.as_matrix().shape[0]
Xdat  = linregdata2.as_matrix( range(features) )
ytest = linregdata2.as_matrix( [features])

In [46]:
[Xnorm, mus,sigmas] = featureNormalize(Xdat)

In [48]:
LinReg_housing2 = LinearReg( features, numberoftraining, 0.01)
processed_X = LinReg_housing2.preprocess_X( Xnorm )

In [49]:
%time LinReg_housing2.build_model( processed_X, ytest.flatten(), 400)


CPU times: user 59.1 ms, sys: 8.88 ms, total: 67.9 ms
Wall time: 66.2 ms
Out[49]:
array([ 334302.125    ,   99411.4609375,    3267.0065918], dtype=float32)

In [50]:
LinRegloaded_housing2 = LinearReg_loaded( Xnorm, ytest, 
                                        features, numberoftraining )

In [51]:
%time LinRegloaded_housing2.build_model(  400)


CPU times: user 48.9 ms, sys: 2.49 ms, total: 51.3 ms
Wall time: 45.4 ms
Out[51]:
array([[ 334302.21875   ],
       [  99411.453125  ],
       [   3267.00976562]], dtype=float32)
Diabetes data from sklearn (scikit-learn)

In [ ]:


In [54]:
# Load the diabetes dataset
diabetes = sklearn.datasets.load_diabetes()

In [55]:
diabetes_X = diabetes.data
diabetes_Y = diabetes.target

In [61]:
#diabetes_X1 = diabetes_X[:,np.newaxis,2]
diabetes_X1 = diabetes_X[:,np.newaxis, 2].astype(theano.config.floatX)
#diabetes_Y  = diabetes_Y.reshape( diabetes_Y.shape[0], 1)
diabetes_Y = np.vstack( diabetes_Y.astype(theano.config.floatX) )

In [63]:
features1 = 1 
numberoftraining = diabetes_Y.shape[0]

In [67]:
LinReg_diabetes = LinearReg( features1, numberoftraining, 0.01)

In [70]:
processed_X = LinReg_diabetes.preprocess_X( diabetes_X1 )

In [71]:
%time LinReg_diabetes.build_model( processed_X, diabetes_Y.flatten(), 10000)


CPU times: user 753 ms, sys: 118 ms, total: 871 ms
Wall time: 867 ms
Out[71]:
array([ 152.13273621,  192.24055481], dtype=float32)

In [74]:
LinRegloaded_diabetes = LinearReg_loaded( diabetes_X1, diabetes_Y, 
                                        features1, numberoftraining )

In [75]:
%time LinRegloaded_diabetes.build_model(  10000)


CPU times: user 707 ms, sys: 45.2 ms, total: 752 ms
Wall time: 718 ms
Out[75]:
array([[ 152.13198853],
       [ 192.2406311 ]], dtype=float32)

Multiple-features case:


In [76]:
features = diabetes_X.shape[1]

In [77]:
LinReg_diabetes = LinearReg( features, numberoftraining, 0.01)
processed_X = LinReg_diabetes.preprocess_X( diabetes_X )

In [78]:
%time LinReg_diabetes.build_model( processed_X, diabetes_Y.flatten(), 10000)


CPU times: user 855 ms, sys: 113 ms, total: 968 ms
Wall time: 964 ms
Out[78]:
array([ 152.13273621,   40.02508163,   -5.81352949,  162.25823975,
        117.35097504,   38.3995285 ,   24.88706589, -100.40937042,
         99.55418396,  149.29826355,   92.1962738 ], dtype=float32)

In [79]:
LinRegloaded_diabetes = LinearReg_loaded( diabetes_X, diabetes_Y, 
                                        features, numberoftraining )

In [80]:
%time LinRegloaded_diabetes.build_model(  10000)


CPU times: user 702 ms, sys: 64.4 ms, total: 766 ms
Wall time: 728 ms
Out[80]:
array([[ 152.13198853],
       [  40.02506256],
       [  -5.81354237],
       [ 162.25799561],
       [ 117.35108948],
       [  38.39954376],
       [  24.88703156],
       [-100.40942383],
       [  99.55430603],
       [ 149.29826355],
       [  92.1962738 ]], dtype=float32)

ex1data2 Linear Regression, on d=2 features


In [5]:
data_ex1data2 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data2.txt', header=None)
X_ex1data2 = data_ex1data2.iloc[:,0:2]
y_ex1data2 = data_ex1data2.iloc[:,2]
m_ex1data2 = y_ex1data2.shape[0]
X_ex1data2=X_ex1data2.values.astype(np.float32)
y_ex1data2=y_ex1data2.values.reshape((m_ex1data2,1)).astype(np.float32)
print(type(X_ex1data2))
print(type(y_ex1data2))
print(X_ex1data2.shape)
print(y_ex1data2.shape)
print(m_ex1data2)
print(X_ex1data2[:5])
print(y_ex1data2[:5])


<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
(47, 2)
(47, 1)
47
[[  2.10400000e+03   3.00000000e+00]
 [  1.60000000e+03   3.00000000e+00]
 [  2.40000000e+03   3.00000000e+00]
 [  1.41600000e+03   2.00000000e+00]
 [  3.00000000e+03   4.00000000e+00]]
[[ 399900.]
 [ 329900.]
 [ 369000.]
 [ 232000.]
 [ 539900.]]

In [46]:
((X_ex1data2[:,1] - X_ex1data2[:,1].mean())/( X_ex1data2[:,1].std()) ).std()


Out[46]:
0.99999988

In [49]:
# feature Normalize
#X_ex1data2_norm = sklearn.preprocessing.Normalizer.transform(X_ex1data2 )
X_ex1data2_norm = (X_ex1data2  - np.mean(X_ex1data2, axis=0)) / np.std(X_ex1data2, axis=0)
print(X_ex1data2_norm[:,0].mean())
print(X_ex1data2_norm[:,0].std())
print(X_ex1data2_norm[:,1].mean())
print(X_ex1data2_norm[:,1].std())


-7.10183e-08
1.0
2.69489e-08
1.0

In [51]:
# X_ex1data2_norm[:5];

In [61]:
X=T.matrix(dtype=theano.config.floatX)
y=T.matrix(dtype=theano.config.floatX)

Theta=theano.shared(np.zeros((2,1)).astype(theano.config.floatX))
b = theano.shared(np.zeros(1).astype(theano.config.floatX))

In [16]:
print(b.get_value().shape)


(1,)

In [62]:
yhat = T.dot( X, Theta) + b

In [63]:
# cost: half the mean squared error (squared L2 norm)
J = np.cast[theano.config.floatX](0.5)*T.mean( T.sqr( yhat-y))

In [64]:
alpha=0.01  # learning rate
# sandbox.cuda.basic_ops.gpu_from_host
updateThetab = [ Theta-np.float32(alpha)*T.grad(J,Theta), b-np.float32(alpha)*T.grad(J,b)]
gradientDescent_step = theano.function(inputs=[X,y], 
                                          outputs=J,
                                          updates = zip([Theta,b],updateThetab) )

In [66]:
num_iters =400
JList=[]
for iter in range(num_iters):
    err = gradientDescent_step(X_ex1data2_norm,y_ex1data2)
    JList.append(err)

In [67]:
# Final model:
print(Theta.get_value())
print(b.get_value())


[[ 99411.44601356]
 [  3267.01771421]]
[ 334302.0699632]

In [72]:
# JList[-10:]
plt.plot(JList)
plt.show()


Multi-class Classification

cf. ex3, Programming Exercise 3: Multi-class Classification and Neural Networks, Machine Learning

1 Multi-class Classification


In [83]:
os.getcwd()


Out[83]:
'/home/topolo/PropD/MLgrabbag'

In [86]:
os.listdir( './coursera_Ng/machine-learning-ex3/' )


Out[86]:
['ex3.pdf', 'ex3']

In [87]:
os.listdir( './coursera_Ng/machine-learning-ex3/ex3' )


Out[87]:
['ex3data1.mat',
 'submit.m',
 'sigmoid.m',
 'lrCostFunction.m',
 'predictOneVsAll.m',
 'oneVsAll.m',
 'predict.m',
 'ex3.m',
 'lib',
 'displayData.m',
 'ex3_nn.m',
 'fmincg.m',
 'ex3weights.mat']

In [90]:
# Load saved matrices from file 
multiclscls_data = scipy.io.loadmat('./coursera_Ng/machine-learning-ex3/ex3/ex3data1.mat')

import the classes from ML


In [9]:
import sys
import os

In [9]:
os.getcwd()


Out[9]:
'/home/topolo/PropD/MLgrabbag'

In [10]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )

In [11]:
from gradDes import LogReg

In [12]:
# Test case for Cost function J_{\theta} with regularization

theta_t = np.vstack( np.array( [-2, -1, 1, 2]) )
X_t = np.array( [i/10. for i in range(1,16)]).reshape((3,5)).T
#X_t = np.hstack( ( np.ones((5,1)), X_t) ) # no need to preprocess the input data X with column of 1's
y_t = np.vstack( np.array( [1,0,1,0,1]))

In [13]:
MulClsCls_digits = LogReg( X_t, y_t, 3,5,0.01, 3.  )

In [14]:
MulClsCls_digits.calculate_cost()


Out[14]:
CudaNdarray(0.125)

In [19]:
MulClsCls_digits.z.get_value()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-f79030e398ab> in <module>()
----> 1 MulClsCls_digits.z.get_value()

AttributeError: 'CudaNdarrayVariable' object has no attribute 'get_value'

In [21]:
print( MulClsCls_digits.X.get_value() )
MulClsCls_digits.y.get_value()


[[ 1.          0.1         0.60000002  1.10000002]
 [ 1.          0.2         0.69999999  1.20000005]
 [ 1.          0.30000001  0.80000001  1.29999995]
 [ 1.          0.40000001  0.89999998  1.39999998]
 [ 1.          0.5         1.          1.5       ]]
Out[21]:
array([[ 1.],
       [ 0.],
       [ 1.],
       [ 0.],
       [ 1.]], dtype=float32)

In [23]:
calc_z_test = theano.function([], MulClsCls_digits.z)

In [24]:
calc_z_test()


Out[24]:
CudaNdarray([[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]])

In [25]:
MulClsCls_digits.theta.set_value( theta_t.astype('float32') )

In [27]:
calc_z_test()


Out[27]:
CudaNdarray([[ 0.70000017]
 [ 0.9000001 ]
 [ 1.0999999 ]
 [ 1.29999983]
 [ 1.5       ]])

In [35]:
MulClsCls_digits.calculate_cost()


Out[35]:
CudaNdarray(1.93287348747)

In [43]:
print( 1/(1+np.exp( np.dot( -np.hstack( ( np.ones((5,1)), X_t) ), theta_t) ) )  )
h_test = 1/(1+np.exp( np.dot( -np.hstack( ( np.ones((5,1)), X_t) ), theta_t) ) ) 
print( np.dot( (h_test - y_t).T, h_test- y_t) * 0.5/5 ) # non-regularized J_theta cost term
np.dot( theta_t[1:].T, theta_t[1:]) * 3 / (2.* 5)


[[ 0.66818777]
 [ 0.7109495 ]
 [ 0.75026011]
 [ 0.78583498]
 [ 0.81757448]]
[[ 0.13287343]]
Out[43]:
array([[ 1.8]])
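
Indeed, 0.13287 (the unregularized term) plus the regularization term 1.8 gives 1.93287, matching calculate_cost() above; i.e. $J_{\theta} = J_{\text{unreg}} + \frac{\lambda}{2m} \theta_{1:}^T \theta_{1:}$ with $\lambda = 3$, $m = 5$.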

In [33]:
MulClsCls_digits.predict()


Out[33]:
array([[ 0.66818786],
       [ 0.71094954],
       [ 0.75026011],
       [ 0.78583503],
       [ 0.81757444]], dtype=float32)

In [ ]:
MulClsCls_digit

In [44]:
theano.config.floatX


Out[44]:
'float32'

In [ ]:


In [ ]:

Neural Networks

Model representation

cf. 2 Neural Networks, 2.1 Model representation, ex3.pdf


In [45]:
os.getcwd()


Out[45]:
'/home/topolo/PropD/MLgrabbag'

In [46]:
os.listdir( './coursera_Ng/machine-learning-ex3/' )


Out[46]:
['ex3.pdf', 'ex3']

In [47]:
os.listdir( './coursera_Ng/machine-learning-ex3/ex3/' )


Out[47]:
['ex3data1.mat',
 'submit.m',
 'sigmoid.m',
 'token.mat',
 'lrCostFunction.m',
 'predictOneVsAll.m',
 'oneVsAll.m',
 'predict.m',
 'ex3.m',
 'lib',
 'displayData.m',
 'ex3_nn.m',
 'fmincg.m',
 'ex3weights.mat']

$ \Theta_1, \Theta_2 $


In [48]:
# Load saved matrices from file 
nn3_data = scipy.io.loadmat('./coursera_Ng/machine-learning-ex3/ex3/ex3weights.mat')

In [53]:
print( nn3_data.keys() )
print( type( nn3_data['Theta1']) )
print( type( nn3_data['Theta2']) )
print( nn3_data['Theta1'].shape )
print( nn3_data['Theta2'].shape )


['Theta2', '__version__', '__header__', 'Theta1', '__globals__']
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>
(25, 401)
(10, 26)

In [54]:
Theta1[0]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-54-ff0b751d0b88> in <module>()
----> 1 Theta1[0]

NameError: name 'Theta1' is not defined
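
The NameError is just because the weight matrices were never unpacked from the loaded dict; presumably something like this was intended (keys as printed above):

In [ ]:
Theta1 = nn3_data['Theta1']   # shape (25, 401)
Theta2 = nn3_data['Theta2']   # shape (10, 26)
Theta1[0]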

Feedforward


In [61]:
%load_ext tikzmagic


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-61-445a5d6380b0> in <module>()
----> 1 get_ipython().magic(u'load_ext tikzmagic')

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
   2161         magic_name, _, magic_arg_s = arg_s.partition(' ')
   2162         magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2163         return self.run_line_magic(magic_name, magic_arg_s)
   2164 
   2165     #-------------------------------------------------------------------------

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
   2082                 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
   2083             with self.builtin_trap:
-> 2084                 result = fn(*args,**kwargs)
   2085             return result
   2086 

<decorator-gen-64> in load_ext(self, module_str)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
    194 
    195         if callable(arg):

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/magics/extension.pyc in load_ext(self, module_str)
     64         if not module_str:
     65             raise UsageError('Missing module name.')
---> 66         res = self.shell.extension_manager.load_extension(module_str)
     67 
     68         if res == 'already loaded':

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/extensions.pyc in load_extension(self, module_str)
     82             if module_str not in sys.modules:
     83                 with prepended_to_syspath(self.ipython_extension_dir):
---> 84                     __import__(module_str)
     85             mod = sys.modules[module_str]
     86             if self._call_load_ipython_extension(mod):

ImportError: No module named tikzmagic
$$ \begin{tikzpicture}
\matrix (m) [matrix of math nodes, row sep=3em, column sep=4em, minimum width=2em] {
  \mathbb{R}^{s_l} & \mathbb{R}^{s_l+1} & \mathbb{R}^{s_{l+1}} & \mathbb{R}^{s_{l+1}} \\
  a^{(l)} & (a_0^{(l)} = 1, a^{(l)}) & z^{(l+1)} & g(z^{(l+1)}) = a^{(l+1)} \\
};
\path[->]  (m-1-1) edge node [above] {$a_0^{(l)}=1$} (m-1-2)
           (m-1-2) edge node [above] {$\Theta^{(l)}$} (m-1-3)
           (m-1-3) edge node [above] {$g$} (m-1-4);
\path[|->] (m-2-1) edge node [above] {$a_0^{(l)}=1$} (m-2-2)
           (m-2-2) edge node [above] {$\Theta^{(l)}$} (m-2-3)
           (m-2-3) edge node [above] {$g$} (m-2-4);
\end{tikzpicture} $$
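
In code, one feedforward step of this diagram is short; a sketch (prepend the bias unit $a_0^{(l)} = 1$, apply $\Theta^{(l)}$, then the activation $g$, here the sigmoid, as in the commented-out lines of the next cell):

In [ ]:
# sketch of one feedforward step a^{(l)} -> a^{(l+1)}
s_l, s_lp1 = 400, 25
al     = theano.shared(np.random.randn(s_l, 1).astype('float32'), name='al')
Thetal = theano.shared(np.random.randn(s_lp1, s_l + 1).astype('float32'), name='Thetal')

a0 = theano.shared(np.ones((1, 1), dtype='float32'))   # bias unit a_0^{(l)} = 1
a_bias = T.concatenate([a0, al], axis=0)               # shape (s_l + 1, 1)
z      = T.dot(Thetal, a_bias)                         # z^{(l+1)}, shape (s_{l+1}, 1)
alp1   = T.nnet.sigmoid(z)                             # a^{(l+1)} = g(z^{(l+1)})
feedforward_step = theano.function([], alp1)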

In [ ]:


In [66]:
np.random.seed(0)
s_l = 400 # (layer) size of layer l, i.e. number of nodes, units in layer l
s_lp1 = 25
al = theano.shared( np.random.randn(s_l+1,1).astype('float32'), name="al")
#alp1 = theano.shared( np.random.randn(s_lp1,1).astype('float32'), name="al")
#Thetal = theano.shared( np.random.randn( s_lp1,s_l+1).astype('float32') , name="Thetal")

# Feedforward, forward propagation
#z = T.dot( Thetal, al)
#g = T.nnet.sigmoid( z)


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-66-ca1c7934296c> in <module>()
      2 s_l = 400 # (layer) size of layer l, i.e. number of nodes, units in layer l
      3 s_lp1 = 25
----> 4 al = theano.shared( np.random.randn(s_l+1,1).astype('float32'), name="al")
      5 #alp1 = theano.shared( np.random.randn(s_lp1,1).astype('float32'), name="al")
      6 #Thetal = theano.shared( np.random.randn( s_lp1,s_l+1).astype('float32') , name="Thetal")

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/sharedvalue.pyc in shared(value, name, strict, allow_downcast, **kwargs)
    245             try:
    246                 var = ctor(value, name=name, strict=strict,
--> 247                            allow_downcast=allow_downcast, **kwargs)
    248                 utils.add_tag_trace(var)
    249                 return var

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/var.pyc in float32_shared_constructor(value, name, strict, allow_downcast, borrow, broadcastable, target)
    238         # type.broadcastable is guaranteed to be a tuple, which this next
    239         # function requires
--> 240         deviceval = type_support_filter(value, type.broadcastable, False, None)
    241 
    242     try:

RuntimeError: Cuda error 'unspecified launch failure' while copying %lli data element to device memory

In [8]:
s_l = 25
s_lp1 = 10

In [19]:
rng = np.random.RandomState(99)
Theta_values = np.asarray( rng.uniform( 
    low=-np.sqrt( 6. / (s_l+ s_lp1)), 
    high=np.sqrt( 6./(s_l + s_lp1)), size=(s_lp1,s_l+1)), dtype=theano.config.floatX )
print( Theta_values.shape )
print( Theta_values.dtype )
#Theta_values *= np.float32(4)
Theta_values *= 4.

print( Theta_values.dtype)
Theta_values.shape


(10, 26)
float32
float32
Out[19]:
(10, 26)

In [14]:
np.float32( 4)


Out[14]:
(10, 26)

From the Deep Learning Tutorials of the LISA lab, University of Montreal: logistic_sgd.py, mlp.py


In [8]:
%env


Out[8]:
{'BASH_FUNC_module()': '() {  eval `/usr/bin/modulecmd bash $*`\n}',
 'BASH_FUNC_scl()': '() {  local CMD=$1;\n if [ "$CMD" = "load" -o "$CMD" = "unload" ]; then\n eval "module $@";\n else\n /usr/bin/scl "$@";\n fi\n}',
 'CLICOLOR': '1',
 'CVS_RSH': 'ssh',
 'DBUS_SESSION_BUS_ADDRESS': 'unix:abstract=/tmp/dbus-N7ppIfAX8e,guid=e833f00fdfe69074bad66b6a58a4c651',
 'DESKTOP_SESSION': 'gnome',
 'DISPLAY': ':1',
 'GDMSESSION': 'gnome',
 'GDM_LANG': 'en_US.UTF-8',
 'GIT_PAGER': 'cat',
 'GJS_DEBUG_OUTPUT': 'stderr',
 'GJS_DEBUG_TOPICS': 'JS ERROR;JS LOG',
 'GNOME_DESKTOP_SESSION_ID': 'this-is-deprecated',
 'HISTCONTROL': 'ignoredups',
 'HISTSIZE': '1000',
 'HOME': '/home/topolo',
 'HOSTNAME': 'localhost.localdomain',
 'JPY_PARENT_PID': '3902',
 'LANG': 'en_US.UTF-8',
 'LD_LIBRARY_PATH': '/usr/local/lib:/usr/local/lib::/usr/local/cuda/lib64:/usr/local/lib64:/usr/local/cuda/lib64:/usr/local/lib64',
 'LESSOPEN': '||/usr/bin/lesspipe.sh %s',
 'LOADEDMODULES': '',
 'LOGNAME': 'topolo',
 'LS_COLORS': 'rs=0:di=38;5;33:ln=38;5;51:mh=00:pi=40;38;5;11:so=38;5;13:do=38;5;5:bd=48;5;232;38;5;11:cd=48;5;232;38;5;3:or=48;5;232;38;5;9:mi=01;05;37;41:su=48;5;196;38;5;15:sg=48;5;11;38;5;16:ca=48;5;196;38;5;226:tw=48;5;10;38;5;16:ow=48;5;10;38;5;21:st=48;5;21;38;5;15:ex=38;5;40:*.tar=38;5;9:*.tgz=38;5;9:*.arc=38;5;9:*.arj=38;5;9:*.taz=38;5;9:*.lha=38;5;9:*.lz4=38;5;9:*.lzh=38;5;9:*.lzma=38;5;9:*.tlz=38;5;9:*.txz=38;5;9:*.tzo=38;5;9:*.t7z=38;5;9:*.zip=38;5;9:*.z=38;5;9:*.Z=38;5;9:*.dz=38;5;9:*.gz=38;5;9:*.lrz=38;5;9:*.lz=38;5;9:*.lzo=38;5;9:*.xz=38;5;9:*.bz2=38;5;9:*.bz=38;5;9:*.tbz=38;5;9:*.tbz2=38;5;9:*.tz=38;5;9:*.deb=38;5;9:*.rpm=38;5;9:*.jar=38;5;9:*.war=38;5;9:*.ear=38;5;9:*.sar=38;5;9:*.rar=38;5;9:*.alz=38;5;9:*.ace=38;5;9:*.zoo=38;5;9:*.cpio=38;5;9:*.7z=38;5;9:*.rz=38;5;9:*.cab=38;5;9:*.jpg=38;5;13:*.jpeg=38;5;13:*.gif=38;5;13:*.bmp=38;5;13:*.pbm=38;5;13:*.pgm=38;5;13:*.ppm=38;5;13:*.tga=38;5;13:*.xbm=38;5;13:*.xpm=38;5;13:*.tif=38;5;13:*.tiff=38;5;13:*.png=38;5;13:*.svg=38;5;13:*.svgz=38;5;13:*.mng=38;5;13:*.pcx=38;5;13:*.mov=38;5;13:*.mpg=38;5;13:*.mpeg=38;5;13:*.m2v=38;5;13:*.mkv=38;5;13:*.webm=38;5;13:*.ogm=38;5;13:*.mp4=38;5;13:*.m4v=38;5;13:*.mp4v=38;5;13:*.vob=38;5;13:*.qt=38;5;13:*.nuv=38;5;13:*.wmv=38;5;13:*.asf=38;5;13:*.rm=38;5;13:*.rmvb=38;5;13:*.flc=38;5;13:*.avi=38;5;13:*.fli=38;5;13:*.flv=38;5;13:*.gl=38;5;13:*.dl=38;5;13:*.xcf=38;5;13:*.xwd=38;5;13:*.yuv=38;5;13:*.cgm=38;5;13:*.emf=38;5;13:*.ogv=38;5;13:*.ogx=38;5;13:*.aac=38;5;45:*.au=38;5;45:*.flac=38;5;45:*.m4a=38;5;45:*.mid=38;5;45:*.midi=38;5;45:*.mka=38;5;45:*.mp3=38;5;45:*.mpc=38;5;45:*.ogg=38;5;45:*.ra=38;5;45:*.wav=38;5;45:*.oga=38;5;45:*.opus=38;5;45:*.spx=38;5;45:*.xspf=38;5;45:',
 'MAIL': '/var/spool/mail/topolo',
 'MODULEPATH': '/etc/scl/modulefiles:/etc/scl/modulefiles:/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles',
 'MODULESHOME': '/usr/share/Modules',
 'OLDPWD': '/',
 'PAGER': 'cat',
 'PATH': '/usr/local/cuda-7.5/bin:/home/topolo/Public/anaconda2/bin:/usr/local/cuda-7.5/bin:/home/topolo/Public/anaconda2/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/home/topolo/.local/bin:/home/topolo/bin',
 'PWD': '/home/topolo',
 'QT_IM_MODULE': 'ibus',
 'SESSION_MANAGER': 'local/unix:@/tmp/.ICE-unix/2593,unix/unix:/tmp/.ICE-unix/2593',
 'SHELL': '/bin/bash',
 'SHLVL': '2',
 'SSH_ASKPASS': '/usr/libexec/openssh/gnome-ssh-askpass',
 'SSH_AUTH_SOCK': '/run/user/1001/keyring/ssh',
 'TERM': 'xterm-color',
 'THEANO_FLAGS': 'mode=FAST_RUN,floatX=float32,device=gpu0,lib.cnmem=0.80',
 'USER': 'topolo',
 'USERNAME': 'topolo',
 'VTE_VERSION': '4205',
 'WINDOWID': '29360134',
 'WINDOWPATH': '2',
 'XAUTHORITY': '/run/user/1001/gdm/Xauthority',
 'XDG_CURRENT_DESKTOP': 'GNOME',
 'XDG_MENU_PREFIX': 'gnome-',
 'XDG_RUNTIME_DIR': '/run/user/1001',
 'XDG_SEAT': 'seat0',
 'XDG_SESSION_DESKTOP': 'gnome',
 'XDG_SESSION_ID': '1',
 'XDG_SESSION_TYPE': 'x11',
 'XDG_VTNR': '2',
 'XMODIFIERS': '@im=ibus',
 '_': '/home/topolo/Public/anaconda2/bin/jupyter'}

In [10]:
os.getcwd()


Out[10]:
'/home/topolo/PropD/MLgrabbag'

In [11]:
print( sys.path )


['', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/thermopy-0.5.4-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/pytools-2016.2.6-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/appdirs-1.4.0-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python27.zip', '/home/topolo/Public/anaconda2/lib/python2.7', '/home/topolo/Public/anaconda2/lib/python2.7/plat-linux2', '/home/topolo/Public/anaconda2/lib/python2.7/lib-tk', '/home/topolo/Public/anaconda2/lib/python2.7/lib-old', '/home/topolo/Public/anaconda2/lib/python2.7/lib-dynload', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/PyDispatcher-2.0.5-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/Sphinx-1.3.5-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/characteristic-14.3.0-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/cssselect-0.9.1-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/pyasn1_modules-0.0.5-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/service_identity-14.0.0-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/setuptools-20.3-py2.7.egg', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages', '/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/extensions', '/home/topolo/.ipython']

In [12]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( '../DeepLearningTutorials/code/' )

In [14]:
#from logistic_sgd import LogisticRegression, load_data, sgd_optimization_mnist, predict
import logistic_sgd

In [10]:
MNIST_MTLdat = logistic_sgd.load_data("../DeepLearningTutorials/data/mnist.pkl.gz") # list of training data


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-1526aa8fd8a5> in <module>()
----> 1 MNIST_MTLdat = logistic_sgd.load_data("../DeepLearningTutorials/data/mnist.pkl.gz") # list of training data

NameError: name 'logistic_sgd' is not defined
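
(The NameError here is an out-of-order-execution artifact: by the In [ ] counters, this cell, In [10], ran before the import logistic_sgd cell, In [14], above.)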

In [36]:
print(len(MNIST_MTLdat))
print(type(MNIST_MTLdat))
for ele in MNIST_MTLdat: print type(ele), len(ele) # test_set_x, test_set_y, valid_set_x, valid_set_y, train_set_x,


3
<type 'list'>
<type 'tuple'> 2
<type 'tuple'> 2
<type 'tuple'> 2

In [37]:
print( MNIST_MTLdat[0][0].get_value().shape)
print( type(MNIST_MTLdat[0][1]))
print( MNIST_MTLdat[0][1].get_scalar_constant_value )


(50000, 784)
<class 'theano.tensor.var.TensorVariable'>
<bound method TensorVariable.get_scalar_constant_value of Elemwise{Cast{int32}}.0>

In [38]:
print( type( MNIST_MTLdat[1][1] ) )
MNIST_MTLdat[1][1].shape


<class 'theano.tensor.var.TensorVariable'>
Out[38]:
Shape.0

In [39]:
dir(MNIST_MTLdat[0][1]) ;

In [25]:
import gzip
import six.moves.cPickle as pickle
with gzip.open("../DeepLearningTutorials/data/mnist.pkl.gz", 'rb') as f:
    try:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except:
        train_set, valid_set, test_set = pickle.load(f)

In [26]:
print( type( train_set[0] ))
print( train_set[0].shape )
print( type( train_set[1]))
print( train_set[1].shape )
print( type( valid_set[0] ))
print( valid_set[0].shape )
print( type( valid_set[1]))
print( valid_set[1].shape )
print( type( test_set[0] ))
print( test_set[0].shape )
print( type( test_set[1]))
print( test_set[1].shape )


<type 'numpy.ndarray'>
(50000, 784)
<type 'numpy.ndarray'>
(50000,)
<type 'numpy.ndarray'>
(10000, 784)
<type 'numpy.ndarray'>
(10000,)
<type 'numpy.ndarray'>
(10000, 784)
<type 'numpy.ndarray'>
(10000,)

In [15]:
X = train_set[0].T

In [17]:
pd.DataFrame(X.T).describe()


Out[17]:
0 1 2 3 4 5 6 7 8 9 ... 774 775 776 777 778 779 780 781 782 783
count 50000.0 50000.0 50000.0 50000.0 50000.0 50000.0 50000.0 50000.0 50000.0 50000.0 ... 50000.000000 50000.000000 50000.000000 50000.000000 50000.000000 50000.000000 50000.0 50000.0 50000.0 50000.0
mean 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000739 0.000354 0.000204 0.000090 0.000071 0.000009 0.0 0.0 0.0 0.0
std 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.022778 0.015422 0.012079 0.007217 0.007181 0.001483 0.0 0.0 0.0 0.0
min 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0
25% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0
50% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0
75% 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0
max 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.992188 0.992188 0.988281 0.988281 0.992188 0.242188 0.0 0.0 0.0 0.0

8 rows × 784 columns


In [18]:
28*28


Out[18]:
784

In [94]:
X_i = theano.shared( X.astype("float32"))

In [95]:
m = X_i.get_value().shape[1]

In [45]:
a1 = T.stack( [ theano.shared( np.ones((1,m)).astype("float32") ) , X_i ] , axis=1 )

In [41]:
print( type(a1) )
#print( a1.get_scalar_constant_value() )
dir(a1)
a1.get_parents()


<class 'theano.tensor.var.TensorVariable'>
Out[41]:
[Join(TensorConstant{2}, DimShuffle{0,1,x}.0, DimShuffle{0,1,x}.0)]

In [46]:
a1.ndim


Out[46]:
3

In [96]:
a1_0 = theano.shared( np.ones((1,m)).astype("float32"),name='a1_0')

In [97]:
a1 = T.stack( [a1_0,X_i], axis=0)

In [98]:
d = X_i.get_value().shape[0]
s_2 = d/2
rng1 = np.random.RandomState(1234)
Theta1_values = np.asarray( rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d+1)),
                           dtype=theano.config.floatX)
Theta1 = theano.shared(value=Theta1_values, name="Theta",borrow=True)

In [99]:
#rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d+1))
z1 = T.dot( Theta1, a1)
a2 = T.tanh(z1)

In [100]:
passthru1 = theano.function( [], a2)

In [101]:
print(d)
passthru1()


784
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-101-eaf6b97a7ef3> in <module>()
      1 print(d)
----> 2 passthru1()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: GpuJoin: Wrong inputs for input 0 related to inputs 0.!
Apply node that caused the error: GpuJoin(TensorConstant{2}, GpuDimShuffle{0,1,x}.0, GpuDimShuffle{0,1,x}.0)
Toposort index: 9
Inputs types: [TensorType(int8, scalar), CudaNdarrayType(float32, (False, False, True)), CudaNdarrayType(float32, (False, False, True))]
Inputs shapes: [(), (1, 50000, 1), (784, 50000, 1)]
Inputs strides: [(), (0, 1, 0), (50000, 1, 0)]
Inputs values: [array(2, dtype=int8), 'not shown', 'not shown']
Outputs clients: [[GpuDimShuffle{1,0,2}(GpuJoin.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [108]:
print(X.shape)
X_i = theano.shared( X.astype("float32"))
#m = X_i.get_value().shape[1]
m = X.shape[1]
print(m)
a1_0 = theano.shared( np.ones((1,m)).astype("float32"),name='a1_0')
print(a1_0.get_value().shape)
a1 = T.stack( [a1_0,X_i], axis=0)
addintercept = theano.function([],a1)


(784, 50000)
50000
(1, 50000)

In [109]:
addintercept()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-109-f3e06b86efb8> in <module>()
----> 1 addintercept()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: GpuJoin: Wrong inputs for input 1 related to inputs 0.!
Apply node that caused the error: GpuJoin(TensorConstant{0}, GpuDimShuffle{x,0,1}.0, GpuDimShuffle{x,0,1}.0)
Toposort index: 2
Inputs types: [TensorType(int8, scalar), CudaNdarrayType(float32, (True, False, False)), CudaNdarrayType(float32, (True, False, False))]
Inputs shapes: [(), (1, 1, 50000), (1, 784, 50000)]
Inputs strides: [(), (0, 0, 1), (0, 50000, 1)]
Inputs values: [array(0, dtype=int8), 'not shown', 'not shown']
Outputs clients: [[HostFromGpu(GpuJoin.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
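
Both GpuJoin failures come from T.stack, which inserts a new axis before joining, so the (1, m) ones-row and the (d, m) data matrix never get compatible shapes. To prepend an intercept row, one would join along the existing axis 0 with T.concatenate instead — a minimal sketch (not run here):

    a1 = T.concatenate([a1_0, X_i], axis=0)   # joins along the existing axis 0: shape (d+1, m) = (785, 50000)
    addintercept = theano.function([], a1)
    addintercept().shape                       # (785, 50000)

The cells below sidestep the intercept row entirely by carrying a separate bias vector b.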

In [131]:
d = X_i.get_value().shape[0]
print(d)
s_2 = d/2
print(s_2)
rng1 = np.random.RandomState(1234)
Theta1_values = np.asarray( rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d)),
                           dtype=theano.config.floatX)
Theta1 = theano.shared(value=Theta1_values, name="Theta1",borrow=True)
b_values = np.vstack( np.zeros(s_2) ).astype(theano.config.floatX)
b1 = theano.shared(value=b_values, name='b1',borrow=True)
a1_values=np.array( np.zeros( (d,m)), dtype=theano.config.floatX)
a1 = theano.shared(value=a1_values, name='a1', borrow=True)
lin_z2 = T.dot( Theta1, a1) + T.tile(b1,(1,m))
#lin_z2 = T.dot( Theta1, a1)


784
392

In [132]:
test_mult = theano.function([],lin_z2)

print( type(b_values))
b_values.dtype


<type 'numpy.ndarray'>
Out[132]:
dtype('float32')

In [133]:
test_mult()


Out[133]:
array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [126]:
print( b1.get_value().shape )
T.tile( b1, (0,m))  # note: reps of (0, m) would give an empty tensor; lin_z2 above uses (1, m)


(392, 1)
Out[126]:
Reshape{2}.0

NN.py: load NN.py for the Layer class, for a Neural Net with Multiple Layers


In [13]:
import sys
import os

In [14]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )

In [11]:
from NN import Layer, cost_functional, cost_functional_noreg, gradientDescent_step

Boilerplate sample data from Coursera's Machine Learning Introduction


In [12]:
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')


Loading and Visualizing Data ... 


In [14]:
ex4data1.keys()


Out[14]:
['y', 'X', '__version__', '__header__', '__globals__']

In [16]:
print( ex4data1['X'].shape )
print( ex4data1['y'].shape )


(5000, 400)
(5000, 1)

In [13]:
test_rng = np.random.RandomState(1234)
#Theta1 = Layer( test_rng, 1, 400,25, 5000)

In [29]:
#help(Theta1.al.set_value); # Beginning with Theano 0.3.1, set_value will work in-place on the GPU, if ... source on CPU
Theta1.al.set_value( ex4data1['X'].T.astype(theano.config.floatX))

In [21]:
Theta1.alp1


Out[21]:
Elemwise{tanh,no_inplace}.0

In [30]:
print( type( Theta1.alp1 ) )
Theta2 = Layer( test_rng, 2, 25,10,5000, al=Theta1.alp1 )


<class 'theano.tensor.var.TensorVariable'>

In [24]:
Theta2.alp1


Out[24]:
Elemwise{tanh,no_inplace}.0

In [35]:
predicted = theano.function([],sandbox.cuda.basic_ops.gpu_from_host( Theta2.alp1 ) )

In [37]:
predicted().shape


Out[37]:
(10, 5000)

In [39]:
print( ex4data1['y'].shape )
pd.DataFrame( ex4data1['y']).describe()


(5000, 1)
Out[39]:
0
count 5000.000000
mean 5.500000
std 2.872569
min 1.000000
25% 3.000000
50% 5.500000
75% 8.000000
max 10.000000

In [21]:
# recall that whereas the original labels (in the variable y) were 1, 2, ..., 10, for the purpose of training a 
# neural network, we need to recode the labels as vectors containing only values 0 or 1
K=10
m = ex4data1['y'].shape[0]
y_prob = [np.zeros(K) for row in ex4data1['y']]  # list of 5000 numpy arrays, each of shape (10,)
for i in range( m):
        y_prob[i][ ex4data1['y'][i]-1] = 1
y_prob = np.array(y_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_prob.shape)


(10, 5000)
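
As an aside, the same one-hot (1-of-K) recoding can be done without the explicit Python loop; a minimal vectorized sketch with numpy (y_prob_vec is just an illustrative name; the 1..10 labels are shifted to 0-based indices):

    # row k of np.eye(K) is the unit vector e_k, so fancy indexing builds the one-hot matrix
    y_prob_vec = np.eye(K, dtype=theano.config.floatX)[ex4data1['y'].flatten() - 1].T
    assert y_prob_vec.shape == (K, m)   # (10, 5000), identical to y_prob above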

In [47]:
print( type(y_prob) )
type( np.asarray( y_prob, dtype=theano.config.floatX) )


<type 'numpy.ndarray'>
Out[47]:
numpy.ndarray

In [48]:
help( T.nlinalg.trace )


Help on function trace in module theano.tensor.nlinalg:

trace(X)
    Returns the sum of diagonal elements of matrix X.
    
    Notes
    -----
    Works on GPU since 0.6rc4.


In [49]:
y_sh_var = theano.shared( np.asarray( y_prob,dtype=theano.config.floatX),name='y')

In [54]:
h_test = Theta2.alp1
J = sandbox.cuda.basic_ops.gpu_from_host(
        (-T.nlinalg.trace( T.dot( T.log( h_test ), y_sh_var.T)) - T.nlinalg.trace( 
        T.dot( T.log( np.float32(1.)-h_test),(np.float32(1.)- y_sh_var.T ) )))/np.float32(m)
    )

In [55]:
print(type(J))
test_cost_func = theano.function([],J)


<class 'theano.sandbox.cuda.var.CudaNdarrayVariable'>

In [56]:
test_cost_func()


Out[56]:
CudaNdarray(nan)

In [58]:
J_test_build = sandbox.cuda.basic_ops.gpu_from_host( -T.nlinalg.trace( T.dot( T.log(h_test),y_sh_var.T) ) )
test_cost_build_func = theano.function([], J_test_build)

In [59]:
test_cost_build_func()


Out[59]:
CudaNdarray(nan)

Sanity check using ex4.m (Programming Exercise 4) from Coursera's Machine Learning Introduction by Ng


In [14]:
Theta_testvals = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4weights.mat')

In [16]:
print( Theta_testvals.keys() )
print( Theta_testvals['Theta1'].shape )
print( Theta_testvals['Theta2'].shape )
Theta1_testval = Theta_testvals['Theta1'][:,1:]
b1_testval = Theta_testvals['Theta1'][:,0:1]
print( Theta1_testval.shape )
print( b1_testval.shape )
Theta2_testval = Theta_testvals['Theta2'][:,1:]
b2_testval = Theta_testvals['Theta2'][:,0:1]
print( Theta2_testval.shape )
print( b2_testval.shape )


['Theta2', '__version__', '__header__', 'Theta1', '__globals__']
(25, 401)
(10, 26)
(25, 400)
(25, 1)
(10, 25)
(10, 1)

In [15]:
Theta1 = Layer( test_rng, 1, 400,25, 5000, activation=T.nnet.sigmoid)

In [17]:
Theta1.Theta.set_value( Theta1_testval.astype("float32"))
Theta1.b.set_value( b1_testval.astype('float32') )
Theta1.al.set_value( ex4data1['X'].T.astype('float32'))

For $\Theta^{(2)}$, the key to connecting it with $\Theta^{(1)}$ is to set the al argument of class Layer to al=Theta1.alp1, so that layer 1's output activation becomes layer 2's input.


In [18]:
Theta2 = Layer( test_rng, 2, 25,10,5000, al=Theta1.alp1 , activation=T.nnet.sigmoid)

In [19]:
Theta2.Theta.set_value( Theta2_testval.astype('float32'))
Theta2.b.set_value( b2_testval.astype('float32'))
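
With the pretrained weights loaded, each Layer computes the feedforward step

$$ a^{(l+1)} = g \left( \Theta^{(l)} a^{(l)} + b^{(l)} \right) $$

where here $g$ is the sigmoid (the activation=T.nnet.sigmoid argument).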

In [107]:
h_test = Theta2.alp1
J = sandbox.cuda.basic_ops.gpu_from_host(
    T.mean( T.sum( 
        - y_sh_var * T.log( h_test ) - ( np.float32( 1) - y_sh_var) * T.log( np.float32(1) - h_test), axis =0), axis=0)
  )
#J = sandbox.cuda.basic_ops.gpu_from_host( 
#    T.log(h_test) * y_sh_var
#    )

In [108]:
test_cost_func = theano.function([],J)

In [109]:
test_cost_func()


Out[109]:
CudaNdarray(0.287629187107)
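
This matches the expected unregularized cost from ex4.m, about 0.287629. In symbols, with $h = a^{(3)}$ denoting the output-layer activation, the cell computes

$$ J(\Theta) = \frac{1}{m} \sum_{i=1}^m \sum_{k=1}^K \left[ -y^{(i)}_k \log{ \left( h(x^{(i)})_k \right) } - (1 - y^{(i)}_k) \log{ \left( 1 - h(x^{(i)})_k \right) } \right] $$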

In [80]:
print(type( y_sh_var) )
print( y_sh_var.get_value().shape )
print( type( h_test ))


<class 'theano.sandbox.cuda.var.CudaNdarraySharedVariable'>
(10, 5000)
<class 'theano.tensor.var.TensorVariable'>

In [94]:
checklayer2 = theano.function([], sandbox.cuda.basic_ops.gpu_from_host(Theta1.alp1))

In [102]:
checklayer2()


Out[102]:
CudaNdarray([[ 0.49435964  0.49435964  0.49435964 ...,  0.49435964  0.49435964
   0.49435964]
 [ 0.47542453  0.47542453  0.47542453 ...,  0.47542453  0.47542453
   0.47542453]
 [ 0.52900642  0.52900642  0.52900642 ...,  0.52900642  0.52900642
   0.52900642]
 ..., 
 [ 0.45432255  0.45432255  0.45432255 ...,  0.45432255  0.45432255
   0.45432255]
 [ 0.33134761  0.33134761  0.33134761 ...,  0.33134761  0.33134761
   0.33134761]
 [ 0.41315612  0.41315612  0.41315612 ...,  0.41315612  0.41315612
   0.41315612]])

In [112]:
testreg = theano.function([], T.sum( Theta1.Theta * Theta1.Theta ) )

In [113]:
testreg()


Out[113]:
array(392.58416748046875, dtype=float32)

In [114]:
range(1,3)


Out[114]:
[1, 2]

In [115]:
Thetas_lst = [ Theta1.Theta, Theta2.Theta ]

In [118]:
T.sum( [ T.sum( theta*theta) for theta in Thetas_lst] )


Out[118]:
Sum{acc_dtype=float64}.0

In [22]:
cost_func_test = cost_functional(3, 1, y_prob, Theta2.alp1, [Theta1.Theta, Theta2.Theta])

In [23]:
cost_test = theano.function([], cost_func_test)

In [24]:
cost_test() # (this value should be about 0.383770)


Out[24]:
CudaNdarray(0.383769869804)
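
This matches the expected regularized cost from ex4.m, about 0.383770, for $\lambda = 1$ (apparently the second argument to cost_functional, judging from this call). Regularization adds

$$ \frac{\lambda}{2m} \sum_{l} \sum_{i,j} \left( \Theta^{(l)}_{ij} \right)^2 $$

to the unregularized cost above.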

In [26]:
grad_test = T.grad( cost_func_test,[Theta1.Theta, Theta2.Theta])

In [27]:
grad_test_test = theano.function([], grad_test)

In [31]:
print( type(grad_test_test() ) )
print( len( grad_test_test() ))
print( type(grad_test_test()[0] ))
print( grad_test_test()[0].shape )
print( grad_test_test()[1].shape )


<type 'list'>
2
<type 'numpy.ndarray'>
(25, 400)
(10, 25)

In [41]:
print( range(6))
print( list( "Ernest") )
zip( range(6), list("Ernest"))
print( type(grad_test))


[0, 1, 2, 3, 4, 5]
['E', 'r', 'n', 'e', 's', 't']
<type 'list'>

In [40]:
print( grad_test_test.maker.fgraph.toposort() )


[Shape_i{1}(b1), Shape_i{0}(b1), GpuDimShuffle{1,0}(al), Shape_i{1}(y), Shape_i{1}(b2), Shape_i{0}(b2), GpuDimShuffle{1,0}(Theta2), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[  9.99999975e-05]]}, Theta2), GpuElemwise{mul,no_inplace}(CudaNdarrayConstant{[[  9.99999975e-05]]}, Theta1), GpuAlloc(b1, TensorConstant{1}, TensorConstant{5000}, Shape_i{0}.0, Shape_i{1}.0), InplaceDimShuffle{x,x}(Shape_i{1}.0), GpuAlloc(b2, TensorConstant{1}, TensorConstant{5000}, Shape_i{0}.0, Shape_i{1}.0), Elemwise{Mul}[(0, 1)](TensorConstant{5000}, Shape_i{1}.0), Elemwise{Mul}[(0, 1)](TensorConstant{5000}, Shape_i{1}.0), GpuDimShuffle{0,2,1,3}(GpuAlloc.0), Elemwise{Cast{float32}}(InplaceDimShuffle{x,x}.0), GpuDimShuffle{0,2,1,3}(GpuAlloc.0), MakeVector{dtype='int64'}(Shape_i{0}.0, Elemwise{Mul}[(0, 1)].0), MakeVector{dtype='int64'}(Shape_i{0}.0, Elemwise{Mul}[(0, 1)].0), GpuFromHost(Elemwise{Cast{float32}}.0), GpuReshape{2}(GpuDimShuffle{0,2,1,3}.0, MakeVector{dtype='int64'}.0), GpuReshape{2}(GpuDimShuffle{0,2,1,3}.0, MakeVector{dtype='int64'}.0), GpuElemwise{true_div,no_inplace}(CudaNdarrayConstant{[[-1.]]}, GpuFromHost.0), GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, al, TensorConstant{1.0}), GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0), GpuDimShuffle{1,0}(GpuElemwise{ScalarSigmoid}[(0, 0)].0), GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta2, GpuElemwise{ScalarSigmoid}[(0, 0)].0, TensorConstant{1.0}), GpuElemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3))) / i4) - (i5 * (i2 - i1) * scalar_sigmoid(i3)))}}[(0, 3)](CudaNdarrayConstant{[[-1.]]}, y, CudaNdarrayConstant{[[ 1.]]}, GpuGemm{inplace}.0, GpuFromHost.0, GpuElemwise{true_div,no_inplace}.0), GpuGemm{inplace}(GpuElemwise{mul,no_inplace}.0, TensorConstant{1.0}, GpuElemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3))) / i4) - (i5 * (i2 - i1) * scalar_sigmoid(i3)))}}[(0, 3)].0, GpuDimShuffle{1,0}.0, TensorConstant{2.0}), GpuDot22(GpuDimShuffle{1,0}.0, GpuElemwise{Composite{(((i0 * i1 * (i2 - scalar_sigmoid(i3))) / i4) - (i5 * (i2 - i1) * scalar_sigmoid(i3)))}}[(0, 3)].0), HostFromGpu(GpuGemm{inplace}.0), GpuElemwise{Composite{((i0 * i1) * (i2 - i1))}}[(0, 0)](GpuDot22.0, GpuElemwise{ScalarSigmoid}[(0, 0)].0, CudaNdarrayConstant{[[ 1.]]}), GpuGemm{inplace}(GpuElemwise{mul,no_inplace}.0, TensorConstant{1.0}, GpuElemwise{Composite{((i0 * i1) * (i2 - i1))}}[(0, 0)].0, GpuDimShuffle{1,0}.0, TensorConstant{2.0}), HostFromGpu(GpuGemm{inplace}.0)]

In [43]:
0.01 * grad_test


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-c3291a50120e> in <module>()
----> 1 0.01 * grad_test

TypeError: can't multiply sequence by non-int of type 'float'

In [44]:
test_update = [ (Theta, sandbox.cuda.basic_ops.gpu_from_host(
                    Theta - np.float32(0.01)*T.grad(cost_func_test, Theta) + 0.0001*Theta ))
                for Theta in [Theta1.Theta, Theta2.Theta] ]  # note the extra +0.0001*Theta term in each update

In [46]:
test_gradDes_step = theano.function( inputs=[], updates= test_update )

In [47]:
test_gradDes_step()


Out[47]:
[]
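
Each call to test_gradDes_step applies the updates once; up to the extra $+0.0001 \, \Theta$ term, that is one step of gradient descent,

$$ \Theta^{(l)} \leftarrow \Theta^{(l)} - \alpha \frac{ \partial J }{ \partial \Theta^{(l)} }, \qquad \alpha = 0.01 $$

which is why the parameter printouts below change slightly between calls.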

In [52]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )


[[ -1.05634514e-08   2.19436180e-09  -6.92295362e-06 ...,  -1.30543685e-05
   -5.04227410e-06   2.80491941e-09]
 [  7.66243780e-09  -9.75969350e-09   1.04755338e-06 ...,  -5.60192129e-05
    2.00961935e-07   3.54457574e-09]
 [ -8.77740458e-09   8.16117751e-09  -1.47759499e-06 ...,  -1.20964221e-04
   -2.33693959e-06  -7.50741602e-09]
 ..., 
 [ -8.89359164e-09  -9.82064385e-09  -7.78459707e-06 ...,   2.35335647e-05
   -3.25518340e-06   9.02587516e-09]
 [  3.05208303e-10   2.56086108e-09  -2.11960196e-06 ...,  -8.61849287e-04
    9.43547930e-05   3.83799614e-09]
 [  8.85963747e-09  -6.57579602e-10  -8.81727192e-06 ...,  -1.80388656e-06
   -8.14549094e-06   8.79540707e-09]]
[[-1.21257138 -0.10188229 -2.36874819 -1.0578922  -2.20846629  0.56389523
   1.21117842  2.21053886  0.4446061  -1.18257177  1.04299855 -1.60575604
   1.30433381  1.37189186  1.74843192 -0.23368138 -1.52030313  1.15336025
   0.10369149 -0.37211585 -0.61536551 -0.12569839 -2.27216721 -0.71843761
  -1.29703891]
 [ 0.61565566 -1.26563799  1.85764742 -0.91862833 -0.05503076 -0.38593763
   1.2953428  -1.56859624 -0.97036505 -2.18357611 -2.85063267 -2.07754731
   1.63180149  0.34905949  1.82808101 -2.44199824 -0.85639215 -0.29828632
  -2.07969451 -1.2934581   0.8999148   0.28309527  2.31204581 -2.46469688
   1.45671725]
 [-1.94558346  2.01381588 -3.12348628 -0.23620027  1.38695455  0.9099192
  -1.54790509 -0.79839182 -0.65606695  0.73545998 -2.58620143  0.47215798
   0.55355227  2.51281595 -2.41699743 -1.63915682  1.20285499 -1.20258307
  -1.83465064 -1.88032556 -0.34059626  0.23694935 -1.06149018  1.02769864
  -0.47695836]
 [ 0.46304011  0.58498383 -0.16503577  1.93284273 -0.2296816  -1.84750748
   0.49016848  1.07157159 -3.31940198  1.54129529  0.37375814 -0.86493742
  -2.583004    0.97072506 -0.51027173 -0.68435007 -1.64730716  0.21155307
  -0.27425268  1.72617733  1.32432389 -2.64011979 -0.08056725 -2.03531981
  -1.46138978]
 [-2.04503059  2.05719876  1.95121229  0.17639595 -2.16163683 -0.40398875
   1.8017633  -1.56294954 -0.2525554   0.23588987  0.71664256  1.07700384
  -0.3546088  -1.67760444 -0.12940609 -0.67495829  1.14078426  1.32445085
   3.21191907 -2.15911388 -2.60191083 -3.22298121 -1.89632535 -0.87497073
   2.51064777]
 [ 0.43445611 -0.93170726  0.18392649 -0.36082     0.61964542  0.38628966
  -2.65177917  2.29734659 -2.08839846 -1.86401701  1.06068861  0.77570206
   2.13490796 -1.14985681 -0.52086854  0.99753791 -1.48324752 -2.31418347
   0.29520378 -0.38708907 -2.20630646  0.3070533  -1.17658365 -1.63479984
  -0.82476246]
 [ 1.21576905 -1.50111604 -2.03216481 -1.52382553 -2.43757415 -2.37595034
  -1.40001822 -0.88744533 -0.63285488  1.50465775 -1.58092761  0.58605266
  -0.77548492  0.942671    2.10941553  0.54484761  0.43778127 -1.28037572
  -0.0436146   1.47765326 -1.13288772 -0.72854507  0.04735166  1.65762866
   1.68558455]
 [-0.72256583 -3.15261006  0.36581546  0.19813281 -0.73067629  1.65280986
  -2.30059648 -1.87487686  0.98105556 -1.58841705  1.35448146  2.17917943
  -1.99260521 -2.00392246 -0.38865316 -2.34017301 -2.91749477  0.99408847
  -2.70504951 -1.27153015  1.86110783 -1.20531952 -0.38018176  0.70879132
  -2.11035943]
 [ 0.53607166  1.30320907 -1.03383625 -4.03126812  0.58179194 -2.65745735
   0.80388159 -1.09253371  2.49935699  0.36204222  0.66201895 -0.92170537
  -0.83132339 -2.0022192  -2.94928217  0.64570653 -1.10126281  0.74517834
   0.5851267  -1.99566114  0.62597275  1.80614579 -0.22312002 -1.40457022
  -2.13213754]
 [-1.43959904 -1.2182219   0.71100444  0.45221624 -0.35957071  0.62291443
  -0.67012274 -0.7069872   0.06312034 -1.2321192  -1.74663413 -2.71989202
  -2.21460128 -1.69325113 -0.90936852  0.87861484  1.18677163 -1.87060738
   0.39800486  1.72131801 -1.36948287  0.85815626 -0.24782105  1.2802242
  -1.3276583 ]]

In [53]:
test_gradDes_step()


Out[53]:
[]

In [54]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )


[[ -1.05644871e-08   2.19457674e-09  -6.92367394e-06 ...,  -1.30557455e-05
   -5.04279706e-06   2.80519430e-09]
 [  7.66318919e-09  -9.76065007e-09   1.04766264e-06 ...,  -5.60250264e-05
    2.00982896e-07   3.54492324e-09]
 [ -8.77826434e-09   8.16197776e-09  -1.47774847e-06 ...,  -1.20976787e-04
   -2.33718265e-06  -7.50815143e-09]
 ..., 
 [ -8.89446294e-09  -9.82160664e-09  -7.78540652e-06 ...,   2.35360112e-05
   -3.25552196e-06   9.02675978e-09]
 [  3.05238224e-10   2.56111221e-09  -2.11982092e-06 ...,  -8.61938810e-04
    9.43645937e-05   3.83837229e-09]
 [  8.86050611e-09  -6.57644050e-10  -8.81818869e-06 ...,  -1.80411416e-06
   -8.14633404e-06   8.79626860e-09]]
[[-1.21269774 -0.10189326 -2.36899543 -1.05800319 -2.20869637  0.56395203
   1.21130395  2.21076775  0.44465062 -1.18269479  1.04310584 -1.60592437
   1.30446815  1.37203324  1.74861288 -0.2337063  -1.52046156  1.15347874
   0.10370216 -0.3721545  -0.61543089 -0.12571317 -2.27240396 -0.71851313
  -1.29717469]
 [ 0.61571926 -1.2657696   1.85784066 -0.91872346 -0.05503564 -0.38597718
   1.29547703 -1.56875956 -0.9704659  -2.18380332 -2.8509295  -2.07776403
   1.63197136  0.34909612  1.82827091 -2.44225264 -0.8564809  -0.29831624
  -2.07991028 -1.29359245  0.90000927  0.28312474  2.31228638 -2.46495295
   1.45686901]
 [-1.9457854   2.01402569 -3.12381077 -0.23622425  1.38709962  0.91001403
  -1.54806602 -0.79847473 -0.65613556  0.73553669 -2.58646989  0.47220758
   0.55360955  2.51307726 -2.41724944 -1.63932729  1.20297968 -1.20270777
  -1.83484173 -1.88052094 -0.34063154  0.23697387 -1.06160128  1.02780485
  -0.47700837]
 [ 0.46308801  0.58504438 -0.16505313  1.93304372 -0.22970556 -1.8477
   0.49021927  1.07168269 -3.31974745  1.54145551  0.37379676 -0.86502802
  -2.58327293  0.97082567 -0.51032478 -0.68442118 -1.64747822  0.21157469
  -0.27428094  1.7263571   1.32446122 -2.64039493 -0.08057581 -2.03553128
  -1.46154177]
 [-2.04524326  2.05741334  1.95141554  0.17641492 -2.16186166 -0.40403011
   1.80195129 -1.56311166 -0.25258079  0.23591475  0.7167182   1.07711673
  -0.35464483 -1.67777836 -0.12941965 -0.6750282   1.14090323  1.32458937
   3.21225333 -2.15933871 -2.60218096 -3.22331572 -1.89652169 -0.8750608
   2.51090932]
 [ 0.43450102 -0.93180406  0.18394522 -0.36085787  0.61970943  0.3863298
  -2.65205503  2.29758549 -2.08861589 -1.86421084  1.06079876  0.7757827
   2.13512993 -1.14997661 -0.52092284  0.99764156 -1.48340154 -2.31442451
   0.2952342  -0.3871294  -2.20653629  0.30708465 -1.17670619 -1.63497019
  -0.82484829]
 [ 1.21589518 -1.5012722  -2.03237605 -1.52398372 -2.43782783 -2.37619758
  -1.40016377 -0.88753754 -0.63292104  1.50481379 -1.58109224  0.58611315
  -0.77556568  0.94276875  2.10963464  0.54490387  0.43782642 -1.28050911
  -0.04361925  1.47780681 -1.13300586 -0.72862113  0.04735615  1.65780067
   1.68575966]
 [-0.72264111 -3.1529386   0.3658531   0.19815198 -0.73075318  1.65298057
  -2.30083585 -1.87507212  0.98115718 -1.58858275  1.35462141  2.17940545
  -1.99281275 -2.00413132 -0.38869336 -2.34041619 -2.917799    0.99419045
  -2.70533133 -1.27166259  1.86130106 -1.20544505 -0.38022164  0.70886451
  -2.11057878]
 [ 0.53612489  1.30334234 -1.03394532 -4.03168917  0.58184916 -2.65773559
   0.80396318 -1.09264815  2.49961329  0.3620764   0.66208446 -0.92180538
  -0.83141011 -2.00242877 -2.94958925  0.64577097 -1.10137868  0.74525356
   0.5851863  -1.99586976  0.62603438  1.80633044 -0.2231469  -1.40471911
  -2.13235974]
 [-1.43974864 -1.21834874  0.71107888  0.4522633  -0.35960764  0.62297934
  -0.67019254 -0.70706058  0.06312715 -1.23224759 -1.74681592 -2.72017503
  -2.21483088 -1.69342732 -0.90946311  0.87870657  1.18689513 -1.87080216
   0.39804676  1.7214973  -1.36962521  0.85824573 -0.24784632  1.28035724
  -1.32779622]]

In [23]:
gradDes_test_res = gradientDescent_step(cost_func_test, [Theta1.Theta, Theta2.Theta], 0.01, 0.00001 )

In [25]:
print( type(gradDes_test_res) )
gradDes_step_test = gradDes_test_res[1]


<type 'tuple'>

In [26]:
gradDes_step_test()


Out[26]:
[]

In [27]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )


[[ -1.05625011e-08   2.19416441e-09  -6.92233061e-06 ...,  -1.30531944e-05
   -5.04182026e-06   2.80466694e-09]
 [  7.66174768e-09  -9.75881509e-09   1.04745914e-06 ...,  -5.60141707e-05
    2.00943859e-07   3.54425689e-09]
 [ -8.77661499e-09   8.16044299e-09  -1.47746198e-06 ...,  -1.20953329e-04
   -2.33672927e-06  -7.50674101e-09]
 ..., 
 [ -8.89279139e-09  -9.81976012e-09  -7.78389676e-06 ...,   2.35314474e-05
   -3.25489032e-06   9.02506336e-09]
 [  3.05180825e-10   2.56063060e-09  -2.11941119e-06 ...,  -8.61771696e-04
    9.43463019e-05   3.83765109e-09]
 [  8.85883988e-09  -6.57520427e-10  -8.81647793e-06 ...,  -1.80372422e-06
   -8.14475789e-06   8.79461570e-09]]
[[-1.21246231 -0.10187311 -2.3685348  -1.05779707 -2.20826769  0.5638445
   1.21106946  2.21034002  0.44456607 -1.18246531  1.04290473 -1.60561156
   1.3042165   1.37176836  1.74827456 -0.23366036 -1.5201664   1.15325642
   0.10368215 -0.37208235 -0.61531013 -0.12568706 -2.27196264 -0.718373
  -1.29692221]
 [ 0.61560023 -1.26552403  1.85748029 -0.91854566 -0.05502581 -0.38590291
   1.29522622 -1.5684551  -0.97027773 -2.18337965 -2.85037613 -2.07736039
   1.63165462  0.34902808  1.8279165  -2.44177866 -0.85631508 -0.29825947
  -2.07950735 -1.29334176  0.8998338   0.28306979  2.31183767 -2.46447515
   1.45658612]
 [-1.94540834  2.01363468 -3.12320518 -0.23617902  1.38682961  0.90983731
  -1.54776573 -0.79831994 -0.65600789  0.73539382 -2.58596873  0.47211552
   0.55350244  2.51258993 -2.41677999 -1.63900936  1.20274675 -1.20247483
  -1.83448553 -1.8801564  -0.34056559  0.23692803 -1.06139469  1.02760613
  -0.47691542]
 [ 0.46299845  0.58493114 -0.16502091  1.93266881 -0.22966093 -1.84734118
   0.49012437  1.07147515 -3.31910324  1.54115653  0.37372452 -0.86485958
  -2.58277154  0.97063768 -0.51022583 -0.6842885  -1.64715886  0.21153402
  -0.27422801  1.72602201  1.32420468 -2.63988233 -0.08056    -2.03513646
  -1.46125829]
 [-2.04484653  2.05701351  1.95103669  0.17638008 -2.16144228 -0.40395239
   1.80160105 -1.56280887 -0.25253269  0.23586865  0.71657807  1.0769068
  -0.35457689 -1.67745352 -0.12939446 -0.67489755  1.14068162  1.32433164
   3.21163011 -2.15891957 -2.6016767  -3.22269106 -1.89615464 -0.874892
   2.51042175]
 [ 0.43441701 -0.93162346  0.18390995 -0.36078751  0.61958963  0.38625491
  -2.65154052  2.29713964 -2.08821058 -1.86384928  1.06059313  0.77563226
   2.1347158  -1.14975333 -0.52082163  0.99744815 -1.48311412 -2.3139751
   0.29517719 -0.38705423 -2.20610809  0.30702567 -1.17647779 -1.63465273
  -0.82468826]
 [ 1.21565962 -1.50098097 -2.03198195 -1.52368844 -2.4373548  -2.37573647
  -1.39989233 -0.88736546 -0.6327979   1.50452232 -1.58078539  0.58599991
  -0.77541512  0.94258612  2.10922575  0.54479861  0.43774188 -1.28026056
  -0.04361067  1.47752023 -1.1327858  -0.7284795   0.0473474   1.65747941
   1.68543291]
 [-0.7225008  -3.15232635  0.36578253  0.19811498 -0.73061055  1.65266109
  -2.30038953 -1.87470806  0.98096728 -1.588274    1.35435963  2.17898345
  -1.9924258  -2.00374198 -0.3886182  -2.33996224 -2.91723228  0.993999
  -2.70480609 -1.27141571  1.86094034 -1.20521104 -0.38014755  0.70872754
  -2.11016965]
 [ 0.53602344  1.30309176 -1.03374326 -4.03090572  0.5817396  -2.65721822
   0.80380923 -1.09243536  2.49913216  0.36200964  0.66195935 -0.92162246
  -0.83124858 -2.00203896 -2.94901681  0.64564842 -1.10116363  0.74511129
   0.58507401 -1.99548161  0.62591642  1.8059833  -0.22309995 -1.40444386
  -2.13194585]
 [-1.43946946 -1.21811223  0.71094042  0.45217556 -0.35953835  0.62285841
  -0.67006248 -0.7069236   0.06311466 -1.23200822 -1.74647701 -2.71964717
  -2.21440196 -1.69309878 -0.90928668  0.87853581  1.18666482 -1.87043905
   0.39796904  1.72116315 -1.36935961  0.85807902 -0.24779876  1.28010905
  -1.32753885]]

In [28]:
gradDes_step_test()


Out[28]:
[]

In [29]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )


[[ -1.05625855e-08   2.19418195e-09  -6.92242747e-06 ...,  -1.30533963e-05
   -5.04188938e-06   2.80468937e-09]
 [  7.66180897e-09  -9.75889325e-09   1.04747403e-06 ...,  -5.60149419e-05
    2.00946744e-07   3.54428531e-09]
 [ -8.77668516e-09   8.16050871e-09  -1.47748244e-06 ...,  -1.20955003e-04
   -2.33676201e-06  -7.50680140e-09]
 ..., 
 [ -8.89286245e-09  -9.81983916e-09  -7.78400590e-06 ...,   2.35317766e-05
   -3.25493579e-06   9.02513619e-09]
 [  3.05183268e-10   2.56065102e-09  -2.11943939e-06 ...,  -8.61783628e-04
    9.43476116e-05   3.83768217e-09]
 [  8.85891094e-09  -6.57525701e-10  -8.81660071e-06 ...,  -1.80378925e-06
   -8.14486793e-06   8.79468587e-09]]
[[-1.21247959 -0.10187493 -2.36856866 -1.05781281 -2.20829916  0.56385064
   1.21108603  2.21037006  0.44457057 -1.18248188  1.04291832 -1.6056354
   1.30423355  1.37178624  1.74829817 -0.23366424 -1.52018797  1.15327108
   0.10368349 -0.37208751 -0.61532009 -0.12569052 -2.27199483 -0.71838391
  -1.29694128]
 [ 0.61560839 -1.26554167  1.85750628 -0.91855812 -0.05502573 -0.38590774
   1.29524386 -1.56847727 -0.97029126 -2.18341041 -2.85041642 -2.07738996
   1.63167763  0.34903327  1.82794189 -2.44181347 -0.85632676 -0.29826254
  -2.07953596 -1.29335976  0.89984727  0.28307381  2.3118701  -2.46450949
   1.45660675]
 [-1.94543517  2.01366329 -3.12324858 -0.23618175  1.38684976  0.9098503
  -1.54778731 -0.7983309  -0.65601742  0.73540437 -2.5860045   0.47212264
   0.55350989  2.51262522 -2.41681457 -1.63903248  1.20276332 -1.20249116
  -1.83451152 -1.8801825  -0.34057021  0.23693123 -1.06141019  1.02761996
  -0.47692251]
 [ 0.46300465  0.58493906 -0.16502343  1.93269575 -0.22966421 -1.84736741
   0.49013108  1.07148981 -3.31914997  1.54117799  0.37372953 -0.86487234
  -2.58280778  0.97065091 -0.51023299 -0.68429804 -1.64718163  0.2115366
  -0.27423158  1.72604644  1.3242228  -2.63991976 -0.0805613  -2.03516483
  -1.4612788 ]
 [-2.04487514  2.05704284  1.95106435  0.17638318 -2.16147256 -0.4039574
   1.8016268  -1.56283033 -0.25253534  0.23587231  0.71658915  1.07692266
  -0.354581   -1.67747641 -0.12939636 -0.67490661  1.14069796  1.32435095
   3.21167541 -2.15895009 -2.60171247 -3.2227354  -1.89618027 -0.87490332
   2.51045728]
 [ 0.43442282 -0.93163645  0.18391213 -0.36079288  0.61959785  0.38626033
  -2.65157771  2.29717159 -2.08824015 -1.86387539  1.06060791  0.77564305
   2.1347456  -1.14976966 -0.52082902  0.99746203 -1.48313475 -2.31400776
   0.29518107 -0.38705969 -2.20613933  0.30702943 -1.17649448 -1.63467586
  -0.82469988]
 [ 1.21567631 -1.50100219 -2.03201032 -1.52370954 -2.4373889  -2.37576985
  -1.39991188 -0.8873778  -0.63280708  1.50454307 -1.58080781  0.58600765
  -0.77542609  0.94259894  2.10925508  0.54480588  0.43774763 -1.2802788
  -0.0436114   1.47754073 -1.13280201 -0.72849     0.04734763  1.65750217
   1.6854564 ]
 [-0.72251105 -3.15237093  0.36578727  0.19811635 -0.73062164  1.65268302
  -2.30042195 -1.87473452  0.98098069 -1.58829641  1.35437787  2.17901349
  -1.99245393 -2.00377035 -0.38862342 -2.33999467 -2.917274    0.99401158
  -2.70484447 -1.27143371  1.86096621 -1.20522809 -0.38015315  0.70873696
  -2.11019921]
 [ 0.53602844  1.30310774 -1.03375936 -4.0309639   0.58174449 -2.65725732
   0.80381852 -1.09245145  2.49916363  0.36201128  0.66196531 -0.92163956
  -0.8312605  -2.00206828 -2.94905853  0.6456548  -1.10118032  0.74511951
   0.58508086 -1.9955107   0.62592179  1.8060056  -0.22310673 -1.40446639
  -2.13197637]
 [-1.43948936 -1.2181294   0.71095073  0.45218194 -0.35954288  0.62286729
  -0.67007202 -0.70693338  0.06311581 -1.23202562 -1.74650168 -2.71968532
  -2.21443224 -1.69312251 -0.90929943  0.8785485   1.18668139 -1.87046552
   0.39797512  1.72118759 -1.36937869  0.85809124 -0.24780172  1.28012693
  -1.32755733]]

In [30]:
y_prob.shape


Out[30]:
(10, 5000)

In [31]:
ex4data1['y'].shape


Out[31]:
(5000, 1)

In [51]:
pd.DataFrame( ex4data1['y']).describe()


Out[51]:
0
count 5000.000000
mean 5.500000
std 2.872569
min 1.000000
25% 3.000000
50% 5.500000
75% 8.000000
max 10.000000

In [39]:
print( Theta2.alp1.shape )
print( Theta2.alp1.shape.ndim )
# Theta2.alp1.shape.get_scalar_constant_value()
predicted_logreg = theano.function([],Theta2.alp1)


Shape.0
1

In [42]:
pd.DataFrame( predicted_logreg().T ).describe()


Out[42]:
0 1 2 3 4 5 6 7 8 9
count 5000.000000 5.000000e+03 5.000000e+03 5.000000e+03 5000.000000 5.000000e+03 5000.000000 5000.000000 5000.000000 5000.000000
mean 0.100628 1.003652e-01 1.003967e-01 1.004417e-01 0.100253 1.005637e-01 0.100360 0.100543 0.100846 0.100248
std 0.280952 2.710659e-01 2.669757e-01 2.724109e-01 0.267206 2.798822e-01 0.275913 0.264708 0.264970 0.284639
min 0.000010 4.299332e-07 9.454787e-07 2.587024e-07 0.000002 7.190226e-07 0.000002 0.000003 0.000008 0.000001
25% 0.000301 8.055457e-04 7.226729e-04 1.883787e-04 0.000919 2.311849e-04 0.000240 0.001162 0.000871 0.000253
50% 0.001198 4.066701e-03 4.143638e-03 1.152211e-03 0.003805 1.742870e-03 0.002029 0.004935 0.004056 0.001377
75% 0.006197 1.748446e-02 1.921718e-02 1.193071e-02 0.017761 9.229897e-03 0.011941 0.020477 0.018032 0.006297
max 0.993053 9.996013e-01 9.982013e-01 9.986625e-01 0.999188 9.985297e-01 0.999378 0.998737 0.996482 0.998724

In [46]:
pd.DataFrame(predicted_logreg().T).describe().iloc[1:-1,:].plot()


Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc5ee56a110>

In [49]:
print( np.argmax( predicted_logreg(), axis=0).shape )
np.vstack( np.argmax( predicted_logreg(),axis=0) ).shape


(5000,)
Out[49]:
(5000, 1)

In [52]:
pd.DataFrame( np.vstack( np.argmax(predicted_logreg(),axis=0)) + 1).describe()


Out[52]:
0
count 5000.000000
mean 5.504000
std 2.874933
min 1.000000
25% 3.000000
50% 6.000000
75% 8.000000
max 10.000000

In [55]:
res = np.float32( ( np.vstack( np.argmax( predicted_logreg(),axis=0)) + 1 ) == ex4data1['y'] )
pd.DataFrame(res).describe()


Out[55]:
0
count 5000.000000
mean 0.975200
std 0.155534
min 0.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 1.000000
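
The mean of res, 0.9752, is the training-set accuracy with the provided ex4weights, consistent with the roughly 97.5% quoted in the course materials for these pretrained parameters.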

In [56]:
range(1,3)


Out[56]:
[1, 2]

In [57]:
predicted_logreg().shape


Out[57]:
(10, 5000)

In [64]:
print(y_prob.shape); print( np.argmax( y_prob,axis=0 ).shape)


(10, 5000)
(5000,)

Summary: Neural Net with Multiple Layers for logistic regression (extendable to linear regression)

  • Load boilerplate training data:

In [9]:
sys.path.append( os.getcwd() + '/ML' )

In [10]:
from NN import Layer, cost_functional, cost_functional_noreg, gradientDescent_step, MLP

In [11]:
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')

# recall that whereas the original labels (in the variable y) were 1, 2, ..., 10, for the purpose of training a 
# neural network, we need to recode the labels as vectors containing only values 0 or 1
K=10
m = ex4data1['y'].shape[0]
y_prob = [np.zeros(K) for row in ex4data1['y']]  # list of 5000 numpy arrays, each of shape (10,)
for i in range( m):
        y_prob[i][ ex4data1['y'][i]-1] = 1
y_prob = np.array(y_prob).T.astype(theano.config.floatX)  # size dims. (K,m)

print(ex4data1['X'].T.shape)
print(y_prob.shape)


Loading and Visualizing Data ... 

(400, 5000)
(10, 5000)

In [12]:
digitsMLP = MLP(3,[400,25,10], 5000, ex4data1['X'].T, y_prob, T.nnet.sigmoid, 1., 0.1, 0.0000)

In [17]:
digitsMLP.train_model(100000)

In [18]:
digitsMLP.accuracy_log_reg()


Accuracy score : 0.991200 
Out[18]:
0.99119999999999997

In [19]:
print( digitsMLP.Thetas[0].Theta.get_value() )
digitsMLP.Thetas[1].Theta.get_value()


[[-0.03968292  0.01570808 -0.00801071 ...,  0.05324144 -0.03536524
  -0.02519361]
 [ 0.02765682  0.02882705 -0.06191737 ..., -0.01663327  0.03339296
  -0.02493248]
 [ 0.01494016 -0.03611031  0.04792143 ...,  0.0510212  -0.00466521
  -0.00434607]
 ..., 
 [ 0.03901267 -0.03169481  0.06044593 ..., -0.0364962  -0.03772442
   0.01351396]
 [ 0.03186403  0.05424397  0.02720479 ...,  0.01087107 -0.01080637
   0.01786175]
 [ 0.06252811 -0.04008792  0.06331227 ..., -0.04469054  0.02635518
   0.04384876]]
Out[19]:
array([[ 1.20451474,  0.83254075,  0.3577919 ,  2.09141254,  0.30367732,
         1.70919907, -0.68671805,  1.94035482,  0.6540705 , -2.20026374,
         1.0235399 , -1.10299885,  0.87818861,  2.8505857 , -2.22271323,
        -2.00406885, -2.60233808, -0.85654968, -2.76958466, -1.48524833,
        -3.17246437, -0.06278938,  0.616503  ,  3.52648425,  0.98629874],
       [ 2.60660052, -0.10535918,  1.22893441,  2.44127226, -3.89616203,
        -3.1201973 , -2.51445174,  1.34601641,  3.96324277, -2.53724122,
        -3.20322871,  0.95207882, -2.91599441, -2.66748142,  1.32648623,
         0.81661856,  1.32140303, -0.87079829, -0.53782797, -1.90582633,
         3.33778262, -0.50736022, -0.66713619, -1.36594701,  2.1127429 ],
       [-1.43893147,  1.59595168,  0.58790714,  3.8221848 ,  4.65350819,
        -2.22888541, -1.92654026, -1.48021591, -3.24884081, -2.0549016 ,
         2.35445762,  3.15103745, -1.86801529,  2.50599551, -0.51754326,
        -0.61664522, -1.13638568, -3.49592352, -0.14462674,  0.52218926,
         0.94804603,  0.48102653, -2.2381053 , -2.80585504, -2.67067432],
       [-0.97880727, -3.42777109, -3.0412941 ,  0.07771082,  0.56903011,
        -0.83924389,  1.42225981,  2.75040054, -1.39653993,  1.39573848,
         1.54833257, -2.28482318, -1.19908345, -1.23934305,  0.1012596 ,
         0.41159233, -4.30142689, -1.29635179,  2.10533142, -0.36743957,
         1.919065  , -5.38604879, -0.68395364,  1.60128868, -0.92604429],
       [-2.21723104,  0.94654542,  2.71510959, -3.15821409,  1.2421279 ,
         1.54322195,  2.38756323, -0.35632336,  1.59020221, -2.20207787,
        -2.31908298, -0.23842566,  2.842659  , -1.16831934, -1.10033369,
         2.3584559 , -2.86785245,  2.43999338,  2.25221205, -2.86637926,
        -3.29994965,  0.2062892 , -2.27796054, -1.90024221,  5.06210709],
       [-0.41967511,  2.81876731, -3.19176626, -2.13696814, -1.76408064,
         1.12407899,  3.01087523,  2.11157274, -1.18705463,  1.32382548,
        -0.06723422,  0.01781415,  2.63719058, -2.33022046, -1.4483918 ,
        -2.91259265,  1.15207839, -1.17236662, -2.97872806, -0.56889361,
        -1.30872059, -1.56629968,  1.08266664, -1.98741114, -0.89212441],
       [-0.6513139 , -0.67716223, -3.03075981,  2.60289288, -2.83682179,
        -1.9186002 , -2.81462193, -1.34299827, -2.06861496, -2.43334413,
         2.26945639, -0.2679739 , -0.33434728, -1.78550529, -0.60505742,
         1.65669274,  0.72454143,  2.54930949,  1.80095565, -0.6691587 ,
        -2.79404688,  0.49601215,  3.74682832,  1.82545471,  0.06220638],
       [-0.31317323, -2.17267513,  3.26154256, -0.96487314, -3.19127011,
         4.00027752,  1.55789506, -3.2021575 , -3.13566351,  2.24683571,
        -1.71402073, -1.77391756, -3.39949989,  1.01905394, -0.05652857,
         1.37401104, -0.78254008, -2.96524358, -2.75739622, -1.48058116,
        -1.42676282, -1.46033561,  2.61433244, -2.85393929, -3.30281448],
       [ 2.51441741, -2.35930371, -2.45756555, -2.2026875 ,  3.0209465 ,
         1.47433209,  1.19607747, -2.47334051, -0.01969391,  3.09615278,
        -2.00905395,  0.31061357, -2.11481953, -1.13638878, -2.71792459,
        -1.84998   ,  2.16689348, -3.52886462,  0.64545244, -2.02436543,
        -0.21113941,  4.98415565, -3.85935426,  1.63133836, -1.07641482],
       [-1.74175525, -2.37122393,  1.55441034,  0.23550114, -1.96717358,
        -1.83210754, -3.25745535, -1.22520483, -0.62219352,  2.43008757,
        -0.22606266, -0.01252492,  1.41700852,  0.24551152, -0.73388177,
        -3.98788357,  1.05824101,  3.25433969,  0.86931092, -0.00776808,
         0.05484827, -0.38054886,  0.17874011, -1.70390856, -0.95664531]], dtype=float32)

In [20]:
digitsMLP.predicted_vals_logreg()


Out[20]:
array([[  1.69714658e-05,   6.81859674e-05,   4.40727854e-06, ...,
          5.32231061e-03,   2.53793423e-05,   1.24584597e-06],
       [  1.01746270e-03,   4.22267512e-05,   4.14312631e-03, ...,
          2.78708263e-04,   5.46442550e-07,   2.42858031e-03],
       [  1.28256984e-03,   2.17894791e-03,   8.93946737e-04, ...,
          7.34846219e-02,   1.61182688e-04,   1.05979457e-03],
       ..., 
       [  5.44993207e-04,   4.99824178e-04,   2.40481794e-02, ...,
          1.57466289e-04,   6.36213226e-03,   3.31715518e-03],
       [  8.89896415e-04,   6.08195027e-04,   1.27942930e-03, ...,
          8.00565720e-01,   9.88285422e-01,   7.53709316e-01],
       [  9.96679068e-01,   9.98024583e-01,   9.72293377e-01, ...,
          6.17099488e-07,   1.53781421e-05,   1.62253886e-01]], dtype=float32)

In [21]:
testL1a2 = theano.function([], digitsMLP.Thetas[0].alp1 )
print( testL1a2() )
testL2a2 = theano.function([], digitsMLP.Thetas[1].al )
print( testL2a2() )


[[  1.68952462e-03   9.66312247e-04   3.05149867e-03 ...,   6.01312637e-01
    7.69796073e-02   1.04411095e-02]
 [  5.63030466e-02   3.84460762e-02   3.80754247e-02 ...,   5.98353744e-01
    2.61052395e-04   2.27537050e-04]
 [  9.90030646e-01   9.98632133e-01   9.82155979e-01 ...,   1.93933040e-01
    4.72371355e-02   2.82941740e-02]
 ..., 
 [  3.31877563e-05   3.73444600e-05   8.24407116e-03 ...,   2.87410337e-03
    5.70651256e-02   5.32165647e-01]
 [  3.52760107e-04   9.42657294e-04   9.13141354e-04 ...,   6.50500178e-01
    9.91682410e-01   3.51258606e-01]
 [  5.73539697e-02   9.71803325e-04   3.29945865e-03 ...,   6.90432638e-02
    1.61013941e-05   1.03116455e-03]]
[[  1.68952462e-03   9.66312247e-04   3.05149867e-03 ...,   6.01312637e-01
    7.69796073e-02   1.04411095e-02]
 [  5.63030466e-02   3.84460762e-02   3.80754247e-02 ...,   5.98353744e-01
    2.61052395e-04   2.27537050e-04]
 [  9.90030646e-01   9.98632133e-01   9.82155979e-01 ...,   1.93933040e-01
    4.72371355e-02   2.82941740e-02]
 ..., 
 [  3.31877563e-05   3.73444600e-05   8.24407116e-03 ...,   2.87410337e-03
    5.70651256e-02   5.32165647e-01]
 [  3.52760107e-04   9.42657294e-04   9.13141354e-04 ...,   6.50500178e-01
    9.91682410e-01   3.51258606e-01]
 [  5.73539697e-02   9.71803325e-04   3.29945865e-03 ...,   6.90432638e-02
    1.61013941e-05   1.03116455e-03]]

In [33]:
[1,2,3,4,5] + [8,1,5]


Out[33]:
[1, 2, 3, 4, 5, 8, 1, 5]

In [22]:
print( digitsMLP.y.shape )
y_cls_test = np.vstack( np.argmax( digitsMLP.y, axis=0) )
print( y_cls_test.shape )
pd.DataFrame( y_cls_test ).describe()


(10, 5000)
(5000, 1)
Out[22]:
0
count 5000.000000
mean 4.500000
std 2.872569
min 0.000000
25% 2.000000
50% 4.500000
75% 7.000000
max 9.000000

In [23]:
pred_y_cls_test = np.vstack( np.argmax( digitsMLP.predicted_vals_logreg() , axis=0))
print( pred_y_cls_test.shape )
pd.DataFrame( pred_y_cls_test ).describe()


(5000, 1)
Out[23]:
0
count 5000.000000
mean 4.499000
std 2.879315
min 0.000000
25% 2.000000
50% 5.000000
75% 7.000000
max 9.000000

In [24]:
np.mean( pred_y_cls_test == y_cls_test )


Out[24]:
0.99119999999999997

Testing on the MNIST data from the University of Montreal's Deep Learning Tutorial


In [27]:
K=10
m = len(train_set[1])
y_train_prob = [np.zeros(K) for row in train_set[1]]  # list of 50000 numpy arrays, each of shape (10,)
for i in range( m):
        y_train_prob[i][ train_set[1][i]] = 1
y_train_prob = np.array(y_train_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_train_prob.shape )


(10, 50000)

In [28]:
print( pd.DataFrame( y_train_prob).describe() )


           0          1          2          3          4          5      \
count  10.000000  10.000000  10.000000  10.000000  10.000000  10.000000   
mean    0.100000   0.100000   0.100000   0.100000   0.100000   0.100000   
std     0.316228   0.316228   0.316228   0.316228   0.316228   0.316228   
min     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
25%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
50%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
75%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
max     1.000000   1.000000   1.000000   1.000000   1.000000   1.000000   

           6          7          8          9        ...          49990  \
count  10.000000  10.000000  10.000000  10.000000    ...      10.000000   
mean    0.100000   0.100000   0.100000   0.100000    ...       0.100000   
std     0.316228   0.316228   0.316228   0.316228    ...       0.316228   
min     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
25%     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
50%     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
75%     0.000000   0.000000   0.000000   0.000000    ...       0.000000   
max     1.000000   1.000000   1.000000   1.000000    ...       1.000000   

           49991      49992      49993      49994      49995      49996  \
count  10.000000  10.000000  10.000000  10.000000  10.000000  10.000000   
mean    0.100000   0.100000   0.100000   0.100000   0.100000   0.100000   
std     0.316228   0.316228   0.316228   0.316228   0.316228   0.316228   
min     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
25%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
50%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
75%     0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
max     1.000000   1.000000   1.000000   1.000000   1.000000   1.000000   

           49997      49998      49999  
count  10.000000  10.000000  10.000000  
mean    0.100000   0.100000   0.100000  
std     0.316228   0.316228   0.316228  
min     0.000000   0.000000   0.000000  
25%     0.000000   0.000000   0.000000  
50%     0.000000   0.000000   0.000000  
75%     0.000000   0.000000   0.000000  
max     1.000000   1.000000   1.000000  

[8 rows x 50000 columns]

In [29]:
m,d= train_set[0].shape
MNIST_MTL = MLP(3,[d,25,10], m, train_set[0].T, y_train_prob, T.nnet.sigmoid, 1., 0.1, 0.00001)

In [31]:
MNIST_MTL.accuracy_log_reg()


Accuracy score : 0.095720 
Out[31]:
0.09572
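
Before any training, the accuracy is about 0.096 — essentially chance level for 10 classes (1/10) under the random initial weights.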

In [32]:
print( MNIST_MTL.Thetas[0].Theta.get_value() )
MNIST_MTL.Thetas[1].Theta.get_value()


[[-0.21252947  0.08412755 -0.04290283 ...,  0.07965862 -0.23788518
   0.22836781]
 [ 0.21750133 -0.28106511  0.2671701  ...,  0.05657074 -0.29780233
  -0.34037825]
 [-0.02203849  0.19995408  0.29139307 ..., -0.0802632   0.14923781
  -0.01326215]
 ..., 
 [-0.09192752  0.2681919   0.26859927 ...,  0.06894684  0.12018485
  -0.32358104]
 [-0.13953739  0.02424429 -0.20282558 ...,  0.13586117 -0.29190949
   0.01420764]
 [ 0.14536618 -0.06120829 -0.19793963 ...,  0.02141157 -0.26165357
  -0.22347094]]
Out[32]:
array([[ 0.39743981,  0.47411704, -0.06570012, -1.42164946, -1.16744244,
        -0.44029245,  1.55344975,  0.42985651,  0.42142299,  0.34747389,
        -0.81492281, -1.56219399,  0.82000202, -0.40216985,  1.47232902,
         0.35999769,  0.68932515,  0.82125103, -0.06185548, -0.27060255,
        -0.25692338, -0.86920112, -1.25981688,  1.49232507,  0.52447045],
       [-0.45812011, -1.22509992, -1.14734924,  0.8219015 ,  0.67774659,
        -0.54277843, -0.51398909, -1.19371367,  1.41380441,  0.30840605,
        -0.11502755,  0.03701442, -0.82230645,  0.57699567,  1.3465414 ,
         0.46789521,  0.72777843, -0.74098152,  1.17320132,  0.3995336 ,
         0.79102081, -0.1648806 , -0.07733183, -1.60657287, -0.64839828],
       [-0.26341805,  1.1494323 , -0.1576034 , -0.51010013, -1.4537679 ,
        -0.32284725, -0.11438043, -0.16837011, -0.92213452, -1.61426508,
        -1.34188533, -0.43147799, -0.61895543,  1.38522243, -0.58895016,
        -0.7618435 ,  0.89010906, -0.43544725,  0.9864412 ,  1.43864715,
        -1.04564011,  1.22792351,  1.51681113,  1.48080194,  0.77915668],
       [ 0.20965257,  0.96099746, -1.38709569, -0.71545523,  1.10635912,
        -1.29045534, -1.57748663, -0.95929617, -0.24077024, -0.35265234,
        -0.75665629, -1.07101023, -0.00545067, -0.99316341,  1.4602592 ,
         0.7212196 , -0.10515147,  0.49578574,  1.45931995,  1.07267094,
        -0.01084974,  1.01463413, -0.73990732, -0.75446784, -1.58813906],
       [ 1.26138949, -0.47427896, -0.4497835 , -0.19745249, -0.21095291,
        -0.08235157,  1.11536026,  0.04610212,  1.11256003, -0.46868172,
         0.98976576,  0.5969364 ,  1.2798928 , -0.48845506, -1.03150034,
         0.96056491,  0.30198944,  1.47925031,  0.36784145,  0.33264968,
         0.26245806,  0.48660216,  0.96751767, -1.27767527,  0.42533398],
       [-1.35573721, -0.85225189,  0.39260316,  1.22144449, -0.31289348,
        -0.9116798 , -1.23236263,  0.81047082,  0.68190771,  0.23868939,
         0.48585162,  0.1120138 , -0.15501086,  0.46083274,  1.03472054,
         1.32915258, -1.44029725, -0.71790475,  1.60148978,  0.20379403,
         1.15068221, -1.49505317, -0.42173779, -0.72793669,  0.96851325],
       [ 1.43791103, -0.69099313, -0.79151177,  0.21312031,  1.36474013,
        -1.19343758, -0.45168665,  0.60482723,  0.41439977, -1.3197763 ,
        -1.09207129,  0.23710582,  0.06775524, -0.72686183,  0.96668959,
         0.09902351,  0.4042387 , -0.44409871, -0.26330748,  1.35316038,
        -1.48465347,  1.03650403,  0.71476686,  1.63380992,  0.13157721],
       [-0.90288603,  1.17905378,  1.22328877,  1.41004956,  0.29629964,
        -0.9095031 , -0.23757875,  1.6151613 , -1.52695167, -1.07278395,
         1.56079459,  0.52057123, -1.35828531, -1.01550984,  0.75505394,
        -0.20916736, -0.56811482, -0.02100989,  1.45307636, -1.61759782,
        -0.51052654, -1.48488081,  1.15462708, -1.02267063, -1.04683352],
       [ 1.52449572,  1.1317147 ,  0.56625313,  1.33766675, -0.3755441 ,
         0.9018296 ,  0.18880704, -0.07619639, -0.89904916,  1.51346755,
        -0.34904653, -0.45106241,  1.63571787,  0.12569541,  1.17675114,
        -0.60786849, -0.92819321,  0.43607047,  1.23182929, -0.37070179,
         0.63889045, -1.0101124 , -1.28258407,  1.46705651, -0.69088858],
       [-1.50364363,  1.22934222,  0.83844817,  0.65174961, -1.37190723,
        -0.32444778, -0.26416573,  0.29394153, -0.66541278,  0.23971787,
         0.32632565, -1.46561682, -0.02179751, -1.46791589, -0.15427259,
        -0.18021606, -0.39853364, -0.67619216, -0.01778612, -0.78865045,
        -1.53309178,  0.3225404 ,  1.01434851,  0.08351628,  1.15306926]], dtype=float32)

In [33]:
MNIST_MTL.predicted_vals_logreg()


Out[33]:
array([[ 0.77596611,  0.75060475,  0.423906  , ...,  0.60422331,
         0.40988356,  0.31822193],
       [ 0.34422615,  0.67563272,  0.273395  , ...,  0.20291083,
         0.3232716 ,  0.4690749 ],
       [ 0.93080932,  0.28231877,  0.59183002, ...,  0.95939362,
         0.69641775,  0.79752219],
       ..., 
       [ 0.00841192,  0.01366395,  0.53732932, ...,  0.08155674,
         0.07537536,  0.07692103],
       [ 0.90377617,  0.96592015,  0.98436731, ...,  0.98901641,
         0.98044145,  0.9684335 ],
       [ 0.0747021 ,  0.01449165,  0.20927182, ...,  0.16606021,
         0.27188161,  0.05889074]], dtype=float32)

In [34]:
MNIST_MTL.train_model(100000)

In [35]:
MNIST_MTL.accuracy_log_reg()


Accuracy score : 0.980420 
Out[35]:
0.98041999999999996

In [36]:
print( MNIST_MTL.Thetas[0].Theta.get_value() )
MNIST_MTL.Thetas[1].Theta.get_value()


[[-0.47362512  0.18747789 -0.09561399 ...,  0.17751911 -0.53015506
   0.50892061]
 [ 0.48472929 -0.62637842  0.5954116  ...,  0.12606943 -0.66368026
  -0.7585848 ]
 [-0.04911496  0.44561294  0.64935881 ..., -0.1788629   0.33259752
  -0.02955511]
 ..., 
 [-0.20487256  0.59768224  0.59859228 ...,  0.15364726  0.26784071
  -0.72111851]
 [-0.31096086  0.05403042 -0.45201683 ...,  0.30278146 -0.65051121
   0.03166183]
 [ 0.3239423  -0.13641159 -0.44111899 ...,  0.0477186  -0.58310646
  -0.49803132]]
Out[36]:
array([[  7.11062133e-01,   4.57554549e-01,   9.75120664e-02,
         -5.07937050e+00,  -3.83276868e+00,   5.47499716e-01,
          7.71603918e+00,   1.36168015e+00,   3.17900872e+00,
         -2.18454540e-01,  -2.86413026e+00,  -4.37900066e+00,
         -2.17216992e+00,  -6.29720163e+00,   3.04203176e+00,
         -5.22348732e-02,   1.34919095e+00,   4.03648329e+00,
         -4.14651203e+00,  -5.77749729e+00,  -2.90662193e+00,
         -3.27267551e+00,  -4.66104698e+00,   5.65531969e+00,
          4.85670775e-01],
       [ -9.64691222e-01,  -6.14411163e+00,  -7.89063311e+00,
          2.64467096e+00,   1.46627915e+00,  -3.22474170e+00,
         -1.01905584e+00,  -1.88596475e+00,   7.01221895e+00,
          1.92705405e+00,   2.55174327e+00,   1.04782128e+00,
         -5.82779360e+00,   3.49170709e+00,   6.75916958e+00,
          2.16688132e+00,   5.75238085e+00,  -3.99549007e+00,
          1.14223826e+00,  -3.51164603e+00,   1.34314454e+00,
          3.65567946e+00,   1.13587379e-01,  -5.55841112e+00,
         -5.92364883e+00],
       [ -6.39417708e-01,   3.38985538e+00,  -3.20516133e+00,
         -5.39822042e-01,  -5.74882793e+00,  -5.57149768e-01,
          4.90440190e-01,  -2.76023102e+00,  -4.99793005e+00,
         -6.13943529e+00,  -4.86437082e+00,   1.80150628e+00,
         -2.43367410e+00,   5.27764797e+00,  -6.62898493e+00,
          4.37618375e-01,   4.27750015e+00,  -2.12696409e+00,
          4.19394159e+00,   5.73637605e-01,   1.46028674e+00,
         -1.85208154e+00,   3.50097370e+00,   6.81654167e+00,
          3.72835803e+00],
       [  5.72197735e-01,   1.68034172e+00,  -3.00833821e+00,
         -4.48841381e+00,   5.16429281e+00,  -4.16888475e+00,
         -6.96294880e+00,  -1.28992057e+00,  -2.63191819e-01,
         -1.80943739e+00,  -6.06960821e+00,  -6.07639885e+00,
         -1.68026042e+00,  -2.49091673e+00,   2.72927237e+00,
          4.20622349e+00,  -6.85564899e+00,   8.40938282e+00,
          3.71062899e+00,   4.54962540e+00,  -1.98130476e+00,
          4.28268671e+00,  -3.92813444e+00,  -4.68170261e+00,
         -3.60260940e+00],
       [  2.54607844e+00,  -7.96052980e+00,  -2.66434860e+00,
         -2.93493319e+00,  -2.69420886e+00,   2.50106549e+00,
          3.34071350e+00,  -2.57470822e+00,   4.58119583e+00,
         -6.90563297e+00,   3.39330125e+00,   2.23719907e+00,
          3.48375678e+00,  -5.02125692e+00,  -8.19848251e+00,
         -1.71341038e+00,  -5.05596447e+00,   6.00544357e+00,
         -6.56114340e-01,  -1.66288626e+00,   6.16928673e+00,
         -2.33877707e+00,   3.89905548e+00,  -5.27366161e+00,
          4.28496420e-01],
       [ -3.05176616e+00,  -1.78837430e+00,   7.71145201e+00,
          4.71982098e+00,  -3.77251506e+00,  -1.15936108e+01,
         -1.03705215e+01,   6.77015543e+00,   1.57322168e-01,
         -8.24020922e-01,   1.67905545e+00,  -1.78405321e+00,
         -7.02612519e-01,   1.29097593e+00,   4.61267138e+00,
          7.27420330e+00,  -6.49334812e+00,  -9.20294094e+00,
          4.45572805e+00,   6.66842163e-01,   5.05022955e+00,
         -4.36325932e+00,  -5.60956860e+00,  -2.40490198e+00,
          4.14503574e+00],
       [  3.16660190e+00,  -4.45554256e+00,  -8.53040874e-01,
          1.87503129e-01,   3.07649398e+00,  -4.22100449e+00,
         -3.50551677e+00,   1.83885932e+00,   2.24337983e+00,
         -4.28148890e+00,  -4.76952076e+00,   2.70211458e+00,
          3.59806252e+00,  -7.04305601e+00,   3.61042929e+00,
         -5.86637259e+00,   4.11456347e+00,  -3.98823214e+00,
         -4.80216026e+00,   4.52647972e+00,  -7.22660494e+00,
          1.45237148e-01,  -3.31796020e-01,   6.53981590e+00,
         -1.41842246e+00],
       [ -1.80336964e+00,   8.29802036e-01,   3.64742970e+00,
         -2.13532329e+00,   2.97970319e+00,  -2.54826999e+00,
          5.18451631e-01,   7.25896454e+00,  -3.89744210e+00,
         -3.08217072e+00,   7.56499863e+00,   4.09100199e+00,
         -7.79509687e+00,  -3.42566800e+00,   1.08090663e+00,
         -2.89872932e+00,  -2.74361014e+00,  -1.76804709e+00,
          6.18264246e+00,  -5.52989483e+00,  -2.50305200e+00,
         -5.23402739e+00,   6.55207300e+00,  -5.23101330e+00,
         -6.57023430e+00],
       [  3.09581041e+00,  -2.72198647e-01,   1.52220082e+00,
          6.30821896e+00,  -3.80636024e+00,   2.36185217e+00,
         -1.18582726e+00,  -5.26250982e+00,  -6.28981304e+00,
          3.22874022e+00,  -6.88107872e+00,   2.49213648e+00,
          4.94375658e+00,  -1.33158898e+00,  -9.53457475e-01,
         -9.96722794e+00,  -7.79621029e+00,   2.17832088e+00,
         -4.02353048e+00,  -2.39235783e+00,   2.79603124e+00,
         -3.93508315e+00,  -4.85469151e+00,   4.19522619e+00,
         -4.63762474e+00],
       [ -3.33755684e+00,   4.57075739e+00,  -4.37777138e+00,
          2.42956066e+00,  -3.74254751e+00,  -8.17862332e-01,
          2.52893835e-01,  -5.16166925e+00,  -4.43093204e+00,
          8.01136875e+00,  -5.95063388e-01,  -8.98904037e+00,
          1.42401373e+00,  -7.24769258e+00,  -1.48296952e+00,
         -8.67474616e-01,  -2.32061291e+00,  -2.56787968e+00,
         -1.69821453e+00,  -6.43771887e+00,  -5.80188227e+00,
          4.79170799e+00,  -7.73105025e-01,  -6.07473543e-04,
          6.16166925e+00]], dtype=float32)

In [37]:
MNIST_MTL.predicted_vals_logreg()


Out[37]:
array([[  4.52048823e-07,   1.00000000e+00,   7.51884666e-10, ...,
          6.04108479e-08,   1.88120055e-06,   1.62847478e-08],
       [  5.50135458e-11,   3.89633271e-07,   3.38999451e-09, ...,
          7.60669491e-06,   4.72258321e-09,   7.85911197e-06],
       [  2.77830568e-05,   2.95011669e-05,   3.45770331e-06, ...,
          8.52322955e-06,   7.60071725e-02,   1.38454227e-04],
       ..., 
       [  1.52943358e-05,   1.54980810e-08,   9.39907186e-05, ...,
          7.08678338e-10,   7.70168582e-11,   4.95301133e-09],
       [  9.09049849e-11,   7.11831039e-13,   5.01017041e-07, ...,
          9.96697187e-01,   1.42343296e-02,   9.99997377e-01],
       [  1.09123675e-05,   1.50772337e-06,   9.65651736e-08, ...,
          1.19623294e-07,   9.40610245e-02,   1.57795876e-06]], dtype=float32)

In [38]:
import cPickle

In [40]:
save_file = open('./saved_models/MNIST_MTL_log_reg','wb')

In [41]:
for Thet in MNIST_MTL.Thetas:
    cPickle.dump( Thet.Theta.get_value(borrow=True), save_file,-1) # protocol -1 selects the HIGHEST pickle protocol available
    cPickle.dump( Thet.b.get_value(borrow=True), save_file,-1)

In [42]:
save_file.close()
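
To reload these parameters later, read the pickles back in the same order they were written. A minimal sketch, assuming an MNIST_MTL instance with the same architecture has already been constructed (so each shared variable has the right shape):

load_file = open('./saved_models/MNIST_MTL_log_reg', 'rb')
for Thet in MNIST_MTL.Thetas:
    Thet.Theta.set_value( cPickle.load(load_file), borrow=True)  # same order as the dumps above
    Thet.b.set_value( cPickle.load(load_file), borrow=True)
load_file.close()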

In [50]:
MNIST_MTL.Thetas[0].al.set_value( valid_set[0].T.astype(theano.config.floatX) )

In [47]:
K=10
m = len(valid_set[1])
y_valid_prob = [np.zeros(K) for row in valid_set[1]]  # list of 10000 numpy arrays, each of shape (10,)
for i in range( m):
        y_valid_prob[i][ valid_set[1][i]] = 1
y_valid_prob = np.array(y_valid_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_valid_prob.shape )


(10, 10000)

In [48]:
MNIST_MTL.y = y_valid_prob

In [51]:
MNIST_MTL.predicted_vals_logreg()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-51-ec623bb772a5> in <module>()
----> 1 MNIST_MTL.predicted_vals_logreg()

/home/topolo/PropD/MLgrabbag/ML/NN.py in predicted_vals_logreg(self)
    311         def predicted_vals_logreg(self):
    312                 predict_vals_func = predicted_logreg( self.Thetas[-1].alp1 )
--> 313                 return predict_vals_func() # do the actual prediction on actual values, with the inputted X and trained Thetas,b's
    314 
    315 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: dimension mismatch in args to gemm (25,784)x(784,10000)->(25,50000)
Apply node that caused the error: GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, al, TensorConstant{1.0})
Toposort index: 14
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(25, 50000), (), (25, 784), (784, 10000), ()]
Inputs strides: [(50000, 1), (), (784, 1), (10000, 1), ()]
Inputs values: ['not shown', array(1.0, dtype=float32), 'not shown', 'not shown', array(1.0, dtype=float32)]
Outputs clients: [[GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
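
The gemm shapes in the traceback pinpoint the problem: al was re-set to the (784, 10000) validation matrix, but the graph was built when the training set's m = 50000 was baked into the bias tile, so the (25,784)x(784,10000) product cannot land in the expected (25,50000) buffer. Mutating a shared input does not re-specialize shapes that were frozen at graph-construction time. A self-contained toy reproducing the same failure mode (names here are illustrative, not from NN.py):

import numpy as np
import theano
import theano.tensor as T

W = theano.shared( np.ones((2,3), dtype='float32') )
X = theano.shared( np.ones((3,5), dtype='float32') )
b = theano.shared( np.zeros((2,1), dtype='float32') )
f = theano.function([], T.dot(W, X) + T.tile(b, (1,5)))  # batch width 5 frozen into the graph
X.set_value( np.ones((3,7), dtype='float32') )           # swap in a wider batch...
f()                                                      # ...raises a shape-mismatch ValueError, as above

The fix, worked out below, is to rebuild the forward pass with the new batch size (In [60]) or to substitute fresh inputs through givens at compile time (In [68]).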

In [53]:
theano.function([], MNIST_MTL.Thetas[0].alp1)()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-53-10646c6daf13> in <module>()
----> 1 theano.function([], MNIST_MTL.Thetas[0].alp1)()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: dimension mismatch in args to gemm (25,784)x(784,10000)->(25,50000)
Apply node that caused the error: GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, al, TensorConstant{1.0})
Toposort index: 7
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(25, 50000), (), (25, 784), (784, 10000), ()]
Inputs strides: [(50000, 1), (), (784, 1), (10000, 1), ()]
Inputs values: ['not shown', array(1.0, dtype=float32), 'not shown', 'not shown', array(1.0, dtype=float32)]
Outputs clients: [[GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [56]:
Layer1 = MNIST_MTL.Thetas[0]
Layer2 = MNIST_MTL.Thetas[1]
m = valid_set[0].shape[0]
print(m)


10000

In [60]:
a2 = T.nnet.sigmoid( T.dot( Layer1.Theta, Layer1.al) + T.tile( Layer1.b, (1,m)) )
a3 = T.nnet.sigmoid( T.dot( Layer2.Theta, a2) + T.tile( Layer2.b, (1,m)) )
valid_pred = theano.function([], a3)()
print( valid_pred.shape)


(10, 10000)

In [61]:
pd.DataFrame( valid_pred.T).describe()


Out[61]:
0 1 2 3 4 5 6 7 8 9
count 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04
mean 9.992079e-02 1.074899e-01 9.912494e-02 1.052671e-01 9.976897e-02 9.037534e-02 9.769981e-02 1.085759e-01 1.005443e-01 9.500753e-02
std 2.961449e-01 3.058113e-01 2.902825e-01 2.958724e-01 2.931867e-01 2.795904e-01 2.927824e-01 3.047731e-01 2.924316e-01 2.828876e-01
min 9.005615e-17 1.986378e-17 2.439026e-13 6.265842e-16 1.214437e-18 6.500246e-18 9.674661e-19 1.735837e-17 3.318569e-18 6.359484e-18
25% 1.320481e-09 1.788181e-09 4.365240e-07 1.482097e-07 1.504599e-09 4.772580e-09 1.436151e-10 3.032865e-09 3.319625e-08 1.242549e-08
50% 9.058250e-08 1.760941e-07 1.530497e-05 8.275158e-06 3.291525e-07 4.075375e-07 2.823017e-08 4.088125e-07 3.484185e-06 1.585515e-06
75% 1.096723e-05 3.165664e-05 5.320824e-04 4.356618e-04 3.269091e-05 3.865018e-05 6.124497e-06 8.373335e-05 2.405906e-04 2.055749e-04
max 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 9.999994e-01

In [62]:
np.mean( np.vstack( np.argmax( valid_pred,axis=0)) == np.vstack( valid_set[1] ) )


Out[62]:
0.95340000000000003
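
The np.vstack calls just reshape both argmax vectors into columns before comparing; the same 95.34% validation accuracy comes from comparing the flat vectors directly:

np.mean( np.argmax( valid_pred, axis=0) == valid_set[1] )  # also 0.9534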

In [63]:
X_in = T.matrix()

In [64]:
X_in.set_value( valid_set[0].T.astype(theano.config.floatX))


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-64-922d1dc9b484> in <module>()
----> 1 X_in.set_value( valid_set[0].T.astype(theano.config.floatX))

AttributeError: 'TensorVariable' object has no attribute 'set_value'
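
set_value exists only on theano.shared variables, which own storage; T.matrix() creates a purely symbolic variable with no value attached. To bind data to a symbolic input without making it a function argument, pass it through givens at compile time (as done in the next cell), or start from a shared variable instead, e.g.:

X_shared = theano.shared( valid_set[0].T.astype(theano.config.floatX) )  # owns storage, so set_value works
X_shared.set_value( test_set[0].T.astype(theano.config.floatX) )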

In [68]:
a2_giv = T.nnet.sigmoid( T.dot( Layer1.Theta, X_in) + T.tile(Layer1.b, (1,m)))
a3_giv = T.nnet.sigmoid( T.dot( Layer2.Theta, a2_giv) + T.tile( Layer2.b, (1,m)) )
valid_pred_givens = theano.function([], outputs=a3_giv, givens={ X_in: valid_set[0].T.astype(theano.config.floatX)} )

In [72]:
print( valid_pred_givens().shape )
pd.DataFrame( valid_pred_givens().T).describe()


(10, 10000)
Out[72]:
0 1 2 3 4 5 6 7 8 9
count 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04 1.000000e+04
mean 9.992079e-02 1.074899e-01 9.912494e-02 1.052671e-01 9.976897e-02 9.037534e-02 9.769981e-02 1.085759e-01 1.005443e-01 9.500753e-02
std 2.961449e-01 3.058113e-01 2.902825e-01 2.958724e-01 2.931867e-01 2.795904e-01 2.927824e-01 3.047731e-01 2.924316e-01 2.828876e-01
min 9.005615e-17 1.986378e-17 2.439026e-13 6.265842e-16 1.214437e-18 6.500246e-18 9.674661e-19 1.735837e-17 3.318569e-18 6.359484e-18
25% 1.320481e-09 1.788181e-09 4.365240e-07 1.482097e-07 1.504599e-09 4.772580e-09 1.436151e-10 3.032865e-09 3.319625e-08 1.242549e-08
50% 9.058250e-08 1.760941e-07 1.530497e-05 8.275158e-06 3.291525e-07 4.075375e-07 2.823017e-08 4.088125e-07 3.484185e-06 1.585515e-06
75% 1.096723e-05 3.165664e-05 5.320824e-04 4.356618e-04 3.269091e-05 3.865018e-05 6.124497e-06 8.373335e-05 2.405906e-04 2.055749e-04
max 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 9.999994e-01

In [78]:
np.mean( np.vstack( np.argmax( valid_pred_givens(),axis=0)) == np.vstack( valid_set[1] ) )


Out[78]:
0.95340000000000003

In [79]:
test_pred_givens = theano.function([], outputs=a3_giv, givens={ X_in: test_set[0].T.astype(theano.config.floatX)} )

In [80]:
np.mean( np.vstack( np.argmax( test_pred_givens(),axis=0)) == np.vstack( test_set[1] ) )


Out[80]:
0.94979999999999998

In [81]:
range(1,3)


Out[81]:
[1, 2]

In [82]:
range(3)


Out[82]:
[0, 1, 2]

In [83]:
range(1,3-1)


Out[83]:
[1]

In [11]:
gls_data = pd.read_csv( "./kaggle/glass.csv")

In [12]:
gls_data.describe()


Out[12]:
RI Na Mg Al Si K Ca Ba Fe Type
count 214.000000 214.000000 214.000000 214.000000 214.000000 214.000000 214.000000 214.000000 214.000000 214.000000
mean 1.518365 13.407850 2.684533 1.444907 72.650935 0.497056 8.956963 0.175047 0.057009 2.780374
std 0.003037 0.816604 1.442408 0.499270 0.774546 0.652192 1.423153 0.497219 0.097439 2.103739
min 1.511150 10.730000 0.000000 0.290000 69.810000 0.000000 5.430000 0.000000 0.000000 1.000000
25% 1.516523 12.907500 2.115000 1.190000 72.280000 0.122500 8.240000 0.000000 0.000000 1.000000
50% 1.517680 13.300000 3.480000 1.360000 72.790000 0.555000 8.600000 0.000000 0.000000 2.000000
75% 1.519157 13.825000 3.600000 1.630000 73.087500 0.610000 9.172500 0.000000 0.100000 3.000000
max 1.533930 17.380000 4.490000 3.500000 75.410000 6.210000 16.190000 3.150000 0.510000 7.000000

In [16]:
gls_data.get_values().shape


Out[16]:
(214, 10)

In [12]:
X_gls = gls_data.get_values()[:,:-1]
print(X_gls.shape)
y_gls = gls_data.get_values()[:,-1]
print(y_gls.shape)
print( y_gls[:10])
X_gls_train = gls_data.get_values()[:-14,:-1]
print(X_gls_train.shape)
y_gls_train = gls_data.get_values()[:-14,-1]
print(y_gls_train.shape)


(214, 9)
(214,)
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
(200, 9)
(200,)

In [29]:
K=7
m = len(y_gls_train)
y_gls_train_prob = [np.zeros(K) for row in y_gls_train]  # list of 200 numpy arrays, each of shape (7,)
for i in range( m):
        y_gls_train_prob[i][ int(y_gls_train[i])-1] = 1  # cast to int: the Type labels load as floats
y_gls_train_prob = np.array(y_gls_train_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_gls_train_prob.shape )


(7, 200)
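
This label-to-one-hot loop recurs throughout the notebook; numpy can build the same (K, m) matrix in one shot by indexing into an identity matrix. A sketch, assuming 1-based integer class labels as in the glass Type column:

labels = y_gls_train.astype(int) - 1                                  # shift to 0-based class indices
y_gls_train_prob = np.eye(K)[labels].T.astype(theano.config.floatX)   # shape (K, m), a single 1 per column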

In [42]:
gls_MLP = MLP( 3, [9,8,7],200, X_gls_train.T, y_gls_train_prob, T.nnet.sigmoid, 0.01,0.05,0.0001 )

In [43]:
gls_MLP.accuracy_log_reg()


Accuracy score : 0.045000 
Out[43]:
0.044999999999999998

In [44]:
gls_MLP.train_model(10000)

In [45]:
gls_MLP.accuracy_log_reg()


Accuracy score : 0.380000 
Out[45]:
0.38

In [46]:
gls_MLP.predicted_vals_logreg()


Out[46]:
array([[ 0.34978667,  0.34978667,  0.34978667, ...,  0.34978667,
         0.34978667,  0.34978667],
       [ 0.3797904 ,  0.3797904 ,  0.3797904 , ...,  0.3797904 ,
         0.3797904 ,  0.3797904 ],
       [ 0.08406211,  0.08406211,  0.08406211, ...,  0.08406211,
         0.08406211,  0.08406211],
       ..., 
       [ 0.06395678,  0.06395678,  0.06395678, ...,  0.06395678,
         0.06395678,  0.06395678],
       [ 0.04376425,  0.04376425,  0.04376425, ...,  0.04376425,
         0.04376425,  0.04376425],
       [ 0.07399232,  0.07399232,  0.07399232, ...,  0.07399232,
         0.07399232,  0.07399232]], dtype=float32)

In [47]:
gls_MLP.train_model(10000)
gls_MLP.accuracy_log_reg()


Accuracy score : 0.380000 
Out[47]:
0.38

In [48]:
X_gls_test = gls_data.get_values()[-14:,:-1]
print( X_gls_test.shape )
y_gls_test = gls_data.get_values()[-14:,-1]
print( y_gls_test.shape)


(14, 9)
(14,)

In [49]:
gls_predict_on_test = gls_MLP.predict_on( 14, X_gls_test.T )

In [51]:
np.mean( np.vstack( np.argmax( gls_predict_on_test(), axis=0) ) == (y_gls_test-1) )


Out[51]:
0.0

In [52]:
y_gls_test


Out[52]:
array([ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.])

In [53]:
np.vstack( np.argmax( gls_predict_on_test(), axis=0))


Out[53]:
array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1]])
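
The 0.0 test accuracy is an artifact of the split as much as of the model: glass.csv is ordered by Type, so the 14 held-out rows are all class 7 (Out[52]), while the network predicts index 1, i.e. class 2, for every input (Out[53]). A shuffled split avoids testing only on a single class; a sketch using sklearn (train_test_split lives in sklearn.cross_validation in the sklearn version contemporary with this notebook, sklearn.model_selection in later ones):

from sklearn.cross_validation import train_test_split
X_gls_train, X_gls_test, y_gls_train, y_gls_test = train_test_split( X_gls, y_gls, test_size=14, random_state=0)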

In [17]:
X_sym = T.matrix()

In [33]:
rng = np.random.RandomState(1234)
Thetab1 = Layer( rng, 1, 4,3,2, al = X_sym, activation=T.nnet.sigmoid)

In [34]:
Thetab1.alp1
Thetab1.Theta.get_value().shape


Out[34]:
(3, 4)

In [35]:
Thetab2 = Layer( rng, 2, 3,2,2, al=Thetab1.alp1, activation=T.nnet.sigmoid)

In [36]:
Thetab2.al = Thetab1.alp1

In [30]:
X_sym.shape[0]


Out[30]:
Subtensor{int64}.0

In [31]:
T.tile( Thetab1.b, (1, X_sym.shape[0]))


Out[31]:
Reshape{2}.0

In [42]:
test12comp = theano.function( [], outputs=Thetab2.alp1, givens={ X_sym : X42test} )

In [40]:
X42test = np.array([1,2,3,4,5,6,7,8]).reshape((4,2)).astype(theano.config.floatX)

In [43]:
test12comp()


Out[43]:
array([[ 0.91000074,  0.91101253],
       [ 0.02431746,  0.02417665]], dtype=float32)

In [44]:
X43test = np.array(range(1,13)).reshape((4,3)).astype(theano.config.floatX)

In [45]:
X43test


Out[45]:
array([[  1.,   2.,   3.],
       [  4.,   5.,   6.],
       [  7.,   8.,   9.],
       [ 10.,  11.,  12.]], dtype=float32)

In [46]:
test43comp = theano.function( [], outputs=Thetab2.alp1, givens={ X_sym : X43test} )

In [47]:
test43comp()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-47-4a3d91ae9a55> in <module>()
----> 1 test43comp()

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    869                     node=self.fn.nodes[self.fn.position_of_error],
    870                     thunk=thunk,
--> 871                     storage_map=getattr(self.fn, 'storage_map', None))
    872             else:
    873                 # old-style linkers raise their own exceptions

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    312         # extra long error message in that case.
    313         pass
--> 314     reraise(exc_type, exc_value, exc_trace)
    315 
    316 

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    857         t0_fn = time.time()
    858         try:
--> 859             outputs = self.fn()
    860         except Exception:
    861             if hasattr(self.fn, 'position_of_error'):

ValueError: dimension mismatch in args to gemm (3,4)x(4,3)->(3,2)
Apply node that caused the error: GpuGemm{inplace}(GpuReshape{2}.0, TensorConstant{1.0}, Theta1, <CudaNdarrayType(float32, matrix)>, TensorConstant{1.0})
Toposort index: 14
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(3, 2), (), (3, 4), (4, 3), ()]
Inputs strides: [(2, 1), (), (4, 1), (3, 1), ()]
Inputs values: ['not shown', array(1.0, dtype=float32), 'not shown', 'not shown', array(1.0, dtype=float32)]
Outputs clients: [[GpuElemwise{ScalarSigmoid}[(0, 0)](GpuGemm{inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [50]:
print( type(Thetab1.al ))


<class 'theano.tensor.var.TensorVariable'>

In [52]:
lin_zlp1 = T.dot(Thetab1.Theta, Thetab1.al)+T.tile( Thetab1.b, (1,Thetab1.al.shape[1]))
a1p1 = Thetab1.g( lin_zlp1 )

In [51]:
Thetab1.al = X_sym

In [53]:
Thetab2.al = a1p1

In [54]:
lin_z2p1 = T.dot(Thetab2.Theta, Thetab2.al)+T.tile( Thetab2.b, (1, Thetab2.al.shape[1]))
a2p1 = Thetab2.g( lin_z2p1 )

In [55]:
test_gen_conn = theano.function([], outputs=a2p1, givens={ Thetab1.al : X42test })

In [56]:
test_gen_conn()


Out[56]:
array([[ 0.91000074,  0.91101253],
       [ 0.02431746,  0.02417665]], dtype=float32)

In [57]:
test_gen_conn = theano.function([], outputs=a2p1, givens={ Thetab1.al : X43test })

In [58]:
test_gen_conn()


Out[58]:
array([[ 0.91144621,  0.91158789,  0.91166627],
       [ 0.02425005,  0.02417867,  0.02417858]], dtype=float32)
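
Both widths work here because T.tile( b, (1, al.shape[1])) keeps the batch size symbolic, so one graph serves the (4,2) and (4,3) inputs alike. An alternative, assuming b is stored as an (s_{l+1}, 1) column, is to skip the tile and let broadcasting add the bias by declaring the second axis broadcastable:

b_col = theano.shared( np.zeros((3,1)).astype(theano.config.floatX), broadcastable=(False, True))
lin_zlp1 = T.dot( Thetab1.Theta, Thetab1.al) + b_col  # b_col broadcasts across the batch dimension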

GPU test


In [59]:
test_gen_conn = theano.function([], outputs=sandbox.cuda.basic_ops.gpu_from_host(a2p1), givens={ Thetab1.al : X42test })

In [60]:
test_gen_conn()


Out[60]:
CudaNdarray([[ 0.91000074  0.91101253]
 [ 0.02431746  0.02417665]])

In [61]:
test_gen_conn = theano.function([], outputs=sandbox.cuda.basic_ops.gpu_from_host(a2p1), givens={ Thetab1.al : X43test })

In [62]:
test_gen_conn()


Out[62]:
CudaNdarray([[ 0.91144621  0.91158789  0.91166627]
 [ 0.02425005  0.02417867  0.02417858]])
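
Wrapping the output in sandbox.cuda.basic_ops.gpu_from_host keeps the result on the device, so the function returns a CudaNdarray rather than copying back to host memory; that the values match the earlier runs confirms the forward pass ran on the GPU. When a host copy is needed, numpy converts it back explicitly:

valid_on_gpu = test_gen_conn()   # CudaNdarray, still in GPU memory
np.asarray( valid_on_gpu )       # explicit device-to-host copy, float32 ndarray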

Summary: a neural net with multiple layers for logistic regression (which can be extended to linear regression)


In [9]:
sys.path.append( os.getcwd() + '/ML' )

In [10]:
from NN import MLP

In [11]:
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')

# recall that whereas the original labels (in the variable y) were 1, 2, ..., 10, for the purpose of training a 
# neural network, we need to recode the labels as vectors containing only values 0 or 1
K=10
m = ex4data1['y'].shape[0]
y_prob = [np.zeros(K) for row in ex4data1['y']]  # list of 5000 numpy arrays, each of shape (10,)
for i in range( m):
        y_prob[i][ ex4data1['y'][i]-1] = 1
y_prob = np.array(y_prob).T.astype(theano.config.floatX)  # size dims. (K,m)

print(ex4data1['X'].T.shape)
print(y_prob.shape)


Loading and Visualizing Data ... 

(400, 5000)
(10, 5000)

In [12]:
digitsMLP = MLP( 3, [400,25,10], ex4data1['X'].T, y_prob, T.nnet.sigmoid, 1.)

In [13]:
digitsMLP.build_update(ex4data1['X'].T, y_prob, 0.01, 0.00001)

In [14]:
digitsMLP.predicted_vals_logreg()


Out[14]:
array([[ 0.01459562,  0.00558456,  0.02797613, ...,  0.0674273 ,
         0.04817105,  0.03059   ],
       [ 0.99074477,  0.97213686,  0.98990673, ...,  0.94329911,
         0.99409556,  0.98447394],
       [ 0.02927557,  0.05798027,  0.07752991, ...,  0.36027411,
         0.1559844 ,  0.26209033],
       ..., 
       [ 0.00369688,  0.01589782,  0.0115205 , ...,  0.0152018 ,
         0.00421828,  0.00280912],
       [ 0.78314799,  0.61225456,  0.71571481, ...,  0.80646819,
         0.94149739,  0.52025074],
       [ 0.96498191,  0.98687011,  0.78228015, ...,  0.95690244,
         0.63841748,  0.404479  ]], dtype=float32)

In [15]:
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)


Accuracy score : 0.134200 
Out[15]:
0.13420000000000001

In [16]:
digitsMLP.train_model(10000)

In [17]:
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)


Accuracy score : 0.894600 
Out[17]:
0.89459999999999995

In [18]:
digitsMLP.train_model(50000)

In [19]:
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)


Accuracy score : 0.956600 
Out[19]:
0.95660000000000001

Testing on University of Montreal LISA lab MNIST data


In [20]:
import gzip
import six.moves.cPickle as pickle
with gzip.open("../DeepLearningTutorials/data/mnist.pkl.gz", 'rb') as f:
    try:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except:
        train_set, valid_set, test_set = pickle.load(f)

In [21]:
K=10
m = len(train_set[1])
y_train_prob = [np.zeros(K) for row in train_set[1]]  # list of 50000 numpy arrays, each of shape (10,)
for i in range( m):
        y_train_prob[i][ train_set[1][i]] = 1
y_train_prob = np.array(y_train_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_train_prob.shape )


(10, 50000)

In [22]:
MNIST_MLP = MLP( 3,[784,49,10], train_set[0].T, y_train_prob, T.nnet.sigmoid, 1.)

In [23]:
MNIST_MLP.build_update( train_set[0].T, y_train_prob, 0.01, 0.0001)
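
The exact update rule lives in ML/NN.py; as an assumption about what a build_update of this kind compiles (a sketch, not a quote of NN.py), the two trailing arguments would be a learning rate alpha = 0.01 and an L2 weight-decay lambda = 0.0001 feeding plain gradient descent:

def build_sgd_updates(cost, params, alpha=0.01, lambda_reg=0.0001):
    """Return (param, param - alpha * grad) update pairs for theano.function."""
    reg_cost = cost + lambda_reg * sum( (W ** 2).sum() for W in params )  # L2 penalty on the weights
    grads = T.grad(reg_cost, params)
    return [ (p, p - alpha * g) for p, g in zip(params, grads) ]

Such pairs go straight into theano.function(..., updates=...), which is what makes each train_model call below cheap to repeat.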

In [24]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)


Accuracy score : 0.098660 
Out[24]:
0.098659999999999998

In [25]:
MNIST_MLP.train_model(50000)

In [26]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)


Accuracy score : 0.862140 
Out[26]:
0.86214000000000002

In [30]:
%time MNIST_MLP.train_model(100000)


CPU times: user 3min 38s, sys: 8min 22s, total: 12min
Wall time: 12min

In [31]:
MNIST_MLP.accuracy_logreg( train_set[0].T,y_train_prob)


Accuracy score : 0.826500 
Out[31]:
0.82650000000000001

In [32]:
m = len(valid_set[1])
y_valid_prob = [np.zeros(K) for row in valid_set[1]]  # list of 10000 numpy arrays, each of shape (10,)
for i in range( m):
        y_valid_prob[i][ valid_set[1][i]] = 1
y_valid_prob = np.array(y_valid_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_valid_prob.shape )


(10, 10000)

In [33]:
m = len(test_set[1])
y_test_prob = [np.zeros(K) for row in test_set[1]]  # list of 10000 numpy arrays, each of shape (10,)
for i in range( m):
        y_test_prob[i][ test_set[1][i]] = 1
y_test_prob = np.array(y_test_prob).T.astype(theano.config.floatX)  # size dims. (K,m)
print( y_test_prob.shape )


(10, 10000)

In [34]:
MNIST_MLP.accuracy_logreg( valid_set[0].T,y_valid_prob)


Accuracy score : 0.814200 
Out[34]:
0.81420000000000003

In [35]:
MNIST_MLP.accuracy_logreg( test_set[0].T,y_test_prob)


Accuracy score : 0.805000 
Out[35]:
0.80500000000000005

In [40]:
MNIST_d = train_set[0].T.shape[0]
print(MNIST_d)
MNIST_MLP = MLP( 3,[MNIST_d,25,10], train_set[0].T, y_train_prob, T.nnet.sigmoid, 1.)
MNIST_MLP.build_update( train_set[0].T, y_train_prob, 0.1, 0.00001)


784

In [41]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)


Accuracy score : 0.095720 
Out[41]:
0.09572

In [42]:
MNIST_MLP.train_model(150000)

In [43]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)


Accuracy score : 0.986520 
Out[43]:
0.98651999999999995

In [44]:
MNIST_MLP.accuracy_logreg( valid_set[0].T, y_valid_prob)


Accuracy score : 0.950600 
Out[44]:
0.9506

In [45]:
MNIST_MLP.accuracy_logreg( test_set[0].T, y_test_prob)


Accuracy score : 0.945200 
Out[45]:
0.94520000000000004

In [ ]: