In [71]:
%matplotlib inline
In [1]:
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets
In [2]:
import pandas as pd
In [3]:
import theano
I accomplished the above by running this command at the command prompt:
THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32' jupyter notebook
In [4]:
#import theano
from theano import function, config, sandbox, shared
import theano.tensor as T
import numpy as np
import scipy
import time
In [6]:
print( theano.config.device )
print( theano.config.lib.cnmem) # cf. http://deeplearning.net/software/theano/library/config.html
print( theano.config.print_active_device)# Print active device at when the GPU device is initialized.
In [7]:
import os, sys
os.getcwd()
os.listdir( os.getcwd() )
Out[7]:
In [ ]:
In [8]:
# Runs gpu_test.py inside this kernel.
# NOTE(review): the THEANO_FLAGS=... token follows the script name, so %run hands it to the script
# as a command-line argument instead of setting an environment variable.  Whether gpu_test.py reads
# it from sys.argv is not visible here -- TODO confirm.  theano is already imported above, so its
# configuration is presumably fixed by the flags the notebook server was started with.
%run gpu_test.py THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32,lib.cnmem=0.85' # note lib.cnmem option for CnMem
In [ ]:
In [9]:
# Load the diabetes dataset
diabetes = sklearn.datasets.load_diabetes()
In [10]:
diabetes_X = diabetes.data
diabetes_Y = diabetes.target
In [11]:
#diabetes_X1 = diabetes_X[:,np.newaxis,2]
diabetes_X1 = diabetes_X[:,np.newaxis, 2].astype(theano.config.floatX)
#diabetes_Y = diabetes_Y.reshape( diabetes_Y.shape[0], 1)
diabetes_Y = diabetes_Y.astype(theano.config.floatX)
Train on $m$ number of input data points
In [12]:
m_lin = diabetes_X1.shape[0]
In [28]:
#x1 = T.vector('x1') # X1, input data, with only 1 feature, i.e. X \in \mathbb{R}^N, d=1
#ylin = T.vector('ylin') # target variable for linear regression, so that Y \in \mathbb{R}
x1 = T.scalar('x1') # X1, input data, with only 1 feature, i.e. X \in \mathbb{R}^N, d=1
ylin = T.scalar('ylin') # target variable for linear regression, so that Y \in \mathbb{R}
In [29]:
thet0_init_val = np.random.randn()
thet1_init_val = np.random.randn()
In [30]:
thet0 = theano.shared( value=thet0_init_val, name='thet0', borrow=True) # \theta^0
thet1 = theano.shared( thet1_init_val, name='thet1', borrow=True) # \theta^1
In [31]:
#h_thet = T.dot( thet1, x1) + thet0
# whereas, Newmu uses
h_thet = thet1 * x1 + thet0
In [32]:
# roshansanthosh uses
#Jthet = T.sum( T.pow(h_thet-ylin,2))/(2*m_lin)
# whereas, Newmu uses
# Jthet = T.mean( T.sqr( thet_1*x1 + thet_0 - ylin ))
Jthet = T.mean( T.pow( h_thet-ylin,2))/2
#Jthet = sandbox.cuda.basic_ops.gpu_from_host( T.mean(
# sandbox.cuda.basic_ops.gpu_from_host( T.pow( h_thet-ylin,2))))/2
In [33]:
grad_thet0 = T.grad(Jthet, thet0)
grad_thet1 = T.grad(Jthet, thet1)
In [19]:
# so-called "learning rate"
gamma = 0.01
Note that "updates (iterable over pairs (shared_variable, new_expression) List, tuple or dict.) – expressions for new SharedVariable values" cf. Theano doc
In [34]:
train_lin = theano.function(inputs = [x1,ylin], outputs=Jthet,
updates=[[thet1,thet1-gamma*grad_thet1],[thet0,thet0-gamma*grad_thet0]])
In [35]:
test_lin = theano.function([x1],h_thet)
In [100]:
#X1_lin_in = shared( diabetes_X1 ,'float32')
#Y_lin_out = shared( diabetes_Y, 'float32')
In [23]:
training_steps = 1000 # 10000
In [36]:
sh_diabetes_X1 = shared( diabetes_X1 , borrow=True)
sh_diabetes_Y = shared( diabetes_Y, borrow=True)
In [37]:
"""
for i in range(training_steps):
for x,y in zip( diabetes_X1, diabetes_Y):
Jthet_val = train_lin( x, y )
"""
# train_lin was compiled with scalar symbolic inputs (x1, ylin), so every call must receive one
# numeric sample.  A compiled Theano function rejects SharedVariable arguments, which is what
# the previous version of this loop passed in.
x_samples = diabetes_X1.ravel()   # flatten the (m, 1) feature column to m scalars
y_samples = diabetes_Y.ravel()
for i in range(training_steps):
    for x, y in zip(x_samples, y_samples):
        # each call applies one gradient step; Jthet_val keeps the cost of the latest sample
        Jthet_val = train_lin(x, y)
In [27]:
print(Jthet_val)
In [129]:
print( thet0.get_value() ); print( thet1.get_value() )
In [134]:
# test_lin expects a scalar x1.  Iterating diabetes_X1 directly yields shape-(1,) rows, which do
# not match a 0-d input, so iterate over the flattened column instead.
test_lin_out = np.array( [ test_lin( x ) for x in diabetes_X1.ravel() ] )
In [137]:
plt.plot(diabetes_X1,diabetes_Y,'ro')
plt.plot(diabetes_X1,test_lin_out)
Out[137]:
In [38]:
if any([x.op.__class__.__name__ in ['GpuGemm','GpuGemv'] for x in train_lin.maker.fgraph.toposort()]):
print("Used the gpu")
else:
print(train_lin.maker.fgraph.toposort())
In [39]:
# Report CPU execution only when a plain (non-GPU) Elemwise node appears in the compiled graph.
# The class-name test keeps GPU element-wise ops from being counted as CPU ops, matching the
# check applied to matadd_GPU elsewhere in this notebook.
if np.any([isinstance(x.op, T.Elemwise) and ('Gpu' not in type(x.op).__name__)
           for x in train_lin.maker.fgraph.toposort()]):
    print("Used the cpu")
In [ ]:
theano
cf. Week 1, Linear Algebra Review, Coursera, Machine Learning with Ng
I'll take this opportunity to provide a dictionary between the syntax of linear algebra math and `numpy`.
Essentially, what I did was take Coursera's Week 1, Linear Algebra Review, and translate the math into `theano`, in particular running `theano` on the GPU.
Other reference that I used was
https://simplyml.com/linear-algebra-shootout-numpy-vs-theano-vs-tensorflow-2/
Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow by Charanpal Dhanjal - 14/07/16
In [69]:
A = T.matrix('A')
B = T.matrix('B')
#matadd = function([A,B], A+B)
#matadd = function([A,B],sandbox.cuda.basic_ops.gpu_from_host(A+B) )
# Note: we are just defining the expressions, nothing is evaluated here!
C = sandbox.cuda.basic_ops.gpu_from_host(A+B)
matadd = function([A,B], C)
In [121]:
#A = T.dmatrix('A')
#B = T.dmatrix('B')
A = T.matrix('A')
B = T.matrix('B')
C_out = A + B
matadd_CPU = function([A,B], C_out)
In [73]:
A_eg = shared( np.array([[8,6,9],[10,1,10]]), 'float32')
B_eg = shared( np.array([[3,10,2],[6,1,-1]]), 'float32')
In [85]:
A_eg_CPU = np.array([[8,6,9],[10,1,10]])
B_eg_CPU = np.array([[3,10,2],[6,1,-1]])
In [96]:
print(A_eg_CPU)
print( type( A_eg_CPU ))
print( A_eg_CPU.shape)
print( B_eg_CPU.shape)
In [70]:
print( matadd.maker.fgraph.toposort() )
In [122]:
print( matadd_CPU.maker.fgraph.toposort() )
In [71]:
matadd( A_eg, B_eg)
The way to do it, to "force" on the GPU, is like this (cf. Speeding up your Neural Network with Theano and the GPU - Wild ML):
In [76]:
np.random.randn( *A_eg_CPU.shape )
Out[76]:
In [78]:
C_out = theano.shared( np.random.randn( *A_eg_CPU.shape).astype('float32') )
In [80]:
C_out.type()
Out[80]:
In [110]:
#A_in = shared( A_eg_CPU, "float32")
#A_in = shared( A_eg_CPU, "float32")
A_in = shared( A_eg_CPU.astype("float32"), "float32")
B_in = shared( B_eg_CPU.astype("float32"), "float32")
#C_out_GPU = A_in + B_in
C_out_GPU = sandbox.cuda.basic_ops.gpu_from_host(A_in+B_in)
In [111]:
matadd_GPU = theano.function( [], C_out_GPU)
In [112]:
C_out_GPU_result = matadd_GPU()
In [113]:
C_out_GPU_result
Out[113]:
Notice how DIFFERENT this setup or syntax is: we have to set up tensor or matrix shared variables `A_in`, `B_in`, which are then used to define the theano function, `theano.function`. "By using shared variables we ensure that they are present in the GPU memory". cf. Linear Algebra Shootout: NumPy vs. Theano vs. TensorFlow
In [114]:
print( matadd_GPU.maker.fgraph.toposort() )
In [128]:
# Walk every node of the compiled graph and look at that node's op.  The previous version tested
# C_out_GPU.op on each iteration and never used the loop variable.
# A plain Elemwise op whose class name lacks 'Gpu' means part of the graph ran on the CPU.
if np.any([isinstance(x.op, T.Elemwise) and ('Gpu' not in type(x.op).__name__)
           for x in matadd_GPU.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')
In [ ]:
In [124]:
matadd_CPU( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") )
Out[124]:
In [55]:
type(A_eg)
Out[55]:
In [59]:
# numpy is imported as `np` in this notebook, and no random generator exists at this point,
# so create one explicitly (seeded so the cell is reproducible).
rng = np.random.RandomState(22)
sample = np.asarray(rng.rand(2000))
print( type( sample ) )
sample.shape
Out[59]:
Bottom Line: there are 2 ways of doing linear algebra on the GPU
$ \forall \, A, B \in \text{Mat}_{\mathbb{R}}(M,N)$
In [132]:
A = T.matrix('A')
B = T.matrix('B')
C = sandbox.cuda.basic_ops.gpu_from_host( A + B ) # vs.
# C = A + B # this will result in an output array on the host, as opposed to CudaNdarray on device
matadd = function([A,B], C)
In [133]:
print( matadd.maker.fgraph.toposort() )
In [134]:
matadd( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") )
Out[134]:
In [140]:
A_in = shared( A_eg_CPU.astype("float32"), "float32") # initialize with the input values, A_eg_CPU, anyway
B_in = shared( B_eg_CPU.astype("float32"), "float32") # initialize with the input values B_eg_CPU, anyway
# C_out = A_in + B_in # this version will output to the host as a numpy.ndarray
# indeed, reading the graph,
"""
[GpuElemwise{add,no_inplace}(float32, float32), HostFromGpu(GpuElemwise{add,no_inplace}.0)]
"""
# this version immediately below, in 1 line, will result in a CudaNdarray on device
C_out = sandbox.cuda.basic_ops.gpu_from_host(A_in+B_in)
matadd_GPU = theano.function( [], C_out)
In [141]:
print( matadd_GPU.maker.fgraph.toposort() )
In [142]:
C_out_result = matadd_GPU()
In [143]:
C_out_result
Out[143]:
cf. Scalar Multiplication of Linear Algebra Review, coursera, Machine Learning Intro by Ng
In [149]:
A_2 = np.array( [[4,5],[1,7] ])
In [145]:
a = T.scalar('a')
F = sandbox.cuda.basic_ops.gpu_from_host( a*A )
scalarmul = theano.function([a,A],F)
In [146]:
print( scalarmul.maker.fgraph.toposort() )
In [150]:
scalarmul( np.float32( 2.), A_2.astype("float32"))
Out[150]:
In [152]:
scalarmul( np.float32(2.), matadd( A_eg_CPU.astype("float32"), B_eg_CPU.astype("float32") ) )
Out[152]:
In [156]:
u = T.vector('u')
v = T.vector('v')
w = sandbox.cuda.basic_ops.gpu_from_host( u + v)
vecadd = theano.function( [u,v],w)
t = sandbox.cuda.basic_ops.gpu_from_host( a * u)
scalarmul_vec = theano.function([a,u], t)
In [157]:
print(vecadd.maker.fgraph.toposort())
print(scalarmul_vec.maker.fgraph.toposort())
In [162]:
u_eg = np.array( [4,6,7], dtype="float32")
v_eg = np.array( [2,1,0], dtype="float32")
print( u_eg.shape)
In [161]:
scalarmul_vec( np.float32(0.5), u_eg )
Out[161]:
In [163]:
vecadd( scalarmul_vec( np.float32(0.5), u_eg ) , scalarmul_vec( np.float32(-3.), v_eg ) )
Out[163]:
This was the computer equivalent to mathematical expression:
$$ \left[ \begin{matrix} 4 \\ 6 \\ 7 \end{matrix} \right] /2 - 3 * \left[ \begin{matrix} 2 \\ 1 \\ 0 \end{matrix} \right] $$
In [164]:
B_out = sandbox.cuda.basic_ops.gpu_from_host( T.dot(A,v))
AVmul = theano.function([A,v], B_out)
print(AVmul.maker.fgraph.toposort())
In [165]:
AVmul( np.array([[1,0,3],[2,1,5],[3,1,2]]).astype("float32"), np.array([1,6,2]).astype("float32"))
Out[165]:
In [168]:
AVmul( np.array([[1,0,0],[0,1,0],[0,0,1]]).astype("float32"), np.array([1,6,2]).astype("float32"))
Out[168]:
In [166]:
C_f = sandbox.cuda.basic_ops.gpu_from_host( T.dot(A,B))
matmul = theano.function([A,B], C_f)
print( matmul.maker.fgraph.toposort())
In [167]:
matmul( np.array( [[1,3],[2,4],[0,5]] ).astype("float32"), np.array([[1,0],[2,3]]).astype("float32") )
Out[167]:
In [170]:
# Matrix inverse A^{-1}.  T.inv is the element-wise reciprocal 1/a_ij, not the matrix inverse;
# the linear-algebra inverse is T.nlinalg.matrix_inverse.
# gpu_from_host moves the result onto the device, as in the other examples of this section.
Ainverse = sandbox.cuda.basic_ops.gpu_from_host( T.nlinalg.matrix_inverse(A) )
Ainv = theano.function([A], Ainverse)
print(Ainv.maker.fgraph.toposort())
In [172]:
Atranspose = sandbox.cuda.basic_ops.gpu_from_host( A.T)
AT = theano.function([A],Atranspose)
print(AT.maker.fgraph.toposort())
In [ ]:
In [ ]:
In [ ]:
In [9]:
linregdata = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data1.txt', header=None)
In [11]:
# Selecting with a one-element list keeps each column as a 2-D (m, 1) numpy array.
# `.values` replaces DataFrame.as_matrix, which newer pandas releases no longer provide.
X_linreg_training = linregdata[[0]].values
y_linreg_training = linregdata[[1]].values
m_linreg_training = len(y_linreg_training) # number of training examples
print( X_linreg_training.shape, type(X_linreg_training))
print( y_linreg_training.shape, type(y_linreg_training))
print( m_linreg_training )
Try representing $\theta$, parameters or "weights", of size $|\theta|$ which should be equal to the number of features $n$ (or $d$).
In [109]:
# theta_linreg = T.vector('theta_linreg')
d = X_linreg_training.shape[1] # d = features
# Declare Theano symbolic variables
X = T.matrix('x')
y = T.vector('y')
Preprocess training data (due to numpy's treatment of arrays) (note, this is not needed, if you use pandas to choose which column(s) you want to make into a numpy array)
In [188]:
#X_linreg_training = X_linreg_training.reshape( m_linreg_training,1)
#y_linreg_training = y_linreg_training.reshape( m_linreg_training,1)
In [12]:
# Instead, the training data X and test data values y are going to be represented by Theano symbolic variable above
#X_linreg = theano.shared(X_linreg_training.astype("float32"),"float32")
#y_linreg = theano.shared(y_linreg_training.astype("float32"),"float32")
In [110]:
#theta_0 = np.zeros( ( d+1,1)); print(theta_0)
theta_0 = np.zeros( d+1); print(theta_0)
In [111]:
theta = theano.shared( theta_0.astype("float32"), "theta")
In [89]:
alpha = np.float32(0.01) # learning rate gamma or alpha
In [112]:
# Construct Theano "expression graph"
predicted_vals = sandbox.cuda.basic_ops.gpu_from_host( T.dot(X,theta) ) # h_{\theta}
m = np.float32( y_linreg_training.shape[0] )
J_theta = sandbox.cuda.basic_ops.gpu_from_host(
T.dot( (T.dot(X,theta) - y).T, T.dot(X,theta) - y) * np.float32( 0.5 ) * np.float32( 1./ m )
) # cost function
In [113]:
update_theta = sandbox.cuda.basic_ops.gpu_from_host(
theta - alpha * T.grad( J_theta, theta) )
In [114]:
gradientDescent = theano.function(
inputs=[X,y],
outputs=[predicted_vals,J_theta],
updates=[(theta, update_theta)],
name = "gradientDescent")
In [115]:
print( gradientDescent.maker.fgraph.toposort() )
In [116]:
num_iters = 1500
J_History = []
Preprocess X to include intercepts
In [95]:
input_X_linreg = np.hstack( ( np.ones((m_linreg_training,1)), X_linreg_training ) ).astype("float32")
In [96]:
y_linreg_training_processed = y_linreg_training.reshape( m_linreg_training,).astype("float32")
In [117]:
# Run num_iters gradient-descent steps, recording the cost returned by each step.
# The float32 casts are hoisted out of the loop (the inputs do not change between steps), and
# the counter is named `step` so the builtin `iter` is not shadowed.
X_batch = input_X_linreg.astype("float32")
y_batch = y_linreg_training_processed.astype("float32")
J_History = [0] * num_iters
for step in range(num_iters):
    predicted_vals_out, J_out = gradientDescent(X_batch, y_batch)
    J_History[step] = J_out
In [45]:
Deg = (np.random.randn(40,10).astype("float32"), np.random.randint(size=40,low=0,high=2).astype("float32") )
In [46]:
Deg[0].shape
Out[46]:
In [47]:
Deg[1].shape
Out[47]:
In [118]:
theta.get_value()
Out[118]:
In [101]:
dir( J_History[0] )
Out[101]:
In [121]:
# .gpudata is the device pointer of the array, not its contents; copy the value back to the host.
np.asarray( J_History[-5] )
Out[121]:
In [120]:
# Plot the cost per iteration.  Each entry is copied from the device to a host float first;
# .gpudata would plot memory addresses instead of cost values.
plt.plot( [float(np.asarray(ele)) for ele in J_History])
Out[120]:
In [73]:
input_X_linreg.shape
Out[73]:
In [122]:
# GPU NOTE: Conversion to float32 to store them on the GPU!
X = theano.shared( input_X_linreg.astype('float32'), name='X' )
y = theano.shared( y_linreg_training.astype('float32'), name='y')
In [123]:
# GPU NOTE: Conversion to float32 to store them on the GPU!
theta = theano.shared( np.vstack(theta_0).astype("float32"), name='theta')
In [136]:
# Construct Theano "expression graph"
predicted_vals = sandbox.cuda.basic_ops.gpu_from_host(
T.dot(X,theta) ) # h_{\theta}
m = np.float32( y_linreg_training.shape[0] )
# cost function J_theta, J_{\theta}
J_theta = sandbox.cuda.basic_ops.gpu_from_host(
(
T.dot( (T.dot(X,theta) - y).T, T.dot(X,theta) - y) * np.float32(0.5) * np.float32( 1./m)
).reshape([]) ) # cost function # reshape is to force "broadcast" into 0-dim. scalar for cost function
In [137]:
update_theta = sandbox.cuda.basic_ops.gpu_from_host(
theta - alpha * T.grad( J_theta, theta) )
In [138]:
# Note that we removed the input values because we will always use the same shared variable
# GPU Note: Removed the input values to avoid copying data to the GPU.
gradientDescent = theano.function(
inputs=[],
# outputs=[predicted_vals,J_theta],
updates=[(theta, update_theta)],
name = "gradientDescent")
In [139]:
print( gradientDescent.maker.fgraph.toposort() )
In [141]:
# Apply the in-place theta update num_iters times; the compiled function takes no inputs.
# The counter is named `step` so the builtin `iter` is not shadowed.
for step in range(num_iters):
    gradientDescent( )
In [78]:
print( np.vstack( theta_0).shape )
print( y_linreg_training.shape )
In [149]:
theta.get_value()
Out[149]:
In [152]:
# Profiling
print( theano.config.profile ) # Do the vm/cvm linkers profile the execution time of Theano functions?
print( theano.config.profile_memory ) # Do the vm/cvm linkers profile the memory usage of Theano functions? It only works when profile=True.
In [154]:
theano.printing.debugprint(gradientDescent)
In [157]:
#print( gradientDescent.profile.print_summary() )
dir( gradientDescent.profile)
Out[157]:
In [9]:
import sys
import os
In [10]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )
In [10]:
from linreg_gradDes import LinearReg, LinearReg_loaded
#from ML import LinearReg, LinearReg_loaded
Boilerplate for sample input data
In [11]:
linregdata1 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data1.txt', header=None)
linregdata1.as_matrix([0]).shape
linregdata1.as_matrix([1]).shape
Out[11]:
In [12]:
# Column 0 selected as a one-column frame has shape (number of rows, 1), which supplies both
# counts without the deprecated as_matrix conversions.
numberoftraining, features = linregdata1[[0]].shape
LinReg_housing = LinearReg( features, numberoftraining , 0.01)
In [13]:
Xin = LinReg_housing.preprocess_X( linregdata1.as_matrix([0]))
ytest = linregdata1.as_matrix([1]).flatten()
In [14]:
%time LinReg_housing.build_model( Xin, ytest )
Out[14]:
In [19]:
LinRegloaded_housing = LinearReg_loaded( linregdata1.as_matrix([0]), linregdata1.as_matrix([1]),
features, numberoftraining )
In [20]:
%time LinRegloaded_housing.build_model()
Out[20]:
In [53]:
print( LinReg_housing.gradientDescent.maker.fgraph.toposort() )
print( LinRegloaded_housing.gradientDescent.maker.fgraph.toposort() )
Consider feature normalization
In [40]:
def featureNormalize(X):
    """
    FEATURENORMALIZE Normalizes the features in X
    FEATURENORMALIZE(X) returns a normalized version of X where
    the mean value of each feature is 0 and the standard deviation
    is 1. This is often a good preprocessing step to do when
    working with learning algorithms.

    Parameters
    ----------
    X : numpy array whose rows are examples and whose columns are features.

    Returns
    -------
    list [X_norm, mu, sigma]
        X_norm : X with every column shifted by its mean and scaled by its standard deviation.
        mu     : per-column mean of X.
        sigma  : per-column (population) standard deviation of X.

    A constant column has sigma == 0; it is only mean-centred (becoming all zeros) instead of
    being divided by zero.
    """
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    # divide constant columns by 1 so they do not turn into NaN
    safe_sigma = np.where(sigma == 0, 1, sigma)
    X_norm = (X - mu) / safe_sigma
    return [X_norm, mu, sigma]
In [41]:
linregdata2 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data2.txt', header=None)
In [42]:
# The last column is the target; all preceding columns are features.
# iloc/.values replace DataFrame.as_matrix, which newer pandas releases no longer provide.
numberoftraining = linregdata2.shape[0]
features = linregdata2.shape[1] - 1
Xdat = linregdata2.iloc[:, :features].values      # (numberoftraining, features)
ytest = linregdata2.iloc[:, [features]].values    # (numberoftraining, 1)
In [46]:
[Xnorm, mus,sigmas] = featureNormalize(Xdat)
In [48]:
LinReg_housing2 = LinearReg( features, numberoftraining, 0.01)
processed_X = LinReg_housing2.preprocess_X( Xnorm )
In [49]:
%time LinReg_housing2.build_model( processed_X, ytest.flatten(), 400)
Out[49]:
In [50]:
LinRegloaded_housing2 = LinearReg_loaded( Xnorm, ytest,
features, numberoftraining )
In [51]:
%time LinRegloaded_housing2.build_model( 400)
Out[51]:
In [ ]:
In [54]:
# Load the diabetes dataset
diabetes = sklearn.datasets.load_diabetes()
In [55]:
diabetes_X = diabetes.data
diabetes_Y = diabetes.target
In [61]:
#diabetes_X1 = diabetes_X[:,np.newaxis,2]
diabetes_X1 = diabetes_X[:,np.newaxis, 2].astype(theano.config.floatX)
#diabetes_Y = diabetes_Y.reshape( diabetes_Y.shape[0], 1)
diabetes_Y = np.vstack( diabetes_Y.astype(theano.config.floatX) )
In [63]:
features1 = 1
numberoftraining = diabetes_Y.shape[0]
In [67]:
LinReg_diabetes = LinearReg( features1, numberoftraining, 0.01)
In [70]:
processed_X = LinReg_diabetes.preprocess_X( diabetes_X1 )
In [71]:
%time LinReg_diabetes.build_model( processed_X, diabetes_Y.flatten(), 10000)
Out[71]:
In [74]:
LinRegloaded_diabetes = LinearReg_loaded( diabetes_X1, diabetes_Y,
features1, numberoftraining )
In [75]:
%time LinRegloaded_diabetes.build_model( 10000)
Out[75]:
Multiple number of features case:
In [76]:
features = diabetes_X.shape[1]
In [77]:
LinReg_diabetes = LinearReg( features, numberoftraining, 0.01)
processed_X = LinReg_diabetes.preprocess_X( diabetes_X )
In [78]:
%time LinReg_diabetes.build_model( processed_X, diabetes_Y.flatten(), 10000)
Out[78]:
In [79]:
LinRegloaded_diabetes = LinearReg_loaded( diabetes_X, diabetes_Y,
features, numberoftraining )
In [80]:
%time LinRegloaded_diabetes.build_model( 10000)
Out[80]:
In [5]:
data_ex1data2 = pd.read_csv('./coursera_Ng/machine-learning-ex1/ex1/ex1data2.txt', header=None)
X_ex1data2 = data_ex1data2.iloc[:,0:2]
y_ex1data2 = data_ex1data2.iloc[:,2]
m_ex1data2 = y_ex1data2.shape[0]
X_ex1data2=X_ex1data2.values.astype(np.float32)
y_ex1data2=y_ex1data2.values.reshape((m_ex1data2,1)).astype(np.float32)
print(type(X_ex1data2))
print(type(y_ex1data2))
print(X_ex1data2.shape)
print(y_ex1data2.shape)
print(m_ex1data2)
print(X_ex1data2[:5])
print(y_ex1data2[:5])
In [46]:
((X_ex1data2[:,1] - X_ex1data2[:,1].mean())/( X_ex1data2[:,1].std()) ).std()
Out[46]:
In [49]:
# feature Normalize
#X_ex1data2_norm = sklearn.preprocessing.Normalizer.transform(X_ex1data2 )
X_ex1data2_norm = (X_ex1data2 - np.mean(X_ex1data2, axis=0)) / np.std(X_ex1data2, axis=0)
print(X_ex1data2_norm[:,0].mean())
print(X_ex1data2_norm[:,0].std())
print(X_ex1data2_norm[:,1].mean())
print(X_ex1data2_norm[:,1].std())
In [51]:
# X_ex1data2_norm[:5];
In [61]:
X=T.matrix(dtype=theano.config.floatX)
y=T.matrix(dtype=theano.config.floatX)
Theta=theano.shared(np.zeros((2,1)).astype(theano.config.floatX))
b = theano.shared(np.zeros(1).astype(theano.config.floatX))
In [16]:
print(b.get_value().shape)
In [62]:
yhat = T.dot( X, Theta) + b
In [63]:
# L2 norm
J = np.cast[theano.config.floatX](0.5)*T.mean( T.sqr( yhat-y))
In [64]:
alpha=0.01 # learning rate
# New values for the parameters after one gradient-descent step on the cost J.
updateThetab = [ Theta-np.float32(alpha)*T.grad(J,Theta), b-np.float32(alpha)*T.grad(J,b)]
# `updates` is given as an explicit list of (shared_variable, new_expression) pairs;
# a bare zip(...) is a one-shot iterator on Python 3.
gradientDescent_step = theano.function(inputs=[X,y],
                                       outputs=J,
                                       updates=list(zip([Theta,b],updateThetab)) )
In [66]:
num_iters =400
# Cost after each gradient-descent step on the normalised features.
# The counter is named `step` so the builtin `iter` is not shadowed.
JList=[]
for step in range(num_iters):
    err = gradientDescent_step(X_ex1data2_norm,y_ex1data2)
    JList.append(err)
In [67]:
# Final mode:
print(Theta.get_value())
print(b.get_value())
In [72]:
# JList[-10:]
plt.plot(JList)
plt.show()
cf. ex3
, Programming Exercise 3: Multi-class Classification and Neural Networks, Machine Learning
In [83]:
os.getcwd()
Out[83]:
In [86]:
os.listdir( './coursera_Ng/machine-learning-ex3/' )
Out[86]:
In [87]:
os.listdir( './coursera_Ng/machine-learning-ex3/ex3' )
Out[87]:
In [90]:
# Load saved matrices from file
multiclscls_data = scipy.io.loadmat('./coursera_Ng/machine-learning-ex3/ex3/ex3data1.mat')
import the classes from ML
In [9]:
import sys
import os
In [9]:
os.getcwd()
Out[9]:
In [10]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )
In [11]:
from gradDes import LogReg
In [12]:
# Test case for Cost function J_{\theta} with regularization
# Small fixed fixture for checking the regularised cost J_{\theta}.
# X_t carries no leading column of ones: the input is not pre-processed with an intercept column.
theta_t = np.array([-2, -1, 1, 2]).reshape(-1, 1)          # column vector, shape (4, 1)
X_t = (np.arange(1, 16) / 10.).reshape((3, 5)).T           # 0.1 .. 1.5 laid out as (5, 3)
y_t = np.array([1, 0, 1, 0, 1]).reshape(-1, 1)             # column vector, shape (5, 1)
In [13]:
MulClsCls_digits = LogReg( X_t, y_t, 3,5,0.01, 3. )
In [14]:
MulClsCls_digits.calculate_cost()
Out[14]:
In [19]:
MulClsCls_digits.z.get_value()
In [21]:
print( MulClsCls_digits.X.get_value() )
MulClsCls_digits.y.get_value()
Out[21]:
In [23]:
calc_z_test = theano.function([], MulClsCls_digits.z)
In [24]:
calc_z_test()
Out[24]:
In [25]:
MulClsCls_digits.theta.set_value( theta_t.astype('float32') )
In [27]:
calc_z_test()
Out[27]:
In [35]:
MulClsCls_digits.calculate_cost()
Out[35]:
In [43]:
print( 1/(1+np.exp( np.dot( -np.hstack( ( np.ones((5,1)), X_t) ), theta_t) ) ) )
h_test = 1/(1+np.exp( np.dot( -np.hstack( ( np.ones((5,1)), X_t) ), theta_t) ) )
print( np.dot( (h_test - y_t).T, h_test- y_t) * 0.5/5 ) # non-regularized J_theta cost term
np.dot( theta_t[1:].T, theta_t[1:]) * 3 / (2.* 5)
Out[43]:
In [33]:
MulClsCls_digits.predict()
Out[33]:
In [ ]:
MulClsCls_digit
In [44]:
theano.config.floatX
Out[44]:
In [ ]:
In [ ]:
cf. 2 Neural Networks, 2.1 Model representation, ex3.pdf
In [45]:
os.getcwd()
Out[45]:
In [46]:
os.listdir( './coursera_Ng/machine-learning-ex3/' )
Out[46]:
In [47]:
os.listdir( './coursera_Ng/machine-learning-ex3/ex3/' )
Out[47]:
$ \Theta_1, \Theta_2 $
In [48]:
# Load saved matrices from file
nn3_data = scipy.io.loadmat('./coursera_Ng/machine-learning-ex3/ex3/ex3weights.mat')
In [53]:
print( nn3_data.keys() )
print( type( nn3_data['Theta1']) )
print( type( nn3_data['Theta2']) )
print( nn3_data['Theta1'].shape )
print( nn3_data['Theta2'].shape )
In [54]:
# First row of the pre-trained Theta1 weights; they live inside the dict loaded from
# ex3weights.mat, and no standalone `Theta1` has been defined at this point.
nn3_data['Theta1'][0]
In [61]:
%load_ext tikzmagic
In [ ]:
In [66]:
np.random.seed(0)
s_l = 400 # (layer) size of layer l, i.e. number of nodes, units in layer l
s_lp1 = 25
al = theano.shared( np.random.randn(s_l+1,1).astype('float32'), name="al")
#alp1 = theano.shared( np.random.randn(s_lp1,1).astype('float32'), name="al")
#Thetal = theano.shared( np.random.randn( s_lp1,s_l+1).astype('float32') , name="Thetal")
# Feedforward, forward propagation
#z = T.dot( Thetal, al)
#g = T.nnet.sigmoid( z)
In [8]:
s_l = 25
s_lp1 = 10
In [19]:
rng = np.random.RandomState(99)
Theta_values = np.asarray( rng.uniform(
low=-np.sqrt( 6. / (s_l+ s_lp1)),
high=np.sqrt( 6./(s_l + s_lp1)), size=(s_lp1,s_l+1)), dtype=theano.config.floatX )
print( Theta_values.shape )
print( Theta_values.dtype )
#Theta_values *= np.float32(4)
Theta_values *= 4.
print( Theta_values.dtype)
Theta_values.shape
Out[19]:
In [14]:
np.float32( 4)
Out[14]:
In [8]:
%env
Out[8]:
In [10]:
os.getcwd()
Out[10]:
In [11]:
print( sys.path )
In [12]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( '../DeepLearningTutorials/code/' )
In [14]:
#from logistic_sgd import LogisticRegression, load_data, sgd_optimization_mnist, predict
import logistic_sgd
In [10]:
MNIST_MTLdat = logistic_sgd.load_data("../DeepLearningTutorials/data/mnist.pkl.gz") # list of training data
In [36]:
print(len(MNIST_MTLdat))
print(type(MNIST_MTLdat))
# Type and length of every element returned by load_data.
# test_set_x, test_set_y, valid_set_x, valid_set_y, train_set_x,
# The print statement is replaced by a formatted print() call, which behaves the same on
# Python 2 and Python 3 and matches the other cells.
for ele in MNIST_MTLdat:
    print("%s %s" % (type(ele), len(ele)))
In [37]:
print( MNIST_MTLdat[0][0].get_value().shape)
print( type(MNIST_MTLdat[0][1]))
print( MNIST_MTLdat[0][1].get_scalar_constant_value )
In [38]:
print( type( MNIST_MTLdat[1][1] ) )
MNIST_MTLdat[1][1].shape
Out[38]:
In [39]:
dir(MNIST_MTLdat[0][1]) ;
In [25]:
import gzip
import six.moves.cPickle as pickle
# NOTE(review): unpickling can execute arbitrary code; only load this archive from a trusted source.
with gzip.open("../DeepLearningTutorials/data/mnist.pkl.gz", 'rb') as f:
    try:
        # Python 3 path: pickle.load accepts an explicit encoding for byte strings.
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except TypeError:
        # Python 2's cPickle.load takes no `encoding` keyword.  Only that failure triggers the
        # fallback; the previous bare `except` also hid I/O errors and corrupt archives.
        f.seek(0)  # restart from the beginning in case any bytes were consumed
        train_set, valid_set, test_set = pickle.load(f)
In [26]:
print( type( train_set[0] ))
print( train_set[0].shape )
print( type( train_set[1]))
print( train_set[1].shape )
print( type( valid_set[0] ))
print( valid_set[0].shape )
print( type( valid_set[1]))
print( valid_set[1].shape )
print( type( test_set[0] ))
print( test_set[0].shape )
print( type( test_set[1]))
print( test_set[1].shape )
In [15]:
X = train_set[0].T
In [17]:
pd.DataFrame(X.T).describe()
Out[17]:
In [18]:
28*28
Out[18]:
In [94]:
X_i = theano.shared( X.astype("float32"))
In [95]:
m = X_i.get_value().shape[1]
In [45]:
# Prepend a row of ones (the intercept unit) to X_i.  Both operands are 2-D with m columns, so
# they are joined along the existing row axis, giving (d+1, m).  T.stack requires identically
# shaped operands and inserts a new axis, which is not what is wanted here.
a1 = T.concatenate( [ theano.shared( np.ones((1,m)).astype("float32") ) , X_i ] , axis=0 )
In [41]:
print( type(a1) )
#print( a1.get_scalar_constant_value() )
dir(a1)
a1.get_parents()
Out[41]:
In [46]:
a1.ndim
Out[46]:
In [96]:
a1_0 = theano.shared( np.ones((1,m)).astype("float32"),name='a1_0')
In [97]:
# Join the (1, m) row of ones a1_0 on top of X_i along the row axis, giving (d+1, m).
# T.stack needs identically shaped operands and adds a new axis, so concatenate is used.
a1 = T.concatenate( [a1_0,X_i], axis=0)
In [98]:
d = X_i.get_value().shape[0]   # number of input rows of X_i
# Floor division keeps the hidden-layer size an int on Python 3 as well; it is used as an array
# dimension in `size=` below.
s_2 = d // 2
rng1 = np.random.RandomState(1234)
# Uniform initialisation on [-sqrt(6/(d+s_2)), +sqrt(6/(d+s_2))]; the extra column (d+1) is
# for the intercept row prepended to the input.
init_bound = np.sqrt(6./(d+s_2))
Theta1_values = np.asarray( rng1.uniform( low=-init_bound, high=init_bound, size=(s_2,d+1)),
                            dtype=theano.config.floatX)
Theta1 = theano.shared(value=Theta1_values, name="Theta",borrow=True)
In [99]:
#rng1.uniform( low=-np.sqrt(6./(d+s_2)),high=np.sqrt(6./(d+s_2)),size=(s_2,d+1))
z1 = T.dot( Theta1, a1)
a2 = T.tanh(z1)
In [100]:
passthru1 = theano.function( [], a2)
In [101]:
print(d)
passthru1()
In [108]:
print(X.shape)
X_i = theano.shared( X.astype("float32"))
#m = X_i.get_value().shape[1]
m = X.shape[1]   # number of columns of X
print(m)
a1_0 = theano.shared( np.ones((1,m)).astype("float32"),name='a1_0')
print(a1_0.get_value().shape)
# Join the (1, m) row of ones on top of X_i along the row axis.  T.stack needs identically
# shaped operands and adds a new axis, so concatenate is used.
a1 = T.concatenate( [a1_0,X_i], axis=0)
addintercept = theano.function([],a1)
In [109]:
addintercept()
In [131]:
d = X_i.get_value().shape[0]
print(d)
# Floor division keeps the layer size an int on Python 3 as well; it is used as an array
# dimension below.
s_2 = d // 2
print(s_2)
rng1 = np.random.RandomState(1234)
# Uniform initialisation on [-sqrt(6/(d+s_2)), +sqrt(6/(d+s_2))].  There is no intercept column
# here because the bias b1 is kept as a separate parameter.
init_bound = np.sqrt(6./(d+s_2))
Theta1_values = np.asarray( rng1.uniform( low=-init_bound, high=init_bound, size=(s_2,d)),
                            dtype=theano.config.floatX)
Theta1 = theano.shared(value=Theta1_values, name="Theta1",borrow=True)
b_values = np.vstack( np.zeros(s_2) ).astype(theano.config.floatX)   # column vector (s_2, 1)
b1 = theano.shared(value=b_values, name='b1',borrow=True)
a1_values=np.array( np.zeros( (d,m)), dtype=theano.config.floatX)
a1 = theano.shared(value=a1_values, name='a1', borrow=True)
# The bias column is repeated m times so that it matches the (s_2, m) product.
lin_z2 = T.dot( Theta1, a1) + T.tile(b1,(1,m))
#lin_z2 = T.dot( Theta1, a1)
In [132]:
test_mult = theano.function([],lin_z2)
print( type(b_values))
b_values.dtype
Out[132]:
In [133]:
test_mult()
Out[133]:
In [126]:
print( b1.get_value().shape )
T.tile( b1, (0,m))
Out[126]:
In [13]:
import sys
import os
In [14]:
#sys.path.append( os.getcwd() + '/ML')
sys.path.append( os.getcwd() + '/ML' )
In [11]:
from NN import Layer, cost_functional, cost_functional_noreg, gradientDescent_step
Boilerplate sample data, from Coursera's Machine Learning Introduction
In [12]:
# Load Training Data
print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')
In [14]:
ex4data1.keys()
Out[14]:
In [16]:
print( ex4data1['X'].shape )
print( ex4data1['y'].shape )
In [13]:
test_rng = np.random.RandomState(1234)
# The cells that follow access Theta1.al and Theta1.alp1, so Theta1 has to be a Layer here;
# with this line commented out a fresh kernel fails on the next cell.
# NOTE(review): arguments mirror the other Layer(...) calls in this notebook -- confirm against NN.Layer.
Theta1 = Layer( test_rng, 1, 400,25, 5000)
In [29]:
#help(Theta1.al.set_value); # Beginning with Theano 0.3.1, set_value will work in-place on the GPU, if ... source on CPU
Theta1.al.set_value( ex4data1['X'].T.astype(theano.config.floatX))
In [21]:
Theta1.alp1
Out[21]:
In [30]:
print( type( Theta1.alp1 ) )
Theta2 = Layer( test_rng, 2, 25,10,5000, al=Theta1.alp1 )
In [24]:
Theta2.alp1
Out[24]:
In [35]:
predicted = theano.function([],sandbox.cuda.basic_ops.gpu_from_host( Theta2.alp1 ) )
In [37]:
predicted().shape
Out[37]:
In [39]:
print( ex4data1['y'].shape )
pd.DataFrame( ex4data1['y']).describe()
Out[39]:
In [21]:
# The labels stored in ex4data1['y'] are 1, 2, ..., 10; for training the network they are
# recoded as indicator vectors holding only 0s and 1s (one row per example, then transposed).
K=10
m = ex4data1['y'].shape[0]
y_prob = np.zeros((m, K))
# label k of example i sets entry (i, k-1) to 1
y_prob[np.arange(m), ex4data1['y'].ravel() - 1] = 1
y_prob = y_prob.T.astype(theano.config.floatX) # size dims. (K,m)
print(y_prob.shape)
In [47]:
print( type(y_prob) )
type( np.asarray( y_prob, dtype=theano.config.floatX) )
Out[47]:
In [48]:
help( T.nlinalg.trace )
In [49]:
y_sh_var = theano.shared( np.asarray( y_prob,dtype=theano.config.floatX),name='y')
In [54]:
h_test = Theta2.alp1
J = sandbox.cuda.basic_ops.gpu_from_host(
(-T.nlinalg.trace( T.dot( T.log( h_test ), y_sh_var.T)) - T.nlinalg.trace(
T.dot( T.log( np.float32(1.)-h_test),(np.float32(1.)- y_sh_var.T ) )))/np.float32(m)
)
In [55]:
print(type(J))
test_cost_func = theano.function([],J)
In [56]:
test_cost_func()
Out[56]:
In [58]:
J_test_build = sandbox.cuda.basic_ops.gpu_from_host( -T.nlinalg.trace( T.dot( T.log(h_test),y_sh_var.T) ) )
test_cost_build_func = theano.function([], J_test_build)
In [59]:
test_cost_build_func()
Out[59]:
Sanity check using ex4.m
, Exercise 4 or Programming Exercise 4 from Coursera's Machine Learning Introduction by Ng
In [14]:
Theta_testvals = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4weights.mat')
In [16]:
print( Theta_testvals.keys() )
print( Theta_testvals['Theta1'].shape )
print( Theta_testvals['Theta2'].shape )
Theta1_testval = Theta_testvals['Theta1'][:,1:]
b1_testval = Theta_testvals['Theta1'][:,0:1]
print( Theta1_testval.shape )
print( b1_testval.shape )
Theta2_testval = Theta_testvals['Theta2'][:,1:]
b2_testval = Theta_testvals['Theta2'][:,0:1]
print( Theta2_testval.shape )
print( b2_testval.shape )
In [15]:
Theta1 = Layer( test_rng, 1, 400,25, 5000, activation=T.nnet.sigmoid)
In [17]:
Theta1.Theta.set_value( Theta1_testval.astype("float32"))
Theta1.b.set_value( b1_testval.astype('float32') )
Theta1.al.set_value( ex4data1['X'].T.astype('float32'))
For $\Theta^{(2)}$, the key to connecting $\Theta^{(2)}$ with $\Theta^{(1)}$ is to set the argument `al=Theta1.alp1` in class `Layer`:
In [18]:
Theta2 = Layer( test_rng, 2, 25,10,5000, al=Theta1.alp1 , activation=T.nnet.sigmoid)
In [19]:
Theta2.Theta.set_value( Theta2_testval.astype('float32'))
Theta2.b.set_value( b2_testval.astype('float32'))
In [107]:
# Element-wise cross-entropy: -y*log(h) - (1-y)*log(1-h), summed along axis 0 and
# then averaged over the remaining axis; the result is moved to the GPU.
h_test = Theta2.alp1
one_f32 = np.float32(1)
cross_entropy = -y_sh_var * T.log(h_test) - (one_f32 - y_sh_var) * T.log(one_f32 - h_test)
J = sandbox.cuda.basic_ops.gpu_from_host(T.mean(T.sum(cross_entropy, axis=0), axis=0))
# Alternative kept for reference (inspects only the log(h) * y product):
# J = sandbox.cuda.basic_ops.gpu_from_host(
#     T.log(h_test) * y_sh_var
#     )
In [108]:
test_cost_func = theano.function([],J)
In [109]:
test_cost_func()
Out[109]:
In [80]:
print(type( y_sh_var) )
print( y_sh_var.get_value().shape )
print( type( h_test ))
In [94]:
checklayer2 = theano.function([], sandbox.cuda.basic_ops.gpu_from_host(Theta1.alp1))
In [102]:
checklayer2()
Out[102]:
In [112]:
testreg = theano.function([], T.sum( Theta1.Theta * Theta1.Theta ) )
In [113]:
testreg()
Out[113]:
In [114]:
range(1,3)
Out[114]:
In [115]:
Thetas_lst = [ Theta1.Theta, Theta2.Theta ]
In [118]:
T.sum( [ T.sum( theta*theta) for theta in Thetas_lst] )
Out[118]:
In [22]:
cost_func_test = cost_functional(3, 1, y_prob, Theta2.alp1, [Theta1.Theta, Theta2.Theta])
In [23]:
cost_test = theano.function([], cost_func_test)
In [24]:
cost_test() # (this value should be about 0.383770)
Out[24]:
In [26]:
grad_test = T.grad( cost_func_test,[Theta1.Theta, Theta2.Theta])
In [27]:
grad_test_test = theano.function([], grad_test)
In [31]:
# Evaluate the compiled gradient function once and inspect that single result,
# instead of recomputing both gradients for every print.
grad_vals = grad_test_test()
print(type(grad_vals))
print(len(grad_vals))
print(type(grad_vals[0]))
print(grad_vals[0].shape)
print(grad_vals[1].shape)
In [41]:
print( range(6))
print( list( "Ernest") )
zip( range(6), list("Ernest"))
print( type(grad_test))
In [40]:
print( grad_test_test.maker.fgraph.toposort() )
In [43]:
# grad_test is a Python list of symbolic gradients (one per Theta passed to T.grad);
# `float * list` raises TypeError, so scale each gradient expression individually.
[np.float32(0.01) * grad for grad in grad_test]
In [44]:
# Build one (shared_variable, new_value) update pair per weight matrix, where
#   new_value = Theta - 0.01 * dJ/dTheta + 0.0001 * Theta   (moved to the GPU).
# NOTE(review): the `+0.0001*Theta` term grows the weights; if it is meant as L2
# weight decay the sign is presumably wrong — confirm against cost_functional.
test_update = [(Theta,sandbox.cuda.basic_ops.gpu_from_host( Theta - np.float32(0.01)*T.grad(cost_func_test, Theta)+0.0001*Theta ) ) for Theta in [Theta1.Theta, Theta2.Theta] ]
In [46]:
test_gradDes_step = theano.function( inputs=[], updates= test_update )
In [47]:
test_gradDes_step()
Out[47]:
In [52]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )
In [53]:
test_gradDes_step()
Out[53]:
In [54]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )
In [23]:
gradDes_test_res = gradientDescent_step(cost_func_test, [Theta1.Theta, Theta2.Theta], 0.01, 0.00001 )
In [25]:
print( type(gradDes_test_res) )
gradDes_step_test = gradDes_test_res[1]
In [26]:
gradDes_step_test()
Out[26]:
In [27]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )
In [28]:
gradDes_step_test()
Out[28]:
In [29]:
print( Theta1.Theta.get_value() )
print( Theta2.Theta.get_value() )
In [30]:
y_prob.shape
Out[30]:
In [31]:
ex4data1['y'].shape
Out[31]:
In [51]:
pd.DataFrame( ex4data1['y']).describe()
Out[51]:
In [39]:
# Print the symbolic shape of the network output and the ndim of that shape
# variable, then compile a zero-argument function returning the output activations.
output_act = Theta2.alp1
print(output_act.shape)
print(output_act.shape.ndim)
# Theta2.alp1.shape.get_scalar_constant_value()
predicted_logreg = theano.function([], output_act)
In [42]:
pd.DataFrame( predicted_logreg().T ).describe()
Out[42]:
In [46]:
pd.DataFrame(predicted_logreg().T).describe().iloc[1:-1,:].plot()
Out[46]:
In [49]:
print( np.argmax( predicted_logreg(), axis=0).shape )
np.vstack( np.argmax( predicted_logreg(),axis=0) ).shape
Out[49]:
In [52]:
pd.DataFrame( np.vstack( np.argmax(predicted_logreg(),axis=0)) + 1).describe()
Out[52]:
In [55]:
# Predicted label = row index of the largest output in each column, shifted by +1
# before being compared with ex4data1['y']; matches are stored as 1.0 / 0.0.
predicted_labels = np.vstack(np.argmax(predicted_logreg(), axis=0)) + 1
res = np.float32(predicted_labels == ex4data1['y'])
pd.DataFrame(res).describe()
Out[55]:
In [56]:
range(1,3)
Out[56]:
In [57]:
predicted_logreg().shape
Out[57]:
In [64]:
print(y_prob.shape); print( np.argmax( y_prob,axis=0 ).shape)
In [9]:
sys.path.append( os.getcwd() + '/ML' )
In [10]:
from NN import Layer, cost_functional, cost_functional_noreg, gradientDescent_step, MLP
In [11]:
# Load Training Data
import scipy.io  # `import scipy` alone does not guarantee the io submodule is loaded

print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')

# The original labels (in the variable y) are 1, 2, ..., 10; for training a neural
# network they are recoded as one-hot vectors containing only the values 0 or 1.
K = 10
m = ex4data1['y'].shape[0]
# 0-based class index per example; the int cast keeps the indexing valid even if
# the .mat file stores the labels as floating point.
class_idx = ex4data1['y'].ravel().astype(int) - 1
y_prob = np.zeros((m, K))
y_prob[np.arange(m), class_idx] = 1  # one 1 per row, in the column of that example's class
y_prob = y_prob.T.astype(theano.config.floatX)  # size dims. (K,m)
print(ex4data1['X'].T.shape)
print(y_prob.shape)
In [12]:
digitsMLP = MLP(3,[400,25,10], 5000, ex4data1['X'].T, y_prob, T.nnet.sigmoid, 1., 0.1, 0.0000)
In [17]:
digitsMLP.train_model(100000)
In [18]:
digitsMLP.accuracy_log_reg()
Out[18]:
In [19]:
print( digitsMLP.Thetas[0].Theta.get_value() )
digitsMLP.Thetas[1].Theta.get_value()
Out[19]:
In [20]:
digitsMLP.predicted_vals_logreg()
Out[20]:
In [21]:
testL1a2 = theano.function([], digitsMLP.Thetas[0].alp1 )
print( testL1a2() )
testL2a2 = theano.function([], digitsMLP.Thetas[1].al )
print( testL2a2() )
In [33]:
[1,2,3,4,5] + [8,1,5]
Out[33]:
In [22]:
# Recover class indices from the one-hot targets: argmax down each column,
# reshaped into a single column so it can be compared with the predictions.
print(digitsMLP.y.shape)
true_class_idx = np.argmax(digitsMLP.y, axis=0)
y_cls_test = true_class_idx[:, np.newaxis]
print(y_cls_test.shape)
pd.DataFrame(y_cls_test).describe()
Out[22]:
In [23]:
# Predicted class index per example: argmax down each column of the network
# output, reshaped into a single column.
predicted_output = digitsMLP.predicted_vals_logreg()
pred_y_cls_test = np.argmax(predicted_output, axis=0)[:, np.newaxis]
print(pred_y_cls_test.shape)
pd.DataFrame(pred_y_cls_test).describe()
Out[23]:
In [24]:
np.mean( pred_y_cls_test == y_cls_test )
Out[24]:
In [27]:
# One-hot encode the training labels in train_set[1]: each label is used directly
# as the index of the single 1 in that example's K-long vector.
K = 10
m = len(train_set[1])
one_hot_rows = np.zeros((m, K))  # one row per training example
one_hot_rows[np.arange(m), train_set[1]] = 1
y_train_prob = one_hot_rows.T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_train_prob.shape)
In [28]:
print( pd.DataFrame( y_train_prob).describe() )
In [29]:
m,d= train_set[0].shape
MNIST_MTL = MLP(3,[d,25,10], m, train_set[0].T, y_train_prob, T.nnet.sigmoid, 1., 0.1, 0.00001)
In [31]:
MNIST_MTL.accuracy_log_reg()
Out[31]:
In [32]:
print( MNIST_MTL.Thetas[0].Theta.get_value() )
MNIST_MTL.Thetas[1].Theta.get_value()
Out[32]:
In [33]:
MNIST_MTL.predicted_vals_logreg()
Out[33]:
In [34]:
MNIST_MTL.train_model(100000)
In [35]:
MNIST_MTL.accuracy_log_reg()
Out[35]:
In [36]:
print( MNIST_MTL.Thetas[0].Theta.get_value() )
MNIST_MTL.Thetas[1].Theta.get_value()
Out[36]:
In [37]:
MNIST_MTL.predicted_vals_logreg()
Out[37]:
Save the model; cf. Getting Started, DeepLearning 0.1 documentation, Loading and Saving Models
In [38]:
import cPickle
In [40]:
save_file = open('./saved_models/MNIST_MTL_log_reg','wb')
In [41]:
# Append every layer's weight matrix and bias to the already-open save_file,
# in layer order (weights first, then bias, for each layer).
for Thet in MNIST_MTL.Thetas:
    # HIGHEST_PROTOCOL is what the former literal -1 selected: the newest pickle
    # protocol available (it is a protocol version, not a priority).
    cPickle.dump(Thet.Theta.get_value(borrow=True), save_file, cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(Thet.b.get_value(borrow=True), save_file, cPickle.HIGHEST_PROTOCOL)
In [42]:
save_file.close()
In [50]:
MNIST_MTL.Thetas[0].al.set_value( valid_set[0].T.astype(theano.config.floatX) )
In [47]:
# One-hot encode the validation labels in valid_set[1] (label value = index of the 1).
K = 10
m = len(valid_set[1])
y_valid_prob = np.zeros((m, K))  # one row per validation example
for example_idx, label in enumerate(valid_set[1]):
    y_valid_prob[example_idx, label] = 1
y_valid_prob = y_valid_prob.T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_valid_prob.shape)
In [48]:
MNIST_MTL.y = y_valid_prob
In [51]:
MNIST_MTL.predicted_vals_logreg()
In [53]:
theano.function([], MNIST_MTL.Thetas[0].alp1)()
In [56]:
Layer1 = MNIST_MTL.Thetas[0]
Layer2 = MNIST_MTL.Thetas[1]
m = valid_set[0].shape[0]
print(m)
In [60]:
# Manual forward pass through both layers: a_{l+1} = sigmoid(Theta_l . a_l + b_l),
# with each bias tiled m times along axis 1 so it is added to every column.
z2 = T.dot(Layer1.Theta, Layer1.al) + T.tile(Layer1.b, (1, m))
a2 = T.nnet.sigmoid(z2)
z3 = T.dot(Layer2.Theta, a2) + T.tile(Layer2.b, (1, m))
a3 = T.nnet.sigmoid(z3)
forward_valid = theano.function([], a3)
valid_pred = forward_valid()
print(valid_pred.shape)
In [61]:
pd.DataFrame( valid_pred.T).describe()
Out[61]:
In [62]:
np.mean( np.vstack( np.argmax( valid_pred,axis=0)) == np.vstack( valid_set[1] ) )
Out[62]:
In [63]:
X_in = T.matrix()
In [64]:
# X_in is a purely symbolic T.matrix(): it holds no data and therefore has no
# set_value() (only theano.shared variables do), so calling it raised AttributeError.
# The validation inputs are bound to X_in through `givens` when the function is compiled.
In [68]:
# Same two-layer forward pass, expressed on the symbolic input X_in; the
# validation inputs are substituted for X_in at compile time through `givens`.
valid_inputs = valid_set[0].T.astype(theano.config.floatX)
z2_giv = T.dot(Layer1.Theta, X_in) + T.tile(Layer1.b, (1, m))
a2_giv = T.nnet.sigmoid(z2_giv)
z3_giv = T.dot(Layer2.Theta, a2_giv) + T.tile(Layer2.b, (1, m))
a3_giv = T.nnet.sigmoid(z3_giv)
valid_pred_givens = theano.function([], outputs=a3_giv, givens={X_in: valid_inputs})
In [72]:
print( valid_pred_givens().shape )
pd.DataFrame( valid_pred_givens().T).describe()
Out[72]:
In [78]:
np.mean( np.vstack( np.argmax( valid_pred_givens(),axis=0)) == np.vstack( valid_set[1] ) )
Out[78]:
In [79]:
test_pred_givens = theano.function([], outputs=a3_giv, givens={ X_in: test_set[0].T.astype(theano.config.floatX)} )
In [80]:
np.mean( np.vstack( np.argmax( test_pred_givens(),axis=0)) == np.vstack( test_set[1] ) )
Out[80]:
In [81]:
range(1,3)
Out[81]:
In [82]:
range(3)
Out[82]:
In [83]:
range(1,3-1)
Out[83]:
In [11]:
gls_data = pd.read_csv( "./kaggle/glass.csv")
In [12]:
gls_data.describe()
Out[12]:
In [16]:
# DataFrame.get_values() was deprecated and later removed from pandas;
# `.values` gives the same ndarray and exists in every pandas version.
gls_data.values.shape
Out[16]:
In [12]:
# DataFrame.get_values() was deprecated and later removed from pandas;
# `.values` gives the same ndarray and exists in every pandas version.
gls_values = gls_data.values

# Whole data set: every column but the last is a feature, the last column is the label.
X_gls = gls_values[:, :-1]
print(X_gls.shape)
y_gls = gls_values[:, -1]
print(y_gls.shape)
print(y_gls[:10])

# Training split: all rows except the last 14.
X_gls_train = gls_values[:-14, :-1]
print(X_gls_train.shape)
y_gls_train = gls_values[:-14, -1]
print(y_gls_train.shape)
In [29]:
# One-hot encode the glass training labels (1-based class ids -> 0-based indices).
K = 7
m = len(y_gls_train)
# y_gls_train is sliced out of the DataFrame's value array together with the
# feature columns, so it is presumably floating point; NumPy rejects float
# indices, hence the explicit int cast.
class_idx = np.asarray(y_gls_train).astype(int) - 1
y_gls_train_prob = np.zeros((m, K))  # one row per training example
y_gls_train_prob[np.arange(m), class_idx] = 1
y_gls_train_prob = y_gls_train_prob.T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_gls_train_prob.shape)
In [42]:
gls_MLP = MLP( 3, [9,8,7],200, X_gls_train.T, y_gls_train_prob, T.nnet.sigmoid, 0.01,0.05,0.0001 )
In [43]:
gls_MLP.accuracy_log_reg()
Out[43]:
In [44]:
gls_MLP.train_model(10000)
In [45]:
gls_MLP.accuracy_log_reg()
Out[45]:
In [46]:
gls_MLP.predicted_vals_logreg()
Out[46]:
In [47]:
gls_MLP.train_model(10000)
gls_MLP.accuracy_log_reg()
Out[47]:
In [ ]:
# (scratch cell: the stray token `ga` was removed — it is an undefined name and
# would raise NameError on Restart & Run All)
In [48]:
# Held-out split: the last 14 rows. `.values` replaces get_values(), which was
# deprecated and later removed from pandas.
gls_test_values = gls_data.values[-14:]
X_gls_test = gls_test_values[:, :-1]
print(X_gls_test.shape)
y_gls_test = gls_test_values[:, -1]
print(y_gls_test.shape)
In [49]:
gls_predict_on_test = gls_MLP.predict_on( 14, X_gls_test.T )
In [51]:
# Compare two 1-D arrays. The former np.vstack(...) turned the predictions into an
# (n, 1) column, which broadcast against the (n,) label vector into an (n, n)
# matrix, so the mean was not the per-example accuracy.
# Labels are 1-based while argmax is 0-based, hence the `- 1`.
np.mean(np.argmax(gls_predict_on_test(), axis=0) == (y_gls_test - 1))
Out[51]:
In [52]:
y_gls_test
Out[52]:
In [53]:
np.vstack( np.argmax( gls_predict_on_test(), axis=0))
Out[53]:
In [17]:
X_sym = T.matrix()
In [33]:
rng = np.random.RandomState(1234)
Thetab1 = Layer( rng, 1, 4,3,2, al = X_sym, activation=T.nnet.sigmoid)
In [34]:
Thetab1.alp1
Thetab1.Theta.get_value().shape
Out[34]:
In [35]:
Thetab2 = Layer( rng, 2, 3,2,2, al=Thetab1.alp1, activation=T.nnet.sigmoid)
In [36]:
Thetab2.al = Thetab1.alp1
In [30]:
X_sym.shape[0]
Out[30]:
In [31]:
T.tile( Thetab1.b, (1, X_sym.shape[0]))
Out[31]:
In [42]:
test12comp = theano.function( [], outputs=Thetab2.alp1, givens={ X_sym : X42test} )
In [40]:
X42test = np.array([1,2,3,4,5,6,7,8]).reshape((4,2)).astype(theano.config.floatX)
In [43]:
test12comp()
Out[43]:
In [44]:
# 4 x 3 test input holding 1..12 in row-major order, cast to Theano's float type.
X43test = np.arange(1, 13).reshape((4, 3)).astype(theano.config.floatX)
In [45]:
X43test
Out[45]:
In [46]:
test43comp = theano.function( [], outputs=Thetab2.alp1, givens={ X_sym : X43test} )
In [47]:
test43comp()
In [50]:
print( type(Thetab1.al ))
In [52]:
# Layer 1 pre-activation: Theta . a_l plus the bias tiled once per input column
# (the column count is read symbolically from a_l), then the layer's activation g.
n_cols_layer1 = Thetab1.al.shape[1]
bias_tiled_layer1 = T.tile(Thetab1.b, (1, n_cols_layer1))
lin_zlp1 = T.dot(Thetab1.Theta, Thetab1.al) + bias_tiled_layer1
a1p1 = Thetab1.g(lin_zlp1)
In [51]:
Thetab1.al = X_sym
In [53]:
Thetab2.al = a1p1
In [54]:
# Layer 2 pre-activation: Theta . a_l plus the bias tiled once per input column
# (the column count is read symbolically from a_l), then the layer's activation g.
n_cols_layer2 = Thetab2.al.shape[1]
bias_tiled_layer2 = T.tile(Thetab2.b, (1, n_cols_layer2))
lin_z2p1 = T.dot(Thetab2.Theta, Thetab2.al) + bias_tiled_layer2
a2p1 = Thetab2.g(lin_z2p1)
In [55]:
test_gen_conn = theano.function([], outputs=a2p1, givens={ Thetab1.al : X42test })
In [56]:
test_gen_conn()
Out[56]:
In [57]:
test_gen_conn = theano.function([], outputs=a2p1, givens={ Thetab1.al : X43test })
In [58]:
test_gen_conn()
Out[58]:
GPU test
In [59]:
test_gen_conn = theano.function([], outputs=sandbox.cuda.basic_ops.gpu_from_host(a2p1), givens={ Thetab1.al : X42test })
In [60]:
test_gen_conn()
Out[60]:
In [61]:
test_gen_conn = theano.function([], outputs=sandbox.cuda.basic_ops.gpu_from_host(a2p1), givens={ Thetab1.al : X43test })
In [62]:
test_gen_conn()
Out[62]:
In [9]:
sys.path.append( os.getcwd() + '/ML' )
In [10]:
from NN import MLP
In [11]:
# Load Training Data
import scipy.io  # `import scipy` alone does not guarantee the io submodule is loaded

print("Loading and Visualizing Data ... \n")
ex4data1 = scipy.io.loadmat('./coursera_Ng/machine-learning-ex4/ex4/ex4data1.mat')

# The original labels (in the variable y) are 1, 2, ..., 10; for training a neural
# network they are recoded as one-hot vectors containing only the values 0 or 1.
K = 10
m = ex4data1['y'].shape[0]
# Flatten the labels and shift them to 0-based indices; the int cast keeps the
# indexing valid even if the .mat file stores the labels as floating point.
zero_based_labels = ex4data1['y'].ravel().astype(int) - 1
y_prob = np.zeros((m, K))
y_prob[np.arange(m), zero_based_labels] = 1  # one 1 per row, in that example's class column
y_prob = y_prob.T.astype(theano.config.floatX)  # size dims. (K,m)
print(ex4data1['X'].T.shape)
print(y_prob.shape)
In [12]:
digitsMLP = MLP( 3, [400,25,10], ex4data1['X'].T, y_prob, T.nnet.sigmoid, 1.)
In [13]:
digitsMLP.build_update(ex4data1['X'].T, y_prob, 0.01, 0.00001)
In [14]:
digitsMLP.predicted_vals_logreg()
Out[14]:
In [15]:
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)
Out[15]:
In [16]:
digitsMLP.train_model(10000)
In [17]:
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)
Out[17]:
In [18]:
digitsMLP.train_model(50000)
In [19]:
digitsMLP.accuracy_logreg( ex4data1['X'].T, y_prob)
Out[19]:
In [20]:
import gzip
import six.moves.cPickle as pickle
# NOTE(review): pickle.load can execute arbitrary code from the file it reads —
# only load a trusted copy of mnist.pkl.gz.
with gzip.open("../DeepLearningTutorials/data/mnist.pkl.gz", 'rb') as f:
    try:
        # Python 3 signature: decode the pickled byte strings as latin1
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except TypeError:
        # Python 2: load() accepts no `encoding` keyword. Only that specific
        # failure triggers the fallback; any other error (missing or corrupt
        # file, ...) now propagates instead of being hidden by a bare except.
        train_set, valid_set, test_set = pickle.load(f)
In [21]:
# One-hot encode the training labels in train_set[1] (label value = index of the 1).
K = 10
m = len(train_set[1])
y_train_prob = np.zeros((m, K))  # one row per training example
for example_idx, label in enumerate(train_set[1]):
    y_train_prob[example_idx, label] = 1
y_train_prob = y_train_prob.T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_train_prob.shape)
In [22]:
MNIST_MLP = MLP( 3,[784,49,10], train_set[0].T, y_train_prob, T.nnet.sigmoid, 1.)
In [23]:
MNIST_MLP.build_update( train_set[0].T, y_train_prob, 0.01, 0.0001)
In [24]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)
Out[24]:
In [25]:
MNIST_MLP.train_model(50000)
In [26]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)
Out[26]:
In [30]:
%time MNIST_MLP.train_model(100000)
In [31]:
MNIST_MLP.accuracy_logreg( train_set[0].T,y_train_prob)
Out[31]:
In [32]:
# One-hot encode the validation labels in valid_set[1]; K is reused from the
# training-label cell.
m = len(valid_set[1])
valid_one_hot_rows = np.zeros((m, K))  # one row per validation example
valid_one_hot_rows[np.arange(m), valid_set[1]] = 1
y_valid_prob = valid_one_hot_rows.T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_valid_prob.shape)
In [33]:
# One-hot encode the test labels in test_set[1]; K is reused from the
# training-label cell.
m = len(test_set[1])
test_one_hot_rows = np.zeros((m, K))  # one row per test example
test_one_hot_rows[np.arange(m), test_set[1]] = 1
y_test_prob = test_one_hot_rows.T.astype(theano.config.floatX)  # size dims. (K,m)
print(y_test_prob.shape)
In [34]:
MNIST_MLP.accuracy_logreg( valid_set[0].T,y_valid_prob)
Out[34]:
In [35]:
MNIST_MLP.accuracy_logreg( test_set[0].T,y_test_prob)
Out[35]:
In [40]:
# Rebuild the MNIST network with layer sizes [input width, 25, 10]; the input
# width is read from the transposed training matrix rather than hard-coded.
X_train_T = train_set[0].T
MNIST_d = X_train_T.shape[0]
print(MNIST_d)
MNIST_MLP = MLP(3, [MNIST_d, 25, 10], X_train_T, y_train_prob, T.nnet.sigmoid, 1.)
MNIST_MLP.build_update(X_train_T, y_train_prob, 0.1, 0.00001)
In [41]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)
Out[41]:
In [42]:
MNIST_MLP.train_model(150000)
In [43]:
MNIST_MLP.accuracy_logreg( train_set[0].T, y_train_prob)
Out[43]:
In [44]:
MNIST_MLP.accuracy_logreg( valid_set[0].T, y_valid_prob)
Out[44]:
In [45]:
MNIST_MLP.accuracy_logreg( test_set[0].T, y_test_prob)
Out[45]:
In [ ]: