I ran this at the command prompt
THEANO_FLAGS='mode=FAST_RUN,device=gpu,floatX=float32,lib.cnmem=1,allow_gc=False' jupyter notebook
In [1]:
%matplotlib inline
In [2]:
import theano
In [3]:
from theano import function, config, sandbox, shared
import theano.tensor as T
In [4]:
print( theano.config.device )
print( theano.config.lib.cnmem) # cf. http://deeplearning.net/software/theano/library/config.html
print( theano.config.print_active_device)# Print active device at when the GPU device is initialized.
In [5]:
print(theano.config.allow_gc)
print(theano.config.optimizer_excluding)
In [6]:
import numpy as np
import scipy
In [7]:
import sys
sys.path.append( './ML' )
In [8]:
from SVM import SVM, SVM_serial, SVM_parallel
In [9]:
import pandas as pd
In [10]:
X = np.random.randn(300,2)
y = np.logical_xor(X[:,0] > 0, X[:,1] > 0)
In [10]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
Load and prepare the data set
This data set is used for the grid search below.
In [11]:
iris = load_iris()
X = iris.data
y = iris.target
In [12]:
# Dataset for decision function visualization: we only keep the first two
# features in X and sub-sample the dataset to keep only 2 classes and
# make it a binary classification problem
X_2d = X[:,:2]
X_2d=X_2d[y>0]
y_2d=y[y>0]
y_2d -= 1
In [13]:
# It is usually a good idea to scale the data for SVM training.
# We are cheating a bit in this example in scaling all of the data,
# instead of fitting the transformation on the training set and
# just applying it on the test set.
scaler = StandardScaler()
X= scaler.fit_transform(X)
X_2d=scaler.fit_transform(X_2d)
In [14]:
print(type(X)); print(X.shape); print(type(X_2d));print(X_2d.shape);print(type(y));print(y.shape);
print(type(y_2d));print(y_2d.shape)
In [15]:
# Compute the train/validation/test split sizes for the 2-class iris subset.
ratio_of_train_to_total = 0.6
numberofexamples = len(y_2d)
numberoftrainingexamples = int(numberofexamples*ratio_of_train_to_total)
# Use floor division so the result is an int usable as a slice index;
# true division would produce a float under Python 3 and break the slicing
# in the next cell.  (// is identical to / for ints under Python 2.)
numbertovalidate = (numberofexamples - numberoftrainingexamples)//2
numbertotest= numberofexamples - numberoftrainingexamples - numbertovalidate
print(numberofexamples);print(numbertotest);print(numberoftrainingexamples);print(numbertovalidate)
In [16]:
shuffledindices = np.random.permutation( numberofexamples)
In [17]:
# Sequential train/validation/test split of the 2-class iris subset.
# NOTE(review): `shuffledindices` computed in the previous cell is never
# applied here, so the split is NOT randomized -- confirm that is intended.
# The slice bounds must be ints (see the cell computing numbertovalidate).
X_2d_train = X_2d[:numberoftrainingexamples]
y_2d_train = y_2d[:numberoftrainingexamples]
X_2d_valid = X_2d[numberoftrainingexamples:numberoftrainingexamples + numbertovalidate]
y_2d_valid = y_2d[numberoftrainingexamples:numberoftrainingexamples + numbertovalidate]
X_2d_test = X_2d[numberoftrainingexamples + numbertovalidate:]
y_2d_test = y_2d[numberoftrainingexamples + numbertovalidate:]
Clarke, Fokoue, and Zhang, in *Principles and Theory for Data Mining and Machine Learning* (2009), and Bishop, in *Pattern Recognition and Machine Learning* (2007), both use $y\in \lbrace -1, 1\rbrace$ for binary classification with support vector machines, as opposed to $y\in \lbrace 0,1 \rbrace$ for $K=2$ total classes that the outcome $y$ could belong to. Should this convention be noted more prominently in practice?
In [39]:
y_2d_train
Out[39]:
In [18]:
y_2d_train[y_2d_train < 1] = -1
In [19]:
print(y_2d_train.shape);print(y_2d_train)
In [20]:
y_2d_valid[y_2d_valid < 1] = -1
In [21]:
y_2d_test[y_2d_test < 1] = -1
In [19]:
where_ex6_is_str = './coursera_Ng/machine-learning-ex6/ex6/'
ex6data1_mat_data = scipy.io.loadmat( where_ex6_is_str + "ex6data1.mat")
In [18]:
SVM_iris = SVM(X_2d_train,y_2d_train,len(y_2d_train),1.0,1,0.001)
In [19]:
SVM_iris.build_W();
`.build_update` might take a while under `FAST_COMPILE` (the Theano mode set via the `THEANO_FLAGS` command typed in before starting the notebook).
In [20]:
SVM_iris.build_update();
In [21]:
SVM_iris.train_model_full();
In [22]:
SVM_iris.build_b();
In [25]:
SVM_iris.make_predict(X_2d_valid[0])
Out[25]:
In [23]:
SVM_iris.make_predictions(X_2d_valid)
Out[23]:
In [24]:
X_2d_test
Out[24]:
In [25]:
y_2d_test
Out[25]:
In [26]:
y_test_pred= SVM_iris.make_predictions(X_2d_test)
In [30]:
np.array( [np.array(yhat) for yhat in y_test_pred] )
Out[30]:
In [26]:
y_valid_pred = [ SVM_iris.make_predict(X_2d_valid_ele) for X_2d_valid_ele in X_2d_valid ]
In [27]:
y_valid_pred = [y_valid_pred_ele[0] for y_valid_pred_ele in y_valid_pred]
In [28]:
y_valid_pred = np.array( y_valid_pred).flatten()
In [30]:
#y_valid_pred[ y_valid_pred>0 ] = 1
#y_valid_pred[ y_valid_pred<0 ] = -1
y_valid_pred = np.sign( y_valid_pred)
In [31]:
(y_2d_valid == y_valid_pred).astype(theano.config.floatX).sum()/len(y_valid_pred)
Out[31]:
In [29]:
y_valid_pred
Out[29]:
In [30]:
y_2d_valid
Out[30]:
In [46]:
SVM_iris_X
In [65]:
SVM_iris = SVM(X_2d_train,y_2d_train,len(y_2d_train),0.1,1.0,0.001)
In [66]:
SVM_iris.build_W();
SVM_iris.build_update();
SVM_iris.train_model_full();
SVM_iris.build_b();
In [67]:
y_valid_pred = np.array( [ SVM_iris.make_predict(X_2d_valid_ele)[0] for X_2d_valid_ele in X_2d_valid ] ).flatten()
In [68]:
y_valid_pred[ y_valid_pred>0 ] = 1
y_valid_pred[ y_valid_pred<0 ] = -1
In [69]:
(y_2d_valid == y_valid_pred).astype(theano.config.floatX).sum()/len(y_valid_pred)
Out[69]:
In [70]:
SVM_iris = SVM(X_2d_train,y_2d_train,len(y_2d_train),0.1,0.1,0.001)
In [71]:
SVM_iris.build_W();
SVM_iris.build_update();
SVM_iris.train_model_full();
SVM_iris.build_b();
In [72]:
y_valid_pred = np.array( [ SVM_iris.make_predict(X_2d_valid_ele)[0] for X_2d_valid_ele in X_2d_valid ] ).flatten()
In [73]:
y_valid_pred[ y_valid_pred>0 ] = 1
y_valid_pred[ y_valid_pred<0 ] = -1
In [74]:
(y_2d_valid == y_valid_pred).astype(theano.config.floatX).sum()/len(y_valid_pred)
Out[74]:
In [75]:
SVM_iris = SVM(X_2d_train,y_2d_train,len(y_2d_train),0.01,0.1,0.001)
In [76]:
SVM_iris.build_W();
SVM_iris.build_update();
SVM_iris.train_model_full();
SVM_iris.build_b();
In [77]:
y_valid_pred = np.array( [ SVM_iris.make_predict(X_2d_valid_ele)[0] for X_2d_valid_ele in X_2d_valid ] ).flatten()
In [78]:
y_valid_pred[ y_valid_pred>0 ] = 1
y_valid_pred[ y_valid_pred<0 ] = -1
In [79]:
(y_2d_valid == y_valid_pred).astype(theano.config.floatX).sum()/len(y_valid_pred)
Out[79]:
In [ ]:
In [53]:
m_val = np.cast["int32"](X.shape[0])
Xi = theano.shared( np.zeros_like(X[0],dtype=theano.config.floatX) )
X = theano.shared( np.zeros_like(X,dtype=theano.config.floatX) )
y = theano.shared( np.random.randint(2,size=m_val))
yi = theano.shared( np.cast["int32"]( np.random.randint(2)) )
m = theano.shared( m_val )
lambda_mult = theano.shared( np.zeros(m_val).astype(theano.config.floatX) ) # lambda Lagrange multipliers
In [63]:
Xi.set_value( X[np.int32(1)] )
In [41]:
np.random.randint(2,size=4)
Out[41]:
In [46]:
np.random.randint(2)
Out[46]:
In [67]:
X = np.random.randn(300,2)
y = np.logical_xor(X[:,0] > 0, X[:,1] > 0)
In [70]:
def rbf(Xi,Xj,sigma):
    """ rbf - radial basis function kernel, exp(-||Xi - Xj||^2 / (2*sigma)).

    Fixed: the original expression was missing two closing parentheses
    (a SyntaxError); only the parenthesization is repaired here.
    NOTE(review): later cells in this notebook divide by 2*sigma**2 rather
    than 2*sigma -- confirm which kernel-width convention is intended.
    """
    kernel_result = T.exp( -( (Xi-Xj)**2).sum()/ ( np.float32(2*sigma) ) )
    return kernel_result
class SVM(object):
    """ SVM - Support Vector Machines (exploratory in-notebook draft).

    Builds the dual objective
        W(lambda) = sum_i sum_j lambda_i lambda_j y_i y_j K(x_i, x_j)
    term by term with theano.scan, using the RBF kernel `rbf` defined above.

    Constructor parameters:
        X : (m, d) design matrix
        y : (m,) labels
        m : number of examples; must equal X.shape[0] and y.shape[0]
        C : box-constraint penalty (stored; not used by build_W)
        sigma : RBF kernel width
        alpha : learning rate (stored; not used by build_W)
    """
    def __init__(self,X,y,m,C,sigma,alpha):
        assert m == X.shape[0] and m == y.shape[0]
        self.C = np.float32(C)
        self.sigma = np.float32(sigma)
        self.alpha = np.float32(alpha)
        self._m = theano.shared( np.int32(m))
        self.X = theano.shared( X.astype(theano.config.floatX) )
        self.y = theano.shared( y.astype(theano.config.floatX) )
        # lambda Lagrange multipliers, randomly initialized in [0, 1)
        self.lambda_mult = theano.shared( np.random.rand(m).astype(theano.config.floatX) )
    def build_W(self):
        """Accumulate the symbolic dual objective into self.W and return it.

        Fixes vs. the earlier draft: the step function now actually returns
        the running sum (the original `return prodj +` was incomplete), the
        scan results are collected instead of discarded, and the kernel width
        comes from self.sigma (bare `sigma` was undefined in this scope).
        """
        m = self._m.get_value()
        X = self.X
        y = self.y
        lambda_mult = self.lambda_mult
        def dual_step(Xj,yj,lambdaj,          # input sequences, j=0,1,...m-1
                      cumulative_sum,         # running total from previous step
                      prodi,Xi,sigma):        # non-sequences, fixed per outer i
            prodj = prodi*lambdaj*yj*rbf(Xi,Xj,sigma)
            return cumulative_sum + prodj
        W_terms = []
        for i in range(m):
            Xi = self.X[i]
            yi = self.y[i]
            lambdai = self.lambda_mult[i]
            prodi = lambdai*yi
            outputs, _ = theano.scan(fn=dual_step,
                                     sequences=[X,y,lambda_mult],
                                     outputs_info=[theano.shared(np.float32(0.))],
                                     non_sequences=[prodi,Xi,self.sigma])
            W_terms.append(outputs[-1])  # final value of the running sum over j
        self.W = sum(W_terms)
        return self.W
In [69]:
y[0].astype(theano.config.floatX)
Out[69]:
In [74]:
test_SVM = SVM(X,y,len(y),1.,0.1,0.01)
In [80]:
range(test_SVM._m.get_value());
In [77]:
np.random.rand(4)
Out[77]:
In [81]:
test_SVM.X
Out[81]:
In [9]:
m=4
d=2
X_val=np.arange(2,m*d+2).reshape(m,d).astype(theano.config.floatX)
X=theano.shared( X_val)
y_val=np.random.randint(2,size=m).astype(theano.config.floatX)
y=theano.shared( y_val )
lambda_mult_val = np.random.rand(m).astype(theano.config.floatX)
lambda_mult = theano.shared( lambda_mult_val ) # lambda Lagrange multipliers
sigma_val = 2.0
sigma = theano.shared( np.float32(sigma_val))
In [11]:
np.random.randint(2,size=4)
Out[11]:
In [13]:
X[1]
Out[13]:
In [14]:
np.random.rand(4)
Out[14]:
In [ ]:
#lambda_mult = theano.shared( np.zeros(m_val).astype(theano.config.floatX) ) # lambda Lagrange multipliers
In [16]:
prodi = lambda_mult[1]*y[1]
In [41]:
sigma=0.5
def step(Xj, Xi):
    """One scan step: elementwise RBF response of row Xj against the fixed row Xi."""
    sq_diff = (Xj - Xi) ** 2
    denom = np.float32(2. * sigma ** 2)
    kernel = T.exp(-sq_diff / denom)
    # keep the result on the GPU rather than transferring it back to the host
    return sandbox.cuda.basic_ops.gpu_from_host(kernel)
In [42]:
output,update=theano.scan(fn=step, sequences=[X,],non_sequences=[X[1],])
In [43]:
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
In [44]:
print(test_rbf().shape)
test_rbf()
Out[44]:
In [45]:
#Check
prodi_val = lambda_mult_val[1]*y_val[1]
In [47]:
for j in range(4):
print( np.exp(-((X_val[j]-X_val[1])**2).sum(0)/(np.float32(2.*sigma**2))) )
In [48]:
X_val
Out[48]:
In [39]:
X_val[3]
Out[39]:
In [49]:
prodi = lambda_mult[0]*y[0]
In [55]:
sigma=0.5
def step(Xj, yj, lambda_multj, Xi):
    """Scan step: lambda_j * y_j * K_rbf(Xj, Xi), with the squared distance summed over features."""
    dist_sq = ((Xj - Xi) ** 2).sum()
    kernel = T.exp(-dist_sq / (np.float32(2. * sigma ** 2)))
    weighted = lambda_multj * yj * kernel
    # keep the result on the GPU rather than copying back to the host
    return sandbox.cuda.basic_ops.gpu_from_host(weighted)
In [56]:
output,update=theano.scan(fn=step, sequences=[X,y,lambda_mult],non_sequences=[X[0],])
In [57]:
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
In [58]:
print(test_rbf().shape)
test_rbf()
Out[58]:
In [59]:
sigma=0.5
def rbf(Xj, Xi, sigma):
    """Gaussian (RBF) kernel between the feature vectors Xj and Xi."""
    return T.exp(-((Xj - Xi) ** 2).sum() / (np.float32(2. * sigma ** 2)))

def step(Xj, yj, lambda_multj, Xi, yi, lambda_multi):
    """One term of the SVM dual objective: lambda_i y_i lambda_j y_j K(Xj, Xi)."""
    return lambda_multi * yi * lambda_multj * yj * rbf(Xj, Xi, sigma)
In [60]:
output,update=theano.scan(fn=step, sequences=[X,y,lambda_mult],non_sequences=[X[0],y[0],lambda_mult[0]])
In [61]:
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
In [62]:
test_rbf()
Out[62]:
In [63]:
output1,update1=theano.scan(fn=step, sequences=[X,y,lambda_mult],non_sequences=[X[1],y[1],lambda_mult[1]])
In [66]:
test_rbf1 = theano.function(inputs=[],outputs=output1,updates=update1 )
In [67]:
test_rbf1()
Out[67]:
In [69]:
test_rbf = theano.function(inputs=[],outputs=output+output1 )
In [70]:
test_rbf()
Out[70]:
In [71]:
output,update=theano.scan(fn=step, sequences=[X,y,lambda_mult],non_sequences=[X[0],y[0],lambda_mult[0]])
In [74]:
updates=[update,]
In [75]:
for i in range(1,4):
outputi,updatei=theano.scan(fn=step, sequences=[X,y,lambda_mult],non_sequences=[X[i],y[i],lambda_mult[i]])
output += outputi
updates.append(update)
In [76]:
test_rbf = theano.function(inputs=[],outputs=output )
In [77]:
test_rbf()
Out[77]:
In [81]:
sigma=1.
In [82]:
for j in range(4):
print( np.exp(-((X_val[j]-X_val[0])**2).sum()/(np.float32(2.*sigma**2))) )
In [83]:
X_val
Out[83]:
In [84]:
np.sum( [ np.exp(-((X_val[j]-X_val[0])**2).sum()/(np.float32(2.*sigma**2))) for j in range(4)])
Out[84]:
In [85]:
def step(Xj, Xi):
    """Scan step: scalar RBF similarity of row Xj to the fixed row Xi."""
    sq_norm = ((Xj - Xi) ** 2).sum()
    return T.exp(-sq_norm / (np.float32(2. * sigma ** 2)))
In [86]:
output,update=theano.scan(fn=step, sequences=[X,],non_sequences=[X[0],])
In [87]:
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
In [88]:
test_rbf()
Out[88]:
In [107]:
def step(Xj, Xi):
    """Scalar RBF similarity of Xj to Xi (a re-definition identical to the
    earlier `step`, kept so this cell runs standalone)."""
    return T.exp(-((Xj - Xi) ** 2).sum() / (np.float32(2. * sigma ** 2)))
In [108]:
output,update=theano.scan(fn=step, sequences=[X],outputs_info=[None,],non_sequences=[X[0]])
In [109]:
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
In [110]:
test_rbf()
Out[110]:
In [113]:
output,update=theano.reduce(fn=step, sequences=[X],outputs_info=[None,],non_sequences=[X[0]])
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
test_rbf()
Out[113]:
In [114]:
def step(Xj, cumulative_sum, Xi):
    """Scan/reduce step with an accumulator: running sum of RBF(Xj, Xi) terms."""
    kernel = T.exp(-((Xj - Xi) ** 2).sum() / (np.float32(2. * sigma ** 2)))
    return cumulative_sum + kernel
In [116]:
W_i0 = theano.shared( np.float32(0.))
In [117]:
output,update=theano.scan(fn=step, sequences=[X],outputs_info=[W_i0,],non_sequences=[X[0]])
In [118]:
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
In [119]:
test_rbf()
Out[119]:
In [120]:
# Also this works:
output,update=theano.reduce(fn=step, sequences=[X],outputs_info=[W_i0,],non_sequences=[X[0]])
test_rbf = theano.function(inputs=[],outputs=output,updates=update )
test_rbf()
Out[120]:
In [125]:
sigma=0.5
def rbf(Xj, Xi, sigma):
    """Gaussian (RBF) kernel between the feature vectors Xj and Xi."""
    return T.exp(-((Xj - Xi) ** 2).sum() / (np.float32(2. * sigma ** 2)))

def step(Xj, yj, lambda_multj, cumulative_sum, Xi, yi, lambda_multi):
    """Reduce step: add one dual-objective term lambda_i y_i lambda_j y_j K(Xj, Xi)
    to the running total."""
    term = lambda_multi * yi * lambda_multj * yj * rbf(Xj, Xi, sigma)
    return cumulative_sum + term
In [128]:
W_00 = theano.shared( np.float32(0.))
output,update=theano.reduce(fn=step, sequences=[X,y,lambda_mult],outputs_info=[W_00],
non_sequences=[X[0],y[0],lambda_mult[0]])
updates=[update,]
In [133]:
for i in range(1,m):
W_i0 = theano.shared( np.float32(0.))
outputi,updatei=theano.reduce(fn=step, sequences=[X,y,lambda_mult],
outputs_info=[W_i0],
non_sequences=[X[i],y[i],lambda_mult[i]])
output += outputi
updates.append(update)
In [134]:
test_rbf = theano.function(inputs=[],outputs=output )
In [135]:
test_rbf()
Out[135]:
In [138]:
# sanity check: recompute the scanned/reduced dual sum directly with numpy
cum_sum_val = 0.
for i in range(m):
    inner_terms = [lambda_mult_val[i] * y_val[i] * lambda_mult_val[j] * y_val[j]
                   * np.exp(-((X_val[j] - X_val[i]) ** 2).sum() / (np.float32(2. * sigma ** 2)))
                   for j in range(4)]
    cum_sum_val += np.sum(inner_terms)
print(cum_sum_val)
In [11]:
test_SVM=SVM(X_val,y_val,m,1.0,2.0,0.01)
In [14]:
test_f= theano.function( inputs=[], outputs=T.dot( test_SVM.y, test_SVM.lambda_mult))
In [15]:
test_f()
Out[15]:
In [17]:
test_f= theano.function( inputs=[], outputs=T.dot( test_SVM.y, test_SVM.y ))
In [18]:
test_f()
Out[18]:
In [19]:
test_SVM.y.get_value()
Out[19]:
In [20]:
theano.ifelse( T.lt(test_SVM.y,np.float32(0)), np.float32(0), test_SVM.y )
In [25]:
lower_bound = theano.shared( np.float32(0.) )
theano.ifelse.ifelse( T.lt(test_SVM.y, lower_bound), lower_bound, test_SVM.y )
In [35]:
lower_bound = theano.shared( np.float32(0.5) )
#lower_bound_check=T.switch( T.lt(test_SVM.y, lower_bound), lower_bound, test_SVM.y )
lower_bound_check=T.switch( T.lt(test_SVM.y, lower_bound), test_SVM.y, lower_bound )
test_f=theano.function(inputs=[],outputs=lower_bound_check)
In [36]:
test_f()
Out[36]:
In [37]:
np.ndarray(5)
Out[37]:
In [31]:
dir(scipy);
In [11]:
# Load the libsvm-format training file train.1.  The `with` block closes the
# handle automatically, so the original explicit f.close() was redundant.
# NOTE(review): the file is opened in binary mode -- under Python 3 these
# lines would be bytes and the later str.replace() calls would fail;
# this notebook appears to target Python 2, confirm before porting.
with open("./Data/train.1",'rb') as f:
    train_1_lst = f.readlines()
# strip off the trailing '\n'
train_1_lst = [x.strip() for x in train_1_lst]
print(len(train_1_lst))
In [12]:
train_1_lst=[line.replace('1:','').replace('2:','').replace('3:','').replace('4:','') for line in train_1_lst]
In [13]:
train_1_lst=[line.split() for line in train_1_lst]
train_1_arr=np.array( [[float(ele) for ele in line] for line in train_1_lst] )
In [14]:
train_1_y=train_1_arr[:,0]
train_1_X=train_1_arr[:,1:]
In [15]:
print(train_1_y.shape)
print(train_1_X.shape)
In [69]:
import re

# Load and parse the libsvm-format test file test.1 (4 features).
# The `with` block closes the handle, so no explicit f.close() is needed.
with open("./Data/test.1",'rb') as f:
    test_1_lst = f.readlines()
# strip off the trailing '\n'
test_1_lst = [x.strip() for x in test_1_lst]
print(len(test_1_lst))
# Remove the "feature_index:" prefixes in one pass; equivalent to the chained
# .replace() calls for single-digit indices and safe for multi-digit ones.
test_1_lst = [re.sub(r'\d+:', '', line) for line in test_1_lst]
test_1_lst = [line.split() for line in test_1_lst]
test_1_arr = np.array( [[float(ele) for ele in line] for line in test_1_lst] )
test_1_y = test_1_arr[:,0]
test_1_X = test_1_arr[:,1:]
In [11]:
import re

# Load the libsvm-format vehicle training file train.3 (22 features).
# The `with` block closes the handle, so no explicit f.close() is needed.
with open("./Data/train.3",'rb') as f:
    train_3_lst = f.readlines()
# strip off the trailing '\n'
train_3_lst = [x.strip() for x in train_3_lst]
print(len(train_3_lst))
# Remove the "feature_index:" prefixes with one regex.  The original chain of
# .replace('1:','').replace('2:','')... was order-sensitive and corrupted
# two-digit indices: e.g. the token '11:0.5' contains '1:' and became '10.5'
# before the '11:' replacement was ever reached, silently mangling the data.
train_3_lst = [re.sub(r'\d+:', '', line) for line in train_3_lst]
train_3_lst = [line.split() for line in train_3_lst]
train_3_DF = pd.DataFrame( train_3_lst)
In [12]:
# Extract label column and feature matrix as float arrays.
# .as_matrix() and .ix are deprecated (removed in modern pandas);
# .values and .iloc are the long-supported equivalents.
train_3_y = train_3_DF[0].values.astype(theano.config.floatX)
train_3_X = train_3_DF.iloc[:,1:].values.astype(theano.config.floatX)
print(train_3_X.shape)
In [52]:
ratiotraintotot = 0.2
numberofexamples1 = len(train_1_y)
numberoftrain1 = int( numberofexamples1 * ratiotraintotot )
numberofvalid1 = numberofexamples1 - numberoftrain1
In [19]:
shuffled_idx = np.random.permutation(numberofexamples1)
In [53]:
train1_idx = shuffled_idx[:numberoftrain1]
valid1_idx = shuffled_idx[numberoftrain1:]
In [21]:
from sklearn.svm import SVC
In [22]:
clf=SVC()
In [54]:
clf.fit(train_1_X[train1_idx],train_1_y[train1_idx])
Out[54]:
In [55]:
(clf.predict(train_1_X[valid1_idx]) == train_1_y[valid1_idx]).astype(theano.config.floatX).sum()/len(valid1_idx)
Out[55]:
In [76]:
(clf.predict(test_1_X) == test_1_y).astype(theano.config.floatX).sum()/float(len(test_1_y))
Out[76]:
In [25]:
pd.DataFrame(train_1_X).describe()
Out[25]:
In [26]:
scaler = StandardScaler()
train_1_X_scaled = scaler.fit_transform(train_1_X)
In [27]:
pd.DataFrame(train_1_X_scaled).describe()
Out[27]:
In [28]:
pd.DataFrame(train_1_y).describe()
Out[28]:
In [28]:
train_1_y[ train_1_y < 1] = -1
In [29]:
len(train1_idx)
Out[29]:
In [56]:
SVM_1 = SVM_parallel(train_1_X_scaled[train1_idx],train_1_y[train1_idx],len(train_1_y[train1_idx]),1.0,1.,0.001)
In [57]:
SVM_1.build_W();
In [58]:
SVM_1.build_update()
Out[58]:
In [59]:
SVM_1.train_model_full()
SVM_1.build_b()
Out[59]:
In [35]:
In [60]:
#yhat_parallel = SVM_1.make_predictions(train_1_X_scaled[valid1_idx]) ;
yhat_parallel = SVM_1.make_predictions_parallel(train_1_X_scaled[valid1_idx[:300]]) ;
In [36]:
yhat_parallel_2 = SVM_1.make_predictions_parallel(train_1_X_scaled[valid1_idx[:100]]) ;
In [61]:
yhat_parallel[0].shape
Out[61]:
In [37]:
yhat_parallel_2
Out[37]:
In [62]:
yhat = np.sign( yhat_parallel[0])
In [63]:
#(yhat == train_1_y[valid1_idx[:100]]).sum()/float(len(train_1_y[valid1_idx[:100]]))
(yhat == train_1_y[valid1_idx[:300]]).sum()/float(len(train_1_y[valid1_idx[:300]]))
Out[63]:
In [64]:
len(valid1_idx)
Out[64]:
In [65]:
yhat_1000 = SVM_1.make_predictions_parallel(train_1_X_scaled[valid1_idx[:1000]]) ;
In [67]:
yhat_1000 = np.sign( yhat_1000[0])
In [68]:
(yhat_1000 == train_1_y[valid1_idx[:1000]]).sum()/float(len(train_1_y[valid1_idx[:1000]]))
Out[68]:
In [70]:
test_1_X_scaled = scaler.transform(test_1_X)
In [71]:
yhat_test = SVM_1.make_predictions_parallel(test_1_X_scaled) ;
In [73]:
yhat_test = np.sign( yhat_test[0])
In [74]:
(yhat_test == test_1_y).sum()/float(len(test_1_y))
Out[74]:
In [42]:
train_1_y[valid1_idx[:100]]
Out[42]:
Other people have run into this same recursion-limit problem, which is inherent to Python: https://github.com/Theano/Theano/issues/689
In [46]:
import sys
In [34]:
sys.getrecursionlimit()
Out[34]:
In [40]:
sys.setrecursionlimit(50000)
In [41]:
sys.getrecursionlimit()
Out[41]:
In [ ]:
yhat_valid = SVM_1.make_predictions(train_1_X_scaled[valid1_idx])
In [79]:
SVM_1 = SVM_parallel(train_1_X_scaled,train_1_y,len(train_1_y),2.0,1.,0.01)
In [80]:
SVM_1.build_W();
SVM_1.build_update();
In [81]:
SVM_1.train_model_full(100) # 8 hours
SVM_1.build_b()
Out[81]:
In [82]:
yhat_test = SVM_1.make_predictions_parallel(test_1_X_scaled) ;
In [83]:
yhat_test = np.sign( yhat_test[0])
In [88]:
(yhat_test == test_1_y).sum()/float(len(test_1_y))
Out[88]:
In [85]:
test_1_y
Out[85]:
In [87]:
test_1_y[ test_1_y < 1] = -1
In [86]:
yhat_test[ ]
Out[86]:
In [90]:
# SVC
clf=SVC(C=2.0,gamma=2.0)
clf.fit(train_1_X_scaled,train_1_y)
Out[90]:
In [91]:
(clf.predict(test_1_X_scaled) == test_1_y).sum()/float(len(test_1_y))
Out[91]:
In [ ]:
SVM_1_C2 = SVM_1
In [102]:
SVM_1 = SVM_parallel(train_1_X_scaled,train_1_y,len(train_1_y),2.0,0.25,0.001)
In [103]:
SVM_1.build_W();
SVM_1.build_update();
In [104]:
%time SVM_1.train_model_full(10) # CPU times: user 43min 45s, sys: 1min 10s, total: 44min 56s
#Wall time: 44min 54s
SVM_1.build_b()
Out[104]:
In [105]:
yhat_test = SVM_1.make_predictions_parallel(test_1_X_scaled) ;
yhat_test = np.sign( yhat_test[0]);
In [106]:
(yhat_test == test_1_y).sum()/float(len(test_1_y))
Out[106]:
In [107]:
SVM_1_C2 = SVM_1
In [108]:
SVM_1 = SVM_parallel(train_1_X_scaled,train_1_y,len(train_1_y),2.0,0.20,0.001) # sigma=0.2
In [109]:
SVM_1.build_W();
SVM_1.build_update();
In [110]:
%time SVM_1.train_model_full(20)
SVM_1.build_b()
Out[110]:
In [111]:
yhat_test = SVM_1.make_predictions_parallel(test_1_X_scaled) ;
yhat_test = np.sign( yhat_test[0]);
In [112]:
(yhat_test == test_1_y).sum()/float(len(test_1_y)) # sigma = 0.2
Out[112]:
In [113]:
SVM_1 = SVM_parallel(train_1_X_scaled,train_1_y,len(train_1_y),2.0,0.30,0.001)
In [114]:
SVM_1.build_W();
SVM_1.build_update();
In [115]:
%time SVM_1.train_model_full(15)
SVM_1.build_b()
Out[115]:
In [116]:
yhat_test = SVM_1.make_predictions_parallel(test_1_X_scaled) ;
yhat_test = np.sign( yhat_test[0]);
In [117]:
(yhat_test == test_1_y).sum()/float(len(test_1_y))
Out[117]:
cf. A Practical Guide to Support Vector Classification, Chih-Wei Hsu, Chih-Chung Chang, and Chih-Jen Lin
http://www.csie.ntu.edu.tw/~cjlin/papers/guide/data/
Load, clean, and preprocess the test data (test.3) for the vehicle data set.
In [13]:
import re

# Load the libsvm-format vehicle test file test.3 (22 features).
# The `with` block closes the handle, so no explicit f.close() is needed.
with open("./Data/test.3",'rb') as f:
    test_3_lst = f.readlines()
# strip off the trailing '\n'
test_3_lst = [x.strip() for x in test_3_lst]
print(len(test_3_lst))
# Remove the "feature_index:" prefixes with one regex.  The original chain of
# .replace('1:','')... corrupted two-digit indices (e.g. '11:0.5' -> '10.5'
# via the '1:' replacement) before the multi-digit replacements ran.
test_3_lst = [re.sub(r'\d+:', '', line) for line in test_3_lst]
test_3_lst = [line.split() for line in test_3_lst]
test_3_DF = pd.DataFrame( test_3_lst)
In [14]:
# Extract label column and feature matrix as float arrays.
# .as_matrix() and .ix are deprecated (removed in modern pandas);
# .values and .iloc are the long-supported equivalents.
test_3_y = test_3_DF[0].values.astype(theano.config.floatX)
test_3_X = test_3_DF.iloc[:,1:].values.astype(theano.config.floatX)
print(test_3_X.shape)
print(test_3_y.shape)
Scale the train.3 Vehicle data
In [15]:
scaler = StandardScaler()
train_3_X_scaled = scaler.fit_transform(train_3_X)
In [16]:
train_3_X
Out[16]:
Clean the data by filling in missing (NaN) values with the column mean, a choice motivated by the distribution of the data.
In [17]:
train_3_X_pd = pd.DataFrame(train_3_X)
train_3_X_pd_cleaned = train_3_X_pd.where( pd.notnull( train_3_X_pd ), train_3_X_pd.mean(), axis='columns')
In [18]:
train_3_X_pd.describe()
Out[18]:
In [19]:
train_3_X_pd_cleaned.describe()
Out[19]:
In [20]:
train_3_X_scaled = scaler.fit_transform( train_3_X_pd_cleaned.as_matrix() )
In [21]:
train_3_y
Out[21]:
In [22]:
SVM_3 = SVM_parallel(train_3_X_scaled,train_3_y,len(train_3_y),128.0,2.0,0.001) # sigma=2.0
In [23]:
SVM_3.build_W();
SVM_3.build_update();
In [24]:
%time SVM_3.train_model_full(20)
SVM_3.build_b()
Out[24]:
In [30]:
print(test_3_y.shape)
test_3_y
Out[30]:
In [26]:
print(test_3_X.shape)
test_3_X_scaled = scaler.transform( test_3_X)
In [32]:
pd.DataFrame( train_3_X_scaled).describe()
Out[32]:
In [33]:
pd.DataFrame( test_3_X_scaled).describe()
Out[33]:
In [27]:
%time yhat_test3 = SVM_3.make_predictions_parallel( test_3_X_scaled)
In [28]:
yhat_test3 = np.sign( yhat_test3[0]);
In [29]:
(yhat_test3 == test_3_y).sum()/float(len(test_3_y))
Out[29]:
In [38]:
SVM_3._yhat.get_value()
Out[38]:
In [41]:
yhat_test3[0]
Out[41]:
In [42]:
np.sign( yhat_test3[0])
Out[42]:
In [45]:
test_3_y
Out[45]:
In [30]:
yhat_test3
Out[30]:
In [31]:
np.place( yhat_test3, yhat_test3 < 0., 0.)
In [34]:
yhat_test3
Out[34]:
In [50]:
yPratt_test_results = SVM_3.make_prob_Pratt(yhat_test3)
In [52]:
# Platt scaling: fit a sigmoid P(y=1 | yhat) = 1 / (1 + exp(A*yhat + B)) to
# the SVM decision values by gradient descent on binary cross-entropy.
alpha = np.float32(0.01)  # learning rate for the A, B updates
yhat = SVM_3._yhat  # symbolic SVM decision values (shared variable on SVM_parallel)
# targets: yhat_test3 was remapped to {0, 1} above via np.place
y_sh = theano.shared( yhat_test3.astype(theano.config.floatX ) )
A = theano.shared( np.float32( np.random.rand() ) )  # sigmoid slope, random init
B = theano.shared( np.float32( np.random.rand() ) )  # sigmoid offset, random init
Prob_1_given_yhat = np.float32(1.)/(np.float32(1.)+ T.exp(A*yhat +B))
costfunctional = T.nnet.binary_crossentropy( Prob_1_given_yhat, y_sh).mean()
DA, DB = T.grad(costfunctional, [A,B])
# one gradient-descent step per call; returns current predictions and cost
train = theano.function(inputs=[],outputs=[Prob_1_given_yhat, costfunctional],
updates=[(A,A-alpha*DA),(B,B-alpha*DB)],name="train")
# evaluation-only function: no updates, so calling it has no side effects
probabilities = theano.function(inputs=[], outputs=Prob_1_given_yhat,name="probabilities")
In [54]:
# Run plain gradient descent on (A, B); only the final fit matters here.
training_steps = 10000
for _ in range(training_steps):
    pred, err = train()
# `probabilities` has no updates, so evaluating it after the loop yields the
# fitted probabilities
probabilities_vals = probabilities()
In [57]:
print(len(yhat_test3))
print(len(probabilities_vals))
In [62]:
probabilities_vals
Out[62]:
In [61]:
(probabilities_vals > 0.5).astype(theano.config.floatX)
Out[61]:
In [33]:
np.place( yhat_test3, yhat_test3 < 0., 0.)
In [35]:
%time yPratt_test_results = SVM_3.make_prob_Pratt(yhat_test3)
In [36]:
yPratt_test_results[0]
Out[36]:
In [37]:
(yPratt_test_results[0] > 0.7).astype(theano.config.floatX)
Out[37]:
In [ ]: