notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [2]:

    
import theano









    



WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 980 Ti (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5105)



In [3]:

    
from theano import function, config, sandbox, shared 
import theano.tensor as T



In [4]:

    
print( theano.config.device )
print( theano.config.lib.cnmem)  # cf. http://deeplearning.net/software/theano/library/config.html
print( theano.config.print_active_device)# Print active device at when the GPU device is initialized.









    



gpu
0.8
True



In [5]:

    
print(theano.config.allow_gc)
print(theano.config.optimizer_excluding)









    



False



In [6]:

    
import sys
sys.path.append( '../ML' )



In [7]:

    
from DNN import DNN, Feedforward



In [8]:

    
import numpy as np
import pandas as pd



In [9]:

    
import os
print(os.getcwd())
os.listdir( os.getcwd() )









    



/home/topolo/PropD/MLgrabbag/kaggle






    Out[9]:





['2017datascibowl',
 'HOG_SVM32.ipynb',
 'HOG_process_322.ipynb',
 'data_password.txt',
 'HOG_DNN.ipynb',
 'LSTM_model201702271930.save',
 'cleaning_dueSigmaFin.pyc',
 'LSTM_model201702280608.save',
 'DatSciBow2017_DNN.ipynb',
 '.ipynb_checkpoints',
 'dueSigmaFinancial_kaggle.py',
 'HOG_process.ipynb',
 'LSTM_model.save',
 'LSTM_model201703012346.save',
 'DatSciBow2017_FullPreprocessTutorial.ipynb',
 'LSTM_model201702282350.save',
 'HOG_process_32.ipynb',
 'GRU_model201703022010.save',
 'DueSigmaFin_runs.ipynb',
 'ImagePreprocessing.ipynb',
 'dueSigmaFinancial_local.ipynb',
 'GRU_model201703012348.save',
 'GRU_model201703050709.save',
 'GRU_model201703021741.save',
 'kaggle.ipynb',
 'glass.csv',
 'DatSciBow2017_SVM.ipynb',
 '__init__.py',
 'train.h5',
 'HOG_process2.ipynb',
 'dueSigmaFinancial_local_GRUs.ipynb',
 'HOG_DNN_32.ipynb',
 'cleaning_dueSigmaFin.py']



In [10]:

    
def load_feat_vec(patientid,sub_name="stage1_feat"):
    f=file("./2017datascibowl/"+sub_name+"/"+patientid+"feat_vec","rb")
    arr = np.load(f)
    f.close()
    return arr



In [11]:

    
def prepare_inputX(sub_name="stage1_HOG32", ratio_of_train_to_total = 0.4,
                                                    ratio_valid_to_rest = 0.2):
    patients_stage1_feat = os.listdir('./2017datascibowl/'+sub_name)

    patients_stage1_feat = [id.replace("feat_vec","") for id in patients_stage1_feat]  # remove the suffix "feat_vec"
    
    # get y labels
    y_ids = pd.read_csv('./2017datascibowl/stage1_labels.csv')
    
    y_ids_found=y_ids.loc[y_ids['id'].isin(patients_stage1_feat)]
    
    m = len(patients_stage1_feat)
    found_indices =[]
    for i in range(m):
        if patients_stage1_feat[i] in y_ids_found['id'].as_matrix():
            found_indices.append(i)

    patients_stage1_feat_found = [patients_stage1_feat[i] for i in found_indices]
    
    y_found=[]
    for i in range(len(patients_stage1_feat_found)):
        if (patients_stage1_feat_found[i] in y_ids_found['id'].as_matrix()):
            cancer_val = y_ids_found.loc[y_ids_found['id']==patients_stage1_feat_found[i]]['cancer'].as_matrix()
            y_found.append( cancer_val )
    y_found=np.array(y_found).flatten()
    
    assert (len(y_found)==len(patients_stage1_feat_found))


    numberofexamples = len(patients_stage1_feat_found)
    numberoftrainingexamples = int(numberofexamples*ratio_of_train_to_total)
    numbertovalidate = int((numberofexamples - numberoftrainingexamples)*ratio_valid_to_rest)
    numbertotest= numberofexamples - numberoftrainingexamples - numbertovalidate
    
    shuffledindices = np.random.permutation( numberofexamples)
    
    patients_train = [patients_stage1_feat_found[id] for id in shuffledindices[:numberoftrainingexamples]]
    patients_valid = [patients_stage1_feat_found[id] for id in shuffledindices[numberoftrainingexamples:numberoftrainingexamples+numbertovalidate]]
    patients_test  = [patients_stage1_feat_found[id] for id in shuffledindices[numberoftrainingexamples+numbertovalidate:]]

    y_train = y_found[shuffledindices[:numberoftrainingexamples]]
    y_valid = y_found[shuffledindices[numberoftrainingexamples:numberoftrainingexamples+numbertovalidate]]
    y_test  = y_found[shuffledindices[numberoftrainingexamples+numbertovalidate:]]
    
    patients_train_vecs = [load_feat_vec(id,sub_name) for id in patients_train]
    patients_train_vecs = np.array(patients_train_vecs)
    
    patients_valid_vecs = [load_feat_vec(id,sub_name) for id in patients_valid]
    patients_valid_vecs = np.array(patients_valid_vecs)
    
    patients_test_vecs = [load_feat_vec(id,sub_name) for id in patients_test]
    patients_test_vecs = np.array(patients_test_vecs)

    patient_ids = {"train":patients_train,"valid":patients_valid,"test":patients_test}
    ys = {"train":y_train,"valid":y_valid,"test":y_test}
    Xs = {"train":patients_train_vecs,"valid":patients_valid_vecs,"test":patients_test_vecs}
    
    return patient_ids, ys, Xs



In [12]:

    
patient_ids32, ys32,Xs32=prepare_inputX("stage1_HOG32",0.375,0.2)



In [13]:

    
y_train_rep2 = np.copy(ys32["train"])  # 2nd representation
y_train_rep2[y_train_rep2<=0]=-1

y_valid_rep2 = np.copy(ys32["valid"])  # 2nd representation
y_valid_rep2[y_valid_rep2<=0]=-1

y_test_rep2 = np.copy(ys32["test"])  # 2nd representation
y_test_rep2[y_test_rep2<=0]=-1



In [14]:

    
d = Xs32["train"][0].shape[0]
print(d)



In [15]:

    
Ff32 = Feedforward(2,[d,d/128,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)



In [16]:

    
d/128









    Out[16]:





2560



In [17]:

    
X32=theano.shared( Xs32["train"].astype(theano.config.floatX))
Ff32.connect_through(X32)









    Out[17]:





sigmoid.0



In [16]:

    
DNN32 = DNN(Ff32,ys32["train"] , X32.get_value())









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-424ba203651d> in <module>()
----> 1 DNN32 = DNN(Ff32,ys32["train"] , X32.get_value())

NameError: name 'X32' is not defined



In [20]:

    
DNN32.build_J_xent()









    Out[20]:





GpuFromHost.0



In [21]:

    
DNN32.build_update(alpha=0.0001)



In [23]:

    
%time DNN32.train_model_full(max_iters=10000) # max_iters=3, CPU times: user 320 ms, sys: 713 ms, total: 1.03 s









    



theano.config.allow_gc =:  False
CPU times: user 16min 43s, sys: 41min 23s, total: 58min 7s
Wall time: 58min 6s






    Out[23]:





array([ 0.60423833,  0.601605  ,  0.59934652, ...,  0.22131497,
        0.22130144,  0.22128801])



In [24]:

    
DNN32.DNN_model.connect_through(theano.shared(Xs32["valid"].astype(theano.config.floatX)))









    



---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-24-e8c108ead055> in <module>()
----> 1 DNN32.DNN_model.connect_through(theano.shared(Xs32["valid"].astype(theano.config.floatX)))

/home/topolo/PropD/Theano/theano/compile/sharedvalue.pyc in shared(value, name, strict, allow_downcast, **kwargs)
    266             try:
    267                 var = ctor(value, name=name, strict=strict,
--> 268                            allow_downcast=allow_downcast, **kwargs)
    269                 utils.add_tag_trace(var)
    270                 return var

/home/topolo/PropD/Theano/theano/sandbox/cuda/var.pyc in float32_shared_constructor(value, name, strict, allow_downcast, borrow, broadcastable, target)
    186         # type.broadcastable is guaranteed to be a tuple, which this next
    187         # function requires
--> 188         deviceval = type_support_filter(value, type.broadcastable, False, None)
    189 
    190     try:

MemoryError: ('Error allocating 264771904 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).', "you might consider using 'theano.shared(..., borrow=True)'")

Breakdown (step-by-step) of long-term persistence (saving work, saving files, I/O); I'll



In [29]:

    
import cPickle



In [26]:

    
params_val32 = [weight.get_value() for weight in DNN32.DNN_model.__get_state__()['params'] ]



In [35]:

    
print(len(params_val32))



In [31]:

    
f = open("./2017datascibowl/DNN32_L3_128.pkl",'wb')
for param in params_val32:
    cPickle.dump(param,f,protocol=cPickle.HIGHEST_PROTOCOL)  # Python problem cf.
    # https://github.com/numpy/numpy/issues/2396
f.close()









    



---------------------------------------------------------------------------
SystemError                               Traceback (most recent call last)
<ipython-input-31-486676351de4> in <module>()
      1 f = open("./2017datascibowl/DNN32_L3_128.pkl",'wb')
      2 for param in params_val32:
----> 3     cPickle.dump(param,f,protocol=cPickle.HIGHEST_PROTOCOL)
      4 f.close()

SystemError: error return without exception set



In [34]:

    
# above is a problem with Python, for large arrays
for param_idx in range(len(params_val32)):
    f = open("./2017datascibowl/DNN32_L3_128"+str(param_idx)+".pkl",'wb')
    np.save(f,params_val32[param_idx])
    f.close()

load back saved parameters



In [15]:

    
params_reloaded = []
no_params=4
filename_reload = "./2017datascibowl/DNN32_L3_128"
for param_idx in range(no_params):
    name_reload = filename_reload+str(param_idx)+".pkl"
    param_val = np.load(name_reload)
    params_reloaded.append(param_val)



In [16]:

    
Ff32_reloaded = Feedforward(2,[d,d/128,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)



In [17]:

    
Ff32_reloaded.__set_state__(*params_reloaded)



In [19]:

    
Ff32_reloaded.__get_state__()['params'][0].get_value()









    Out[19]:





array([[-0.01051898,  0.00416382, -0.00212343, ..., -0.01638052,
        -0.00529799, -0.01309736],
       [ 0.00960883,  0.0087943 , -0.00077023, ..., -0.00224601,
         0.00270979, -0.00083148],
       [ 0.01697689, -0.00064749,  0.00363629, ..., -0.01195615,
         0.00590108, -0.00295875],
       ..., 
       [ 0.00355044, -0.00947237, -0.00872016, ..., -0.0155024 ,
         0.00426584,  0.00798085],
       [ 0.00575262,  0.01189689, -0.01234404, ..., -0.00221689,
         0.00172807, -0.0081163 ],
       [-0.00503765, -0.00537038, -0.00895535, ..., -0.01033893,
         0.00831992, -0.00455313]], dtype=float32)



In [20]:

    
params_reloaded[0]









    Out[20]:





array([[-0.01051898,  0.00416382, -0.00212343, ..., -0.01638052,
        -0.00529799, -0.01309736],
       [ 0.00960883,  0.0087943 , -0.00077023, ..., -0.00224601,
         0.00270979, -0.00083148],
       [ 0.01697689, -0.00064749,  0.00363629, ..., -0.01195615,
         0.00590108, -0.00295875],
       ..., 
       [ 0.00355044, -0.00947237, -0.00872016, ..., -0.0155024 ,
         0.00426584,  0.00798085],
       [ 0.00575262,  0.01189689, -0.01234404, ..., -0.00221689,
         0.00172807, -0.0081163 ],
       [-0.00503765, -0.00537038, -0.00895535, ..., -0.01033893,
         0.00831992, -0.00455313]], dtype=float32)

Submissions out



In [27]:

    
stage2_sample_submission_csv = pd.read_csv("./2017datascibowl/stage2_sample_submission.csv")



In [28]:

    
sub_name="stage2_HOG32"
patients_sample2_vecs = np.array( [load_feat_vec(id,sub_name) for id in stage2_sample_submission_csv['id'].as_matrix()] )



In [17]:

    
print(patients_sample2_vecs.shape)

d=patients_sample2_vecs.shape[1]
print(d)









    



(506, 327688)
327688



In [24]:

    
Ff32_reloaded.connect_through(theano.shared( patients_sample2_vecs.astype(theano.config.floatX)))









    Out[24]:





sigmoid.0



In [25]:

    
yhat_sample2 = theano.function(inputs=[],outputs=Ff32_reloaded._get_outer_layer_() )()



In [28]:

    
yhat_sample2.shape









    Out[28]:





(506, 1)



In [29]:

    
sample2_out = pd.DataFrame(zip(stage2_sample_submission_csv['id'].as_matrix(),yhat_sample2.flatten()))
sample2_out.columns=["id","cancer"]



In [30]:

    
sample2_out.to_csv("./2017datascibowl/sample2submit01.csv",index=False)

Other models; $L=3$



In [16]:

    
d/1024









    Out[16]:





320



In [15]:

    
Ff32 = Feedforward(3,[d,d/256,d/1024,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)



In [16]:

    
X32=theano.shared( Xs32["train"].astype(theano.config.floatX))
Ff32.connect_through(X32)









    Out[16]:





sigmoid.0



In [17]:

    
DNN32 = DNN(Ff32,ys32["train"] , X32.get_value())



In [18]:

    
DNN32.build_J_xent()









    Out[18]:





GpuFromHost.0



In [19]:

    
DNN32.build_update(alpha=0.0001)



In [20]:

    
%time DNN32.train_model_full(max_iters=3) # max_iters=3, CPU times: user 320 ms, sys: 713 ms, total: 1.03 s









    



theano.config.allow_gc =:  False
CPU times: user 309 ms, sys: 559 ms, total: 868 ms
Wall time: 867 ms






    Out[20]:





array([ 3.58742404,  2.56585026,  1.65380883])



In [21]:

    
%time DNN32.train_model_full(max_iters=15000) # max_iters=3, CPU times: user 309 ms, sys: 559 ms, total: 868 ms









    



theano.config.allow_gc =:  False
CPU times: user 21min 39s, sys: 52min 31s, total: 1h 14min 10s
Wall time: 1h 14min 9s






    Out[21]:





array([ 1.02403891,  0.73851693,  0.63771349, ...,  0.2418585 ,
        0.24184632,  0.24183418])



In [24]:

    
DNN32.save_parameters("./2017datascibowl/DNNHOG32_L3_256_")



In [25]:

    
DNN32.DNN_model.connect_through(theano.shared( patients_sample2_vecs.astype(theano.config.floatX)))









    Out[25]:





sigmoid.0



In [26]:

    
yhat_sample2 = theano.function(inputs=[],outputs=DNN32.DNN_model._get_outer_layer_() )()



In [27]:

    
sample2_out = pd.DataFrame(zip(stage2_sample_submission_csv['id'].as_matrix(),yhat_sample2.flatten()))
sample2_out.columns=["id","cancer"]



In [28]:

    
sample2_out.to_csv("./2017datascibowl/sample2submit02.csv",index=False)

L=3, train to rest ratio$= 0.575,0.595$



In [12]:

    
patient_ids, ys,Xs=prepare_inputX("stage1_HOG32",0.575,0.2)



In [14]:

    
d = Xs32["train"][0].shape[0]
print(d)



In [15]:

    
Ff = Feedforward(3,[d,d/256,d/256,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)



In [16]:

    
X=theano.shared( Xs32["train"].astype(theano.config.floatX))
Ff.connect_through(X)









    Out[16]:





sigmoid.0



In [19]:

    
DNN_HOG = DNN(Ff,ys32["train"] )



In [20]:

    
DNN_HOG.build_J_xent()









    Out[20]:





GpuFromHost.0



In [21]:

    
DNN_HOG.build_update(alpha=0.0001)



In [22]:

    
%time DNN_HOG.train_model_full(max_iters=2) # max_iters=2 CPU times: user 374 ms, sys: 679 ms, total: 1.05 s









    



theano.config.allow_gc =:  False
CPU times: user 339 ms, sys: 551 ms, total: 889 ms
Wall time: 887 ms






    Out[22]:





array([ 2.02084756,  1.39609814])



In [23]:

    
%time DNN_HOG.train_model_full(max_iters=10000)









    



theano.config.allow_gc =:  False
CPU times: user 21min 54s, sys: 50min 32s, total: 1h 12min 27s
Wall time: 1h 12min 26s






    Out[23]:





array([ 0.96289539,  0.74127501,  0.6490553 , ...,  0.4474774 ,
        0.44746727,  0.44745716])



In [25]:

    
DNN_HOG.save_parameters("./2017datascibowl/DNNHOG32_L3_256_")



In [27]:

    
# predictions on validation set
DNN_HOG.DNN_model.connect_through(theano.shared(Xs32["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG.DNN_model._get_outer_layer_() )()



In [36]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys32["valid"]).mean() # threshold 0.8 0.72493536340132148









    Out[36]:





0.721057167480609



In [37]:

    
%time DNN_HOG.train_model_full(max_iters=5000)









    



theano.config.allow_gc =:  False
CPU times: user 10min 49s, sys: 25min 29s, total: 36min 19s
Wall time: 36min 18s






    Out[37]:





array([ 0.44744706,  0.4474369 ,  0.44742677, ...,  0.39755404,
        0.39754432,  0.39753458])



In [38]:

    
DNN_HOG.save_parameters("./2017datascibowl/DNNHOG32_L3_256b_")



In [39]:

    
# predictions on validation set
DNN_HOG.DNN_model.connect_through(theano.shared(Xs32["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG.DNN_model._get_outer_layer_() )()



In [42]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys32["valid"]).mean() # threshold 0.8 0.72493536340132148









    Out[42]:





0.7055443837977593



In [12]:

    
patient_ids, ys,Xs=prepare_inputX("stage1_HOG32",0.595,0.2)



In [14]:

    
d = Xs["train"][0].shape[0]
print(d)



In [17]:

    
# reload the parameters after clearing the GPU RAM
params_reloaded = []
no_params=6
filename_reload = "./2017datascibowl/DNNHOG32_L3_256b_"
for param_idx in range(no_params):
    name_reload = filename_reload+str(param_idx)
    param_val = np.load(name_reload)
    params_reloaded.append(param_val)



In [19]:

    
Ff_reloaded = Feedforward(3,[d,d/256,d/256,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)
Ff_reloaded.__set_state__(*params_reloaded)



In [22]:

    
X=theano.shared( Xs["train"].astype(theano.config.floatX))
Ff_reloaded.connect_through(X)

DNN_HOG_reloaded = DNN(Ff_reloaded,ys["train"] )

DNN_HOG_reloaded.build_J_xent()

DNN_HOG_reloaded.build_update(alpha=0.0001)



In [23]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=2) # CPU times: user 342 ms, sys: 580 ms, total: 922 ms









    



theano.config.allow_gc =:  False
CPU times: user 342 ms, sys: 580 ms, total: 922 ms
Wall time: 919 ms






    Out[23]:





array([ 0.45656514,  0.45633772])



In [24]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=15000)









    



theano.config.allow_gc =:  False
CPU times: user 34min 38s, sys: 1h 24min 19s, total: 1h 58min 57s
Wall time: 1h 58min 56s






    Out[24]:





array([ 0.45626116,  0.45620534,  0.45615417, ...,  0.30237636,
        0.30236885,  0.30236131])



In [25]:

    
DNN_HOG_reloaded.save_parameters("./2017datascibowl/DNNHOG32_L3_256c_")



In [26]:

    
# predictions on validation set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [29]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys["valid"]).mean() # threshold 0.8 0.76991150442477874









    Out[29]:





0.74124833581329785



In [33]:

    
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared( patients_sample2_vecs.astype(theano.config.floatX)))









    



---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-33-f09afd21e572> in <module>()
----> 1 DNN_HOG_reloaded.DNN_model.connect_through(theano.shared( patients_sample2_vecs.astype(theano.config.floatX)))

/home/topolo/PropD/Theano/theano/compile/sharedvalue.pyc in shared(value, name, strict, allow_downcast, **kwargs)
    266             try:
    267                 var = ctor(value, name=name, strict=strict,
--> 268                            allow_downcast=allow_downcast, **kwargs)
    269                 utils.add_tag_trace(var)
    270                 return var

/home/topolo/PropD/Theano/theano/sandbox/cuda/var.pyc in float32_shared_constructor(value, name, strict, allow_downcast, borrow, broadcastable, target)
    186         # type.broadcastable is guaranteed to be a tuple, which this next
    187         # function requires
--> 188         deviceval = type_support_filter(value, type.broadcastable, False, None)
    189 
    190     try:

MemoryError: ('Error allocating 663240512 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).', "you might consider using 'theano.shared(..., borrow=True)'")



In [15]:

    
# reload the parameters after clearing the GPU RAM
params_reloaded = []
no_params=6
filename_reload = "./2017datascibowl/DNNHOG32_L3_256c_"
for param_idx in range(no_params):
    name_reload = filename_reload+str(param_idx)
    param_val = np.load(name_reload)
    params_reloaded.append(param_val)



In [18]:

    
Ff_reloaded = Feedforward(3,[d,d/256,d/256,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)
Ff_reloaded.__set_state__(*params_reloaded)



In [19]:

    
Ff_reloaded.connect_through(theano.shared( patients_sample2_vecs.astype(theano.config.floatX)))









    Out[19]:





sigmoid.0



In [22]:

    
yhat_sample2 = theano.function(inputs=[],outputs=Ff_reloaded._get_outer_layer_() )()

sample2_out = pd.DataFrame(zip(stage2_sample_submission_csv['id'].as_matrix(),yhat_sample2.flatten()))
sample2_out.columns=["id","cancer"]



In [23]:

    
sample2_out.to_csv("./2017datascibowl/sample2submit05_L3_256.csv",index=False)

L=3, train to rest ratio$= +0.595$



In [12]:

    
# reload the parameters after clearing the GPU RAM
params_reloaded = []
no_params=6
filename_reload = "./2017datascibowl/DNNHOG32_L3_256b_"
for param_idx in range(no_params):
    name_reload = filename_reload+str(param_idx)
    param_val = np.load(name_reload)
    params_reloaded.append(param_val)



In [13]:

    
patient_ids, ys,Xs=prepare_inputX("stage1_HOG32",0.635,0.2)  # 0.615

d = Xs["train"][0].shape[0]
print(d)



In [15]:

    
Ff_reloaded = Feedforward(3,[d,d/256,d/256,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)
Ff_reloaded.__set_state__(*params_reloaded)



In [16]:

    
X=theano.shared( Xs["train"].astype(theano.config.floatX))
Ff_reloaded.connect_through(X)

DNN_HOG_reloaded = DNN(Ff_reloaded,ys["train"] )

DNN_HOG_reloaded.build_J_xent()

DNN_HOG_reloaded.build_update(alpha=0.0001)



In [17]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=2) #









    



theano.config.allow_gc =:  False
CPU times: user 0 ns, sys: 383 ms, total: 383 ms
Wall time: 1 s






    Out[17]:





array([ 0.41254067,  0.41235557])



In [18]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=10000)









    



theano.config.allow_gc =:  False
CPU times: user 21min 52s, sys: 55min 52s, total: 1h 17min 44s
Wall time: 1h 17min 43s






    Out[18]:





array([ 0.47579074,  0.47573674,  0.4756839 , ...,  0.36137226,
        0.36136359,  0.36135498])



In [19]:

    
DNN_HOG_reloaded.save_parameters("./2017datascibowl/DNNHOG32_L3_256d_")



In [20]:

    
# predictions on validation set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [23]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys["valid"]).mean() # threshold 0.8 0.7289719626168224









    Out[23]:





0.70757271377412878



In [24]:

    
# predictions on test set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["test"].astype(theano.config.floatX)))

yhat_test = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [27]:

    
( (yhat_test>0.50).astype(theano.config.floatX)==ys["test"]).mean() # threshold 0.8 0.72764466168894437









    Out[27]:





0.7158607027309285



In [21]:

    
# reload the parameters after clearing the GPU RAM
params_reloaded = []
no_params=6
filename_reload = "./2017datascibowl/DNNHOG32_L3_256d_"
for param_idx in range(no_params):
    name_reload = filename_reload+str(param_idx)
    param_val = np.load(name_reload)
    params_reloaded.append(param_val)



In [24]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=5000)









    



theano.config.allow_gc =:  False
CPU times: user 12min 43s, sys: 30min 11s, total: 42min 55s
Wall time: 42min 55s






    Out[24]:





array([ 0.41232234,  0.41229433,  0.41226661, ...,  0.35503957,
        0.35503042,  0.35502127])



In [25]:

    
DNN_HOG_reloaded.save_parameters("./2017datascibowl/DNNHOG32_L3_256e_")



In [26]:

    
# predictions on validation set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [28]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys["valid"]).mean() # threshold 0.8 0.74990388312187617









    Out[28]:





0.74990388312187617



In [33]:

    
print(X.get_value().shape)
print(Xs["train"].shape)
print(X.get_value()[:3]);
print(Xs["train"][:3]);









    



(887, 327688)
(887, 327688)
[[ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]]
[[ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]]



In [34]:

    
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["train"].astype(theano.config.floatX)))









    Out[34]:





sigmoid.0



In [35]:

    
DNN_HOG_reloaded.build_J_xent()

DNN_HOG_reloaded.build_update(alpha=0.0001)



In [36]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=2000)









    



theano.config.allow_gc =:  False
CPU times: user 4min 47s, sys: 12min 20s, total: 17min 8s
Wall time: 17min 8s






    Out[36]:





array([ 0.35501212,  0.35500294,  0.35499376, ...,  0.33740544,
        0.33739692,  0.33738837])



In [37]:

    
DNN_HOG_reloaded.save_parameters("./2017datascibowl/DNNHOG32_L3_256f_")



In [38]:

    
# predictions on validation set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [41]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys["valid"]).mean() # threshold 0.8 0.74990388312187617









    Out[41]:





0.72491349480968859



In [43]:

    
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["train"].astype(theano.config.floatX)))

DNN_HOG_reloaded.build_J_xent()

DNN_HOG_reloaded.build_update(alpha=0.0001)









    



---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-43-d0e875d63ef4> in <module>()
----> 1 DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["train"].astype(theano.config.floatX)))
      2 
      3 DNN_HOG_reloaded.build_J_xent()
      4 
      5 DNN_HOG_reloaded.build_update(alpha=0.0001)

/home/topolo/PropD/Theano/theano/compile/sharedvalue.pyc in shared(value, name, strict, allow_downcast, **kwargs)
    266             try:
    267                 var = ctor(value, name=name, strict=strict,
--> 268                            allow_downcast=allow_downcast, **kwargs)
    269                 utils.add_tag_trace(var)
    270                 return var

/home/topolo/PropD/Theano/theano/sandbox/cuda/var.pyc in float32_shared_constructor(value, name, strict, allow_downcast, borrow, broadcastable, target)
    186         # type.broadcastable is guaranteed to be a tuple, which this next
    187         # function requires
--> 188         deviceval = type_support_filter(value, type.broadcastable, False, None)
    189 
    190     try:

MemoryError: ('Error allocating 1162637024 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).', "you might consider using 'theano.shared(..., borrow=True)'")



In [12]:

    
# reload the parameters after clearing the GPU RAM
params_reloaded = []
no_params=6
filename_reload = "./2017datascibowl/DNNHOG32_L3_256f_"
for param_idx in range(no_params):
    name_reload = filename_reload+str(param_idx)
    param_val = np.load(name_reload)
    params_reloaded.append(param_val)



In [13]:

    
patient_ids, ys,Xs=prepare_inputX("stage1_HOG32",0.655,0.2)  # 0.615, 0.635

d = Xs["train"][0].shape[0]
print(d)



In [14]:

    
Ff_reloaded = Feedforward(3,[d,d/256,d/256,1],activation_fxn=T.nnet.sigmoid,psi_Lm1=T.nnet.sigmoid)
Ff_reloaded.__set_state__(*params_reloaded)



In [15]:

    
X=theano.shared( Xs["train"].astype(theano.config.floatX))
Ff_reloaded.connect_through(X)

DNN_HOG_reloaded = DNN(Ff_reloaded,ys["train"] )

DNN_HOG_reloaded.build_J_xent()

DNN_HOG_reloaded.build_update(alpha=0.0001)



In [16]:

    
%time DNN_HOG_reloaded.train_model_full(max_iters=1500)









    



theano.config.allow_gc =:  False
CPU times: user 3min 36s, sys: 8min 53s, total: 12min 29s
Wall time: 12min 29s






    Out[16]:





array([ 0.38097757,  0.3805989 ,  0.38056234, ...,  0.35795695,
        0.35794604,  0.35793516])



In [17]:

    
DNN_HOG_reloaded.save_parameters("./2017datascibowl/DNNHOG32_L3_256g_")



In [18]:

    
# predictions on validation set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["valid"].astype(theano.config.floatX)))

yhat_valid = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [21]:

    
( (yhat_valid>0.50).astype(theano.config.floatX)==ys["valid"]).mean() # threshold 0.8 0.72916666666666663









    Out[21]:





0.66232638888888884



In [22]:

    
# predictions on test set
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared(Xs["test"].astype(theano.config.floatX)))

yhat_test = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()



In [25]:

    
( (yhat_test>0.50).astype(theano.config.floatX)==ys["test"]).mean() # threshold 0.8 0.74741603801444334









    Out[25]:





0.69458240489677581



In [29]:

    
DNN_HOG_reloaded.DNN_model.connect_through(theano.shared( patients_sample2_vecs.astype(theano.config.floatX)))









    Out[29]:





sigmoid.0



In [30]:

    
yhat_sample2 = theano.function(inputs=[],outputs=DNN_HOG_reloaded.DNN_model._get_outer_layer_() )()

sample2_out = pd.DataFrame(zip(stage2_sample_submission_csv['id'].as_matrix(),yhat_sample2.flatten()))
sample2_out.columns=["id","cancer"]



In [31]:

    
sample2_out.to_csv("./2017datascibowl/sample2submit06_L3_256_066.csv",index=False)



In [42]:

    
print(DNN_HOG_reloaded.y.get_value()[:10])
print(ys["train"][:10])









    



[ 0.  0.  0.  1.  1.  0.  1.  0.  0.  0.]
[0 0 0 1 1 0 1 0 0 0]



In [22]:

    
print(params_reloaded[0]); print(params_reloaded[2])









    



[[ -1.05394311e-02   4.18995507e-03  -2.12602178e-03 ...,   1.23951416e-02
    4.43352899e-03   1.26545550e-02]
 [ -1.40867829e-02   1.24492031e-02  -1.56735536e-02 ...,  -1.64048374e-02
   -5.29065821e-03  -1.31486915e-02]
 [  9.62750334e-03   8.75628181e-03  -7.75774941e-04 ...,   6.71705711e-05
   -6.65440504e-03   1.26282685e-02]
 ..., 
 [  9.62470565e-03   1.42008010e-02  -1.69687513e-02 ...,   9.14281467e-04
   -1.16204340e-02   1.68460626e-02]
 [ -3.80297215e-03   5.01478324e-04  -7.52616161e-03 ...,  -5.15476149e-03
   -2.23281421e-03  -2.89960811e-03]
 [  1.34664727e-02  -2.64157518e-03  -1.10893007e-02 ...,  -1.51445381e-02
    1.50058875e-02   1.36591052e-03]]
[[ 0.13591398  0.05975517  0.00216788 ...,  0.11721792  0.17021373
   0.12387137]
 [ 0.18551475 -0.07370686  0.10704312 ...,  0.12297633  0.03753624
   0.06093714]
 [-0.07069941 -0.17607622 -0.05969213 ..., -0.08114195  0.08655923
  -0.09912383]
 ..., 
 [-0.03615148 -0.06664355  0.18831345 ...,  0.1776958   0.1222437
   0.14410536]
 [ 0.16230944  0.03704982  0.04771241 ...,  0.13605502  0.05308732
   0.10281823]
 [-0.16344711 -0.15765999  0.12361822 ...,  0.05752134  0.01395508
  -0.0770214 ]]



In [23]:

    
print( DNN_HOG_reloaded.DNN_model.__get_state__()['params'][0].get_value() )
print( DNN_HOG_reloaded.DNN_model.__get_state__()['params'][2].get_value() )









    



[[ -1.05394311e-02   4.18996345e-03  -2.12602108e-03 ...,   1.23951416e-02
    4.43352619e-03   1.26545578e-02]
 [ -1.40867829e-02   1.24492003e-02  -1.56735536e-02 ...,  -1.64048374e-02
   -5.29065728e-03  -1.31486924e-02]
 [  9.62750334e-03   8.75628088e-03  -7.75774941e-04 ...,   6.71705639e-05
   -6.65440410e-03   1.26282685e-02]
 ..., 
 [  9.62470565e-03   1.42008010e-02  -1.69687513e-02 ...,   9.14281467e-04
   -1.16204340e-02   1.68460626e-02]
 [ -3.80297098e-03   5.01571631e-04  -7.52615789e-03 ...,  -5.15476009e-03
   -2.23284052e-03  -2.89956946e-03]
 [  1.34664727e-02  -2.64157518e-03  -1.10893007e-02 ...,  -1.51445381e-02
    1.50058875e-02   1.36591052e-03]]
[[ 0.13591398  0.05975512  0.00216788 ...,  0.1172179   0.17021368
   0.12387137]
 [ 0.18551475 -0.07370693  0.10704312 ...,  0.12297631  0.03753618
   0.06093714]
 [-0.07069941 -0.17607625 -0.05969213 ..., -0.08114197  0.08655919
  -0.09912383]
 ..., 
 [-0.03615148 -0.06664355  0.18831345 ...,  0.1776958   0.1222437
   0.14410536]
 [ 0.16230944  0.03704983  0.04771241 ...,  0.13605502  0.05308732
   0.10281823]
 [-0.16344711 -0.15766005  0.12361822 ...,  0.05752132  0.01395503
  -0.0770214 ]]



In [ ]: