In [1]:
%matplotlib inline

In [2]:
import theano


WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 980 Ti (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5105)

In [3]:
from theano import function, config, sandbox, shared 
import theano.tensor as T

In [4]:
print( theano.config.device )            # active compute device ("gpu" here, per the warning banner above)
print( theano.config.lib.cnmem)  # CNMeM GPU memory-pool fraction; cf. http://deeplearning.net/software/theano/library/config.html
print( theano.config.print_active_device)# Whether to print the active device when the GPU is initialized.


gpu
0.8
True

In [5]:
print(theano.config.allow_gc)  # False: per Theano config docs, intermediates are kept between calls (speed over GPU memory)
print(theano.config.optimizer_excluding)  # graph optimizations excluded by name (empty string here)


False


In [6]:
import sys
sys.path.append( '../ML' )

In [7]:
from SVM import SVM, SVM_serial, SVM_parallel

In [8]:
import numpy as np
import pandas as pd

In [9]:
import os
# List the working directory contents (the last expression is displayed by
# Jupyter).  The original had a bare `os.getcwd()` statement on its own line
# whose value was silently discarded — dead code, removed.
os.listdir(os.getcwd())


Out[9]:
['2017datascibowl',
 'HOG_SVM32.ipynb',
 'HOG_process_322.ipynb',
 'data_password.txt',
 'LSTM_model201702271930.save',
 'cleaning_dueSigmaFin.pyc',
 'LSTM_model201702280608.save',
 'DatSciBow2017_DNN.ipynb',
 '.ipynb_checkpoints',
 'dueSigmaFinancial_kaggle.py',
 'HOG_process.ipynb',
 'LSTM_model.save',
 'LSTM_model201703012346.save',
 'DatSciBow2017_FullPreprocessTutorial.ipynb',
 'LSTM_model201702282350.save',
 'HOG_process_32.ipynb',
 'GRU_model201703022010.save',
 'DueSigmaFin_runs.ipynb',
 'ImagePreprocessing.ipynb',
 'dueSigmaFinancial_local.ipynb',
 'GRU_model201703012348.save',
 'GRU_model201703050709.save',
 'GRU_model201703021741.save',
 'kaggle.ipynb',
 'glass.csv',
 'DatSciBow2017_SVM.ipynb',
 '__init__.py',
 'train.h5',
 'HOG_process2.ipynb',
 'dueSigmaFinancial_local_GRUs.ipynb',
 'cleaning_dueSigmaFin.py']

In [10]:
# Filenames of the per-patient HOG feature vectors computed for stage 1.
patients_stage1_HOG32 = os.listdir('./2017datascibowl/stage1_HOG32')
print(len(patients_stage1_HOG32))  # number of patients with features (1595 in this run)


1595

In [11]:
# Drop the "feat_vec" filename suffix so each entry is a bare patient id.
patients_stage1_HOG32 = list(
    map(lambda fname: fname.replace("feat_vec", ""), patients_stage1_HOG32)
)

Training, (Cross-)Validation, Test Set randomization and processing


In [12]:
def load_feat_vec(patientid, sub_name="stage1_feat", base_dir="./2017datascibowl"):
    """Load the saved numpy feature vector for one patient.

    Parameters
    ----------
    patientid : str
        Patient id; the on-disk filename is the id with the "feat_vec" suffix.
    sub_name : str
        Subdirectory under ``base_dir`` holding the per-patient feature files.
    base_dir : str
        Root data directory (new, backward-compatible parameter; the default
        matches the hard-coded path of the original).

    Returns
    -------
    numpy.ndarray
        The array previously written with ``np.save``.
    """
    # `file()` is a Python-2-only builtin; `open` inside a `with` works on
    # both Python 2 and 3 and closes the handle even if np.load raises.
    path = os.path.join(base_dir, sub_name, patientid + "feat_vec")
    with open(path, "rb") as f:
        return np.load(f)

In [13]:
def prepare_inputX(sub_name="stage1_feat_lowres64", ratio_of_train_to_total=0.4,
                   ratio_valid_to_rest=0.2, seed=None):
    """Randomly split labeled patients into train/validation/test sets.

    Parameters
    ----------
    sub_name : str
        Subdirectory of ./2017datascibowl with one "<id>feat_vec" file per
        patient (written with np.save).
    ratio_of_train_to_total : float
        Fraction of all labeled patients assigned to the training set.
    ratio_valid_to_rest : float
        Fraction of the *remaining* patients assigned to validation; whatever
        is left becomes the test set.
    seed : int or None
        Optional RNG seed for a reproducible split (new, backward-compatible
        parameter; None preserves the original unseeded behavior).

    Returns
    -------
    (patient_ids, ys, Xs) : tuple of dicts keyed by "train"/"valid"/"test"
        Patient-id lists, label arrays, and stacked feature-vector arrays.
    """
    feat_files = os.listdir('./2017datascibowl/' + sub_name)
    # strip the "feat_vec" filename suffix to recover bare patient ids
    patients_stage1_feat = [name.replace("feat_vec", "") for name in feat_files]

    # labels, restricted to patients we actually have feature vectors for
    y_ids = pd.read_csv('./2017datascibowl/stage1_labels.csv')
    y_ids_found = y_ids.loc[y_ids['id'].isin(patients_stage1_feat)]

    # id -> cancer-label lookup: O(1) per patient, replacing the original's
    # repeated scans of the label array (O(n^2) overall).  `.values` replaces
    # the deprecated DataFrame.as_matrix().  Assumes 'id' is unique in the
    # labels csv.
    cancer_by_id = dict(zip(y_ids_found['id'].values, y_ids_found['cancer'].values))

    # keep only labeled patients, preserving listdir order (as the original did)
    patients_stage1_feat_found = [pid for pid in patients_stage1_feat
                                  if pid in cancer_by_id]
    y_found = np.asarray([cancer_by_id[pid]
                          for pid in patients_stage1_feat_found]).flatten()

    assert len(y_found) == len(patients_stage1_feat_found)

    numberofexamples = len(patients_stage1_feat_found)
    numberoftrainingexamples = int(numberofexamples * ratio_of_train_to_total)
    numbertovalidate = int((numberofexamples - numberoftrainingexamples)
                           * ratio_valid_to_rest)

    # seeded RandomState when requested; module-level RNG otherwise
    rng = np.random.RandomState(seed) if seed is not None else np.random
    shuffledindices = rng.permutation(numberofexamples)

    train_idx = shuffledindices[:numberoftrainingexamples]
    valid_idx = shuffledindices[numberoftrainingexamples:
                                numberoftrainingexamples + numbertovalidate]
    test_idx = shuffledindices[numberoftrainingexamples + numbertovalidate:]

    patients_train = [patients_stage1_feat_found[i] for i in train_idx]
    patients_valid = [patients_stage1_feat_found[i] for i in valid_idx]
    patients_test = [patients_stage1_feat_found[i] for i in test_idx]

    y_train = y_found[train_idx]
    y_valid = y_found[valid_idx]
    y_test = y_found[test_idx]

    # load and stack the feature vectors for each split
    patients_train_vecs = np.array([load_feat_vec(pid, sub_name)
                                    for pid in patients_train])
    patients_valid_vecs = np.array([load_feat_vec(pid, sub_name)
                                    for pid in patients_valid])
    patients_test_vecs = np.array([load_feat_vec(pid, sub_name)
                                   for pid in patients_test])

    patient_ids = {"train": patients_train, "valid": patients_valid,
                   "test": patients_test}
    ys = {"train": y_train, "valid": y_valid, "test": y_test}
    Xs = {"train": patients_train_vecs, "valid": patients_valid_vecs,
          "test": patients_test_vecs}

    return patient_ids, ys, Xs

In [14]:
# 27.5% of labeled patients -> train; 20% of the remainder -> validation;
# the rest -> test.
patient_ids32, ys32,Xs32=prepare_inputX("stage1_HOG32",0.275,0.2)

In [15]:
# Map the {0,1} cancer labels to the SVM's {-1,+1} sign convention
# ("representation 2"): np.where leaves positives alone and flips every
# value <= 0 to -1, producing a fresh array so ys32 stays untouched.
y_train_rep2 = np.where(ys32["train"] <= 0, -1, ys32["train"])

y_valid_rep2 = np.where(ys32["valid"] <= 0, -1, ys32["valid"])

y_test_rep2 = np.where(ys32["test"] <= 0, -1, ys32["test"])

In [16]:
# Hyperparameter grids: soft-margin penalty C and RBF kernel width sigma,
# spaced by decades.
C_trial = [1e-1, 1e0, 1e1, 1e2]
sigma_trial = [1e-1, 1e0, 1e1]

In [17]:
C_trial[3]  # = 100.0, the C used for the SVM below (displayed as Out[17])


Out[17]:
100.0

In [18]:
# Construct the Theano/GPU SVM on the training split.
# NOTE(review): the original trailing comment claimed alpha=0.001, but the
# value actually passed is 0.005 — corrected below.
SVM_stage1 = SVM_parallel(Xs32["train"],y_train_rep2,len(y_train_rep2),
                          C_trial[3],sigma_trial[1],0.005 )  # C=100., sigma=1.0, alpha=0.005

SVM_stage1.build_W();       # presumably builds the SVM objective W — see ../ML/SVM.py; TODO confirm
SVM_stage1.build_update();  # presumably compiles the training update step — TODO confirm

In [35]:
%time SVM_stage1.train_model_full(3) # iterations=3,CPU times: user 3min 50s, sys: 7min 19s, total: 11min 9s


CPU times: user 3min 50s, sys: 7min 19s, total: 11min 9s
Wall time: 11min 9s
Out[35]:
array([-125.46400452, -105.90122986, -104.37313843])

In [36]:
%time SVM_stage1.train_model_full(100)


CPU times: user 2h 10min 7s, sys: 4h 2min, total: 6h 12min 8s
Wall time: 6h 11min 58s
Out[36]:
array([-104.77923584, -105.69400024, -106.59057617, -107.46916199,
       -108.33040619, -109.17446899, -110.00161743, -110.81251526,
       -111.60710144, -112.38591003, -113.14927673, -113.89740753,
       -114.63069153, -115.34938049, -116.05379486, -116.74415588,
       -117.42070007, -118.08388519, -118.73379517, -119.37085724,
       -119.99517059, -120.60704803, -121.20681763, -121.79460144,
       -122.3706665 , -122.93541718, -123.48885345, -124.03118896,
       -124.56280518, -125.0838089 , -125.59445953, -126.09502411,
       -126.58554077, -127.06625366, -127.53747559, -127.99925232,
       -128.45196533, -128.89555359, -129.3303833 , -129.75650024,
       -130.17419434, -130.58351135, -130.98468018, -131.37805176,
       -131.76333618, -132.14105225, -132.51130676, -132.87409973,
       -133.22972107, -133.57829285, -133.92001343, -134.25466919,
       -134.58291626, -134.90447998, -135.21975708, -135.52871704,
       -135.8314209 , -136.12820435, -136.41918945, -136.70426941,
       -136.98361206, -137.25753784, -137.52590942, -137.78881836,
       -138.04675293, -138.29940796, -138.54702759, -138.78985596,
       -139.02780151, -139.26086426, -139.48944092, -139.71348572,
       -139.93292236, -140.14805603, -140.35900879, -140.56561279,
       -140.76821899, -140.96679688, -141.16139221, -141.3520813 ,
       -141.53897095, -141.7220459 , -141.9016571 , -142.07775879,
       -142.25018311, -142.41915894, -142.58491516, -142.74729919,
       -142.90647888, -143.06234741, -143.21533203, -143.3651123 ,
       -143.51199341, -143.65596008, -143.79693604, -143.93530273,
       -144.07070923, -144.20352173, -144.33361816, -144.46125793])

In [37]:
# Build the intercept b from the trained multipliers (returns a Theano
# expression plus an updates object — see Out[37] below).
SVM_stage1.build_b()


Out[37]:
(Elemwise{mul,no_inplace}.0, OrderedUpdates())

In [39]:
# Score the held-out validation split with the trained SVM.
yhat32_valid = SVM_stage1.make_predictions_parallel( Xs32["valid"] )

In [40]:
# Validation accuracy: fraction of predicted signs matching the {-1,+1} labels.
sign_matches = np.sign(yhat32_valid[0]) == y_valid_rep2
accuracy_score_temp = sign_matches.sum() / float(len(y_valid_rep2))
print(accuracy_score_temp)


0.732673267327

In [42]:
y_valid_rep2


Out[42]:
array([-1, -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1,  1,
        1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,  1,  1, -1, -1,
       -1, -1, -1,  1,  1, -1,  1, -1, -1, -1, -1,  1, -1,  1,  1, -1, -1,
       -1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,  1, -1, -1, -1,
       -1, -1,  1, -1,  1,  1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,
       -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1,  1,  1,  1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1,  1, -1, -1, -1,  1, -1, -1, -1,  1, -1, -1,
       -1, -1, -1, -1,  1, -1, -1,  1,  1,  1, -1, -1,  1, -1,  1, -1, -1,
       -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,  1,  1,  1, -1, -1, -1, -1,
       -1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1, -1,  1])

Predictions

Predictions on valid set

To submit to the competition, predict over the sample-submission patient set only


In [32]:
# Stage-1 sample-submission ids: the patients we must produce predictions for.
stage1_sample_submission_csv = pd.read_csv("./2017datascibowl/stage1_sample_submission.csv")

In [33]:
sub_name = "stage1_HOG32"
# Load and stack a feature vector per sample-submission patient.
# `.values` replaces the deprecated (and since-removed) DataFrame.as_matrix();
# `pid` avoids shadowing the builtin `id`.
patients_sample_vecs = np.array(
    [load_feat_vec(pid, sub_name)
     for pid in stage1_sample_submission_csv['id'].values]
)

In [34]:
print(len(patients_sample_vecs))


198

In [51]:
%time yhat_sample = SVM_stage1.make_predictions_parallel( patients_sample_vecs[:2] )


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-51-2e04cd2f12c4> in <module>()
----> 1 get_ipython().magic(u'time yhat_sample = SVM_stage1.make_predictions_parallel( patients_sample_vecs[:2] )')

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
   2161         magic_name, _, magic_arg_s = arg_s.partition(' ')
   2162         magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2163         return self.run_line_magic(magic_name, magic_arg_s)
   2164 
   2165     #-------------------------------------------------------------------------

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
   2082                 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
   2083             with self.builtin_trap:
-> 2084                 result = fn(*args,**kwargs)
   2085             return result
   2086 

<decorator-gen-60> in time(self, line, cell, local_ns)

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
    194 
    195         if callable(arg):

/home/topolo/Public/anaconda2/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
   1175         else:
   1176             st = clock2()
-> 1177             exec(code, glob, local_ns)
   1178             end = clock2()
   1179             out = None

<timed exec> in <module>()

/home/topolo/PropD/MLgrabbag/ML/SVM.pyc in make_predictions_parallel(self, X_pred_vals)
    708 
    709                 predictions_function = theano.function(inputs=[],outputs=output)
--> 710                 predictions_vals = predictions_function()
    711                 self._yhat = theano.shared( predictions_vals ) # added this line later
    712 

/home/topolo/PropD/Theano/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    896                     node=self.fn.nodes[self.fn.position_of_error],
    897                     thunk=thunk,
--> 898                     storage_map=getattr(self.fn, 'storage_map', None))
    899             else:
    900                 # old-style linkers raise their own exceptions

/home/topolo/PropD/Theano/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
    323         # extra long error message in that case.
    324         pass
--> 325     reraise(exc_type, exc_value, exc_trace)
    326 
    327 

/home/topolo/PropD/Theano/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    882         try:
    883             outputs =\
--> 884                 self.fn() if output_subset is None else\
    885                 self.fn(output_subset=output_subset)
    886         except Exception:

RuntimeError: Cuda error: kernel_reduce_ccontig_node_544270fe7a21a748315f83abfe0913cc_0: out of memory. (grid: 1 x 1; block: 256 x 1 x 1)

Apply node that caused the error: GpuCAReduce{add}{1}(<CudaNdarrayType(float32, vector)>)
Toposort index: 3
Inputs types: [CudaNdarrayType(float32, vector)]
Inputs shapes: [(384,)]
Inputs strides: [(1,)]
Inputs values: ['not shown']
Outputs clients: [[GpuElemwise{Composite{(i0 * (i1 - i2))}}[(0, 1)](CudaNdarrayConstant{0.00260416674428}, GpuCAReduce{add}{1}.0, GpuSubtensor{int64}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

steps towards persisting (saving) SVM models


In [50]:
# Persist the learned Lagrange multipliers; the `with` statement guarantees
# the file is closed even if np.save raises (the original's manual close
# would be skipped on error).
with open("./2017datascibowl/lambda_multHOG32_C100sigma1", "wb") as f32:
    np.save(f32, SVM_stage1.lambda_mult.get_value())

In [ ]:
yhat_sample_rep2 = np.copy(yhat_sample[0])  # raw SVM decision values
yhat_sample_rep2 = np.sign( yhat_sample_rep2);  # representation 2: binary classes as {-1,+1}
yhat_sample_rep1 = np.copy(yhat_sample_rep2)
np.place(yhat_sample_rep1,yhat_sample_rep1<0.,0.)  # representation 1: binary classes as {0,1}

In [19]:
# Reload the saved multipliers; the context manager guarantees the file
# handle is closed even if np.load raises.
with open("./2017datascibowl/lambda_multHOG32_C100sigma1", "rb") as f32load:
    testload32 = np.load(f32load)

In [20]:
# Re-instantiate an SVM with the same data and hyperparameters; its freshly
# initialized multipliers are then overwritten with the values reloaded from
# disk.  NOTE(review): original trailing comment said alpha=0.001 but the
# value passed is 0.005 — corrected below.
SVM_stage1_reloaded = SVM_parallel(Xs32["train"],y_train_rep2,len(y_train_rep2),
                          C_trial[3],sigma_trial[1],0.005 )  # C=100., sigma=1.0, alpha=0.005

In [21]:
SVM_stage1_reloaded.lambda_mult.get_value()[:20]


Out[21]:
array([ 0.19390427,  0.64167058,  0.72906101,  0.38467768,  0.84446287,
        0.05001149,  0.6134901 ,  0.32032672,  0.00691326,  0.59485805,
        0.14579017,  0.53200012,  0.0235727 ,  0.8490023 ,  0.71107656,
        0.40220758,  0.16119316,  0.06592037,  0.20122707,  0.29117793], dtype=float32)

In [22]:
testload32[:20]


Out[22]:
array([ 0.60887176,  0.45740411,  0.46938595,  0.44038945,  0.41185132,
        0.46100062,  0.34098434,  0.41064668,  0.34098434,  0.47914159,
        0.5706324 ,  0.34098434,  0.41076884,  0.48570067,  0.57883215,
        1.30358076,  1.06718528,  1.32138276,  0.48879373,  1.31265843], dtype=float32)

In [23]:
# Overwrite the fresh (randomly initialized) multipliers with the reloaded ones.
SVM_stage1_reloaded.lambda_mult.set_value( testload32 )

In [24]:
SVM_stage1_reloaded.lambda_mult.get_value()[:20]


Out[24]:
array([ 0.60887176,  0.45740411,  0.46938595,  0.44038945,  0.41185132,
        0.46100062,  0.34098434,  0.41064668,  0.34098434,  0.47914159,
        0.5706324 ,  0.34098434,  0.41076884,  0.48570067,  0.57883215,
        1.30358076,  1.06718528,  1.32138276,  0.48879373,  1.31265843], dtype=float32)

In [25]:
# Recompute the intercept b from the reloaded multipliers.
SVM_stage1_reloaded.build_b()


Out[25]:
(Elemwise{mul,no_inplace}.0, OrderedUpdates())

In [37]:
%time yhat_sample = SVM_stage1_reloaded.make_predictions_parallel( patients_sample_vecs )


CPU times: user 35.6 s, sys: 1min 16s, total: 1min 52s
Wall time: 1min 52s

In [38]:
np.sign(yhat_sample[0])


Out[38]:
array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1.], dtype=float32)

In [39]:
yhat_sample_rep2 = np.copy(yhat_sample[0])  # raw SVM decision values
yhat_sample_rep2 = np.sign( yhat_sample_rep2);  # representation 2: binary classes as {-1,+1}
yhat_sample_rep1 = np.copy(yhat_sample_rep2)
np.place(yhat_sample_rep1,yhat_sample_rep1<0.,0.)  # representation 1: binary classes as {0,1}

In [40]:
# Map SVM outputs to probabilities; "Pratt" in SVM.py is presumably Platt
# scaling — TODO confirm against ../ML/SVM.py.
Prattscaling_results = SVM_stage1_reloaded.make_prob_Pratt(yhat_sample_rep1)

In [41]:
Prattscaling_results


Out[41]:
(array([ 0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507,  0.00981507,  0.00981507,
         0.00981507,  0.00981507,  0.00981507], dtype=float32),
 Elemwise{true_div,no_inplace}.0)

Stage 2 submission


In [26]:
# Stage-2 sample-submission ids: the patients for the final submission.
stage2_sample_submission_csv = pd.read_csv("./2017datascibowl/stage2_sample_submission.csv")

In [27]:
sub_name = "stage2_HOG32"
# Load and stack a feature vector per stage-2 submission patient.
# `.values` replaces the deprecated (and since-removed) DataFrame.as_matrix();
# `pid` avoids shadowing the builtin `id`.
patients_sample2_vecs = np.array(
    [load_feat_vec(pid, sub_name)
     for pid in stage2_sample_submission_csv['id'].values]
)

In [28]:
print(len(patients_sample2_vecs))


506

In [30]:
%time yhat_sample2 = SVM_stage1_reloaded.make_predictions_parallel( patients_sample2_vecs )


CPU times: user 54.5 s, sys: 2min 12s, total: 3min 6s
Wall time: 3min 6s

In [31]:
patients_sample2_vecs.shape


Out[31]:
(506, 327688)

In [47]:
Xs32["train"].shape


Out[47]:
(384, 327688)

In [32]:
np.sign(yhat_sample2[0])


Out[32]:
array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.], dtype=float32)

In [33]:
yhat_sample2_rep2 = np.copy(yhat_sample2[0])  # raw SVM decision values
yhat_sample2_rep2 = np.sign( yhat_sample2_rep2);  # representation 2: binary classes as {-1,+1}
yhat_sample2_rep1 = np.copy(yhat_sample2_rep2)
np.place(yhat_sample2_rep1,yhat_sample2_rep1<0.,0.)  # representation 1: binary classes as {0,1}

In [34]:
# Map stage-2 SVM outputs to probabilities; "Pratt" is presumably Platt
# scaling — TODO confirm against ../ML/SVM.py.
Prattscaling_results2 = SVM_stage1_reloaded.make_prob_Pratt(yhat_sample2_rep1)

In [35]:
Prattscaling_results2


Out[35]:
(array([ 0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909,  0.00983909,  0.00983909,  0.00983909,  0.00983909,
         0.00983909], dtype=float32), Elemwise{true_div,no_inplace}.0)

In [37]:
# Build the stage-2 submission: one cancer probability per submission id.
# A dict-of-columns constructor is clearer than zip(), and `.values` replaces
# the deprecated (and since-removed) DataFrame.as_matrix().
sample2_out = pd.DataFrame({
    "id": stage2_sample_submission_csv['id'].values,
    "cancer": Prattscaling_results2[0],
})
sample2_out = sample2_out[["id", "cancer"]]  # enforce required column order
sample2_out.to_csv("./2017datascibowl/sample2submit00.csv",index=False)

In [ ]: