In [1]:
import numpy as np
import h5py
import pickle
from time import time
from tools import stack

from sklearn.cross_validation import StratifiedKFold
from sklearn import cross_validation
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.lda import LDA
from sklearn.qda import QDA
from sklearn.linear_model import LogisticRegression

# HDF5 file with the training-set data; opened read-only below and indexed
# by LAYER (features) and 'lbls' (labels).
FEAT_TRAIN = '/media/raid_arr/data/ndsb/features/features_train.hdf5'
# Alternate training-feature location, currently disabled.
# NOTE(review): '~' is presumably not expanded automatically when opening the
# file -- wrap in os.path.expanduser if this is re-enabled; confirm.
# FEAT_TRAIN = '~/data/ndsb/features/features_train.hdf5'
# HDF5 file with the test-set data; indexed below by LAYER, 'lbls' and 'im_paths'.
FEAT_TEST = '/media/raid_arr/data/ndsb/features/features_test.hdf5'

# Name of the dataset inside both HDF5 files that is read as the feature matrix.
LAYER = 'fc2'

In [2]:
# Open both HDF5 feature stores read-only. The file objects are deliberately
# left open: the *_db dataset handles are still referenced by later cells
# (e.g. test_paths_db when the submission is written).
f_train = h5py.File(FEAT_TRAIN, 'r')
f_test = h5py.File(FEAT_TEST, 'r')

# Training split: dataset handles first, then full in-memory copies.
train_feats_db, train_lbls_db = f_train[LAYER], f_train['lbls']
train_feats, train_lbls = train_feats_db[()], train_lbls_db[()]

# Test split: features, labels and the image paths stored alongside them.
test_feats_db, test_lbls_db, test_paths_db = (
    f_test[LAYER],
    f_test['lbls'],
    f_test['im_paths'],
)
test_feats, test_lbls, test_paths = (
    test_feats_db[()],
    test_lbls_db[()],
    test_paths_db[()],
)

In [3]:
# Train a stacked ensemble on the extracted features and pickle it.
#
# Level-0 (base) estimators: SVC, random forest, extra trees, Gaussian NB.
# Level-1 (meta) estimator:  LogisticRegression.
#
# The folds are built from the in-memory label array -- the same array that is
# passed to fit() below -- rather than from the HDF5 dataset handle.
skf = StratifiedKFold(train_lbls, n_folds=5, shuffle=True, random_state=0)

clfs = [
    # probability=True is required for the SVC to expose predict_proba.
    # In the recorded run this estimator dominates the wall-clock time
    # (~2550 s per fold versus < 30 s for each of the others).
    svm.SVC(probability=True),
    RandomForestClassifier(n_jobs=-1, criterion='entropy'),
    ExtraTreesClassifier(n_jobs=-1, criterion='entropy'),
    GaussianNB(),
    # Disabled; note that n_trees is not defined anywhere in the visible cells.
    #GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=n_trees)
]

# NOTE(review): the exact meaning of stackingc / proba is defined in
# tools.stack, which is not visible here -- confirm against that module.
stk = stack.Stacking(LogisticRegression, clfs, skf, stackingc=False, proba=True)

# Time the full fit (cross-validated base training, meta training, re-fit).
tic = time()
stk.fit(train_feats, train_lbls)
toc = time() - tic
print(toc)

# Persist the fitted stack under the file name that the prediction cell
# loads ('STACK_logreg_fc2.p'); previously this wrote 'STACK_fc2.p', so a
# fresh run would have predicted with a stale or missing model. The `with`
# block guarantees the file is flushed and closed.
with open('/media/raid_arr/data/ndsb/classifiers/STACK_logreg_fc2.p', 'wb') as clf_file:
    pickle.dump(stk, clf_file)

print('DONE')


Training and validating the base (level-0) estimator(s)...

Fold [0]
  Training base (level-0) estimator 0... done. 2554.37974119 sec
  Training base (level-0) estimator 1... done. 28.7118680477 sec
  Training base (level-0) estimator 2... done. 1.69392299652 sec
  Training base (level-0) estimator 3... done. 0.257086038589 sec
Fold [1]
  Training base (level-0) estimator 0... done. 2550.16790199 sec
  Training base (level-0) estimator 1... done. 28.0343458652 sec
  Training base (level-0) estimator 2... done. 1.69406604767 sec
  Training base (level-0) estimator 3... done. 0.270102977753 sec
Fold [2]
  Training base (level-0) estimator 0... done. 2574.91269493 sec
  Training base (level-0) estimator 1... done. 28.4909050465 sec
  Training base (level-0) estimator 2... done. 1.65711593628 sec
  Training base (level-0) estimator 3... done. 0.256214141846 sec
Fold [3]
  Training base (level-0) estimator 0... done. 2566.49723196 sec
  Training base (level-0) estimator 1... done. 28.1904380322 sec
  Training base (level-0) estimator 2... done. 1.68364691734 sec
  Training base (level-0) estimator 3... done. 0.245138883591 sec
Fold [4]
  Training base (level-0) estimator 0... done. 2568.9092052 sec
  Training base (level-0) estimator 1... done. 27.3895051479 sec
  Training base (level-0) estimator 2... done. 1.6963570118 sec
  Training base (level-0) estimator 3... done. 0.239163160324 sec

Training meta (level-1) estimator... done.
Re-training base (level-0) estimator 0 on full data... done.
Re-training base (level-0) estimator 1 on full data... done.
Re-training base (level-0) estimator 2 on full data... done.
Re-training base (level-0) estimator 3 on full data... done.
17989.346657
DONE

In [5]:
# PREDICTION
# Load the pickled stacked classifier. The `with` block closes the handle,
# which the previous bare open() call never did.
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# pickles produced by a trusted source.
with open('/media/raid_arr/data/ndsb/classifiers/STACK_logreg_fc2.p', 'rb') as clf_file:
    stk = pickle.load(clf_file)

# Predict class probabilities on the in-memory test features and time the call.
tic = time()
pred = stk.predict_proba(test_feats)
print('Prediction done in: %s' % (time() - tic))

# Cache the predictions on disk so the submission can be rebuilt without
# repeating the (slow) prediction step.
with open('/media/raid_arr/data/ndsb/STACK_fc2_pred.p', 'wb') as pred_file:
    pickle.dump(pred, pred_file)


Prediction done in: 6736.06350589

In [6]:
# SUBMISSION CREATION
# Write the submission file from the test image paths and the predictions
# held in `pred`.
import tools.submission as sub

# File name is assembled from a fixed prefix, the model tag and the layer suffix.
f_name = ''.join(['SUBMISSION_PL_deep_56000_', 'STACK_logreg', '_fc2.csv'])
sub.make_submission(test_paths_db, pred, f_name=f_name)

print('Submission created: %s' % f_name)


Submission created: SUBMISSION_PL_deep_56000_STACK_logreg_fc2.csv