In [2]:
import numpy as np
import pandas as pd
import xgboost as xgb
import time
import random
from sklearn.model_selection import StratifiedKFold

In [3]:
# Row-sampling settings for reading the input file:
#   random_seed - seed handed to Python's `random` module before sampling
#   subset      - fraction of the file's rows to keep
random_seed, subset = 9, 0.4

In [4]:
# Number of data rows assumed to be in the training CSV (header not counted).
# NOTE(review): hard-coded; confirm it matches the file actually being read.
n_rows = 1183747
train_rows = int(n_rows * subset)

# Pick the file line numbers to drop so that `train_rows` rows remain.
# Sampling starts at 1, so line 0 (the header row) is never skipped.
# `range` is used instead of the Python-2-only `xrange` so the cell also runs
# on Python 3; on Python 2 the sampled result is unchanged.
random.seed(random_seed)
skip = sorted(random.sample(range(1, n_rows + 1), n_rows - train_rows))

# First column becomes the index; everything else is read as float32.
data = pd.read_csv("D:/Kaggle_ws/Bosch/input/train_numeric.csv", index_col=0, dtype=np.float32, skiprows=skip)

# Split the label column off; the remaining columns form the feature matrix.
y = data['Response'].values
del data['Response']
X = data.values

In [5]:
# Booster settings passed to every xgb.train call in this notebook.
param = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 5,
    'eta': 0.3,
    'silent': 0,
    # Alternative tree updater kept for comparison: 'grow_colmaker'
    'updater': 'grow_gpu',
}

In [6]:
# Number of boosting rounds handed to xgb.train.
num_round = 10

# Stratified splitter with 5 folds. Ending the cell on get_n_splits makes the
# notebook echo the fold count as the cell's output.
cv = StratifiedKFold(n_splits=5)
cv.get_n_splits(X=X, y=y)


Out[6]:
5

In [7]:
# Cross-validated benchmark: for each split produced by `cv`, train a booster
# on the training rows, time the training call, and score the held-out rows.
for fold, (train_index, test_index) in enumerate(cv.split(X, y), 1):
    dtrain = xgb.DMatrix(X[train_index], label=y[train_index])
    # Only xgb.train sits between the two clock reads, so DMatrix construction
    # and evaluation are not part of the reported time.
    tmp = time.time()
    bst = xgb.train(param, dtrain, num_round)
    boost_time = time.time() - tmp
    res = bst.eval(xgb.DMatrix(X[test_index], label=y[test_index]))
    # Print the 1-based fold number alongside the eval string so each line
    # identifies which split it belongs to.
    print("Fold {}: {}, Boost Time {}".format(fold, res, str(boost_time)))
    # Drop the booster before the next fold trains a new one.
    del bst


Fold: [0]	eval-auc:0.642644, Boost Time 8.67499995232
Fold: [0]	eval-auc:0.678631, Boost Time 7.84599995613
Fold: [0]	eval-auc:0.654254, Boost Time 7.75099992752
Fold: [0]	eval-auc:0.649877, Boost Time 7.6930000782
Fold: [0]	eval-auc:0.689286, Boost Time 7.72399997711

In [8]:
# Time boosting on data loaded from a libsvm file. The "#dtrain.cache" suffix
# is xgboost's external-memory syntax (on-disk cache with that prefix).
# NOTE(review): the path is relative to the kernel's working directory; the
# recorded run failed because train_numeric.libsvm was not found there, so the
# file has to be created or the path corrected before this cell can run.
dtrain = xgb.DMatrix("train_numeric.libsvm#dtrain.cache")
tmp = time.time()
bst = xgb.train(param, dtrain, num_round)
boost_time = time.time() - tmp
# There is no held-out split for this run, so only the timing is reported.
print("Boost Time {}".format(str(boost_time)))


---------------------------------------------------------------------------
XGBoostError                              Traceback (most recent call last)
<ipython-input-8-1b7eb4831a0c> in <module>()
----> 1 dtrain = xgb.DMatrix("train_numeric.libsvm#dtrain.cache")
      2 tmp = time.time()
      3 bst = xgb.train(param, dtrain, num_round)
      4 boost_time = time.time() - tmp
      5 # res = bst.eval(xgb.DMatrix(X[test_index], label=y[test_index]))

d:\Anaconda\envs\Deep2\lib\site-packages\xgboost-0.6-py2.7.egg\xgboost\core.pyc in __init__(self, data, label, missing, weight, silent, feature_names, feature_types)
    263             _check_call(_LIB.XGDMatrixCreateFromFile(c_str(data),
    264                                                      ctypes.c_int(silent),
--> 265                                                      ctypes.byref(self.handle)))
    266         elif isinstance(data, scipy.sparse.csr_matrix):
    267             self._init_from_csr(data)

d:\Anaconda\envs\Deep2\lib\site-packages\xgboost-0.6-py2.7.egg\xgboost\core.pyc in _check_call(ret)
    128     """
    129     if ret != 0:
--> 130         raise XGBoostError(_LIB.XGBGetLastError())
    131 
    132 

XGBoostError: [01:40:26] D:\Workarea_git\xgboost_vs\dmlc-core\src\io\local_filesys.cc:66: LocalFileSystem.GetPathInfo train_numeric.libsvm Error:No such file or directory

In [ ]: