In [1]:
import os

import numpy as np
import pandas as pd
import xgboost as xgb

In [5]:
# Root directory of the project checkout. The other cells build their input
# and output locations by appending "/data/processed/..." and "/models/..."
# to this value.
# The location is machine-specific (the captured run of the loading cell failed
# with "File ... does not exist"), so it can be overridden by setting the
# BIMBO_KAGGLE_ROOT environment variable before starting the kernel. When the
# variable is unset, the original hard-coded path is used unchanged.
path = os.environ.get(
    "BIMBO_KAGGLE_ROOT",
    "/home/siyuan/Documents/Boya/bimbo_kaggle_reformat-master",
)

In [7]:
# hold_data is only used for parameter tuning; the tuned parameters have
# already been obtained, so it is not loaded in this cell.
#
# The three CSV files are read in order (train features, train labels, test
# set) and unpacked into their respective names.
# NOTE(review): test_data is read from "test_label.csv" -- confirm this is the
# intended file for the test features.
train_data, train_label, test_data = (
    pd.read_csv(path + csv_suffix)
    for csv_suffix in (
        "/data/processed/train.csv",
        "/data/processed/train_label.csv",
        "/data/processed/test_label.csv",
    )
)


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-7-ded1b771d19e> in <module>()
      1 # hold_data is for for parameter tuining, already got them
----> 2 train_data = pd.read_csv(path + "/data/processed/train.csv")
      3 train_label = pd.read_csv(path + "/data/processed/train_label.csv")
      4 test_data = pd.read_csv(path + "/data/processed/test_label.csv")

/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    560                     skip_blank_lines=skip_blank_lines)
    561 
--> 562         return _read(filepath_or_buffer, kwds)
    563 
    564     parser_f.__name__ = name

/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    313 
    314     # Create the parser.
--> 315     parser = TextFileReader(filepath_or_buffer, **kwds)
    316 
    317     if (nrows is not None) and (chunksize is not None):

/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    643             self.options['has_index_names'] = kwds['has_index_names']
    644 
--> 645         self._make_engine(self.engine)
    646 
    647     def close(self):

/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    797     def _make_engine(self, engine='c'):
    798         if engine == 'c':
--> 799             self._engine = CParserWrapper(self.f, **self.options)
    800         else:
    801             if engine == 'python':

/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1211         kwds['allow_leading_cols'] = self.index_col is not False
   1212 
-> 1213         self._reader = _parser.TextReader(src, **kwds)
   1214 
   1215         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3427)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6861)()

IOError: File /home/siyuan/Documents/Boya/bimbo_kaggle_reformat-master/data/processed/train.csv does not exist

In [2]:
# Booster configuration handed to xgb.train in the training cell.
param = {
    # General settings: tree-based booster, 10 worker threads, quiet logging.
    "booster": "gbtree",
    "nthread": 10,
    # Tree shape and learning rate (eta is the step-size shrinkage).
    "max_depth": 5,
    "eta": 0.2,
    "silent": 1,
    # Fraction of rows sampled for each tree.
    "subsample": 0.7,
    # Learning task: regression objective, evaluated with RMSE.
    "objective": "reg:linear",
    "eval_metric": "rmse",
    # Fraction of columns sampled for each tree.
    "colsample_bytree": 0.7,
}

In [ ]:
# Number of boosting rounds passed to xgb.train.
# NOTE(review): 566 presumably comes from the earlier parameter tuning -- confirm.
num_round = 566

# Wrap the training features and labels in a DMatrix; NaN marks missing values.
dtrain = xgb.DMatrix(train_data, label=train_label, missing=np.nan)
bst_bimbo = xgb.train(param, dtrain, num_round)
print('training finished!')

# Persist the trained booster. This previously called `bst.save_model`, but the
# booster trained above is bound to `bst_bimbo`; no `bst` is defined in the
# visible cells, so the old call would raise NameError on a fresh kernel.
bst_bimbo.save_model(path + '/models/xgb_bimbo.model')

# Booster.predict requires a DMatrix rather than a raw DataFrame, so the test
# features are wrapped with the same missing-value marker used for training.
dtest = xgb.DMatrix(test_data, missing=np.nan)
submission_bimbo = bst_bimbo.predict(dtest)
print('predicting finished!')