In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split
import xgboost as xgb
from scipy import sparse
from sklearn.feature_extraction import FeatureHasher
from scipy.sparse import coo_matrix,csr_matrix,csc_matrix, hstack
from sklearn.preprocessing import normalize
from sklearn.utils import shuffle
from sklearn import linear_model
import gc
from sklearn import preprocessing
In [1]:
# IPython magic: list the contents of the current working directory
# (presumably a quick check that the input files are where the later cells expect them).
%ls
In [7]:
# Load the per-row predictions of the two base models (presumably out-of-fold
# predictions for the training set -- TODO confirm) and the training target.
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only
# load this file if it was produced by a trusted source.
stack_train_nn_10 = pd.read_pickle('stack_sub/stack_train_nn_10.pickle')
# header=None: the CSV has no header row, so its columns get integer labels.
stack_train_xgb_10 = pd.read_csv(
    'stack_sub/stack_train_xgb_10.csv', index_col=False, header=None)
# Only the 'target' column is read from the training table.
train_label = pd.read_csv('train_pivot_xgb_time1.csv', usecols=['target'])
# Sanity check: print both prediction frame shapes for comparison.
# print(...) with a single argument behaves identically on Python 2 and 3.
print(stack_train_nn_10.shape)
print(stack_train_xgb_10.shape)
In [10]:
# Give each base-model prediction column a readable name.
stack_train_xgb_10 = stack_train_xgb_10.rename(columns={1: 'xgb'})
stack_train_nn_10 = stack_train_nn_10.rename(columns={'predict': 'nn'})

# Assemble the level-2 training table: one column per base model plus the target.
# Columns are assigned one at a time, so 'nn' and 'target' are aligned to the
# index established by the 'xgb' column.
# NOTE(review): assumes all three frames share the same row index -- confirm,
# otherwise the alignment silently introduces NaN values.
stack_train = pd.DataFrame()
stack_train['xgb'] = stack_train_xgb_10['xgb']
stack_train['nn'] = stack_train_nn_10['nn']
stack_train['target'] = train_label['target']

# Preview the first rows.
stack_train.head()
Out[10]:
In [14]:
# Show the first five rows of the assembled stacking table.
stack_train.head(n=5)
Out[14]:
In [43]:
# Booster configuration for the level-2 (stacking) XGBoost regressor.
# NOTE(review): newer xgboost releases rename 'reg:linear' to
# 'reg:squarederror' and replace 'silent' with 'verbosity' -- confirm against
# the installed version before changing either key.
param_10 = dict(
    booster='gbtree',
    nthread=7,
    max_depth=5,
    eta=0.4,
    silent=1,
    subsample=0.7,
    objective='reg:linear',
    eval_metric='rmse',
)
In [36]:
# Split the stacking table into features (every column except 'target')
# and the label column, then wrap both in an xgboost DMatrix.
train_feature_10 = stack_train.drop('target', axis=1)
train_label_10 = stack_train['target']
dtrain_10 = xgb.DMatrix(
    train_feature_10,
    label=train_label_10,
    missing=np.nan,  # NaN entries are treated as missing values
)
In [44]:
# 5-fold cross-validation of the stacking model. num_round is only an upper
# bound: training stops once the evaluation metric has not improved for
# 5 consecutive rounds.
num_round = 1500
cvresult = xgb.cv(
    param_10,
    dtrain_10,
    num_boost_round=num_round,
    nfold=5,
    early_stopping_rounds=5,
    seed=42,
    show_stdv=False,
    verbose_eval=1,
)
# Display the last recorded rounds of the CV history.
print(cvresult.tail())
In [31]:
# Load the two base-model submission files.
# NOTE: this rebinds the stack_train_* names used earlier for the training
# predictions; from here on they hold the submission predictions.
# The first CSV column is read as the index and then moved back to a regular
# column by reset_index(); 'Demanda_uni_equil' is renamed to the model's name.
stack_train_nn_10 = (
    pd.read_csv('stack_sub/submission_nn_2.csv', index_col=0)
    .reset_index()
    .rename(columns={'Demanda_uni_equil': 'nn'})
)
stack_train_xgb_10 = (
    pd.read_csv('stack_sub/submission_xgb_2.csv', index_col=0)
    .reset_index()
    .rename(columns={'Demanda_uni_equil': 'xgb'})
)
# Sanity check: print both frame shapes for comparison.
# print(...) with a single argument behaves identically on Python 2 and 3.
print(stack_train_nn_10.shape)
print(stack_train_xgb_10.shape)
In [32]:
# Put both base-model predictions side by side in the xgb frame and move them
# to log1p space (presumably to match the scale of the training target -- TODO confirm).
# The 'nn' column is aligned to the xgb frame's index on assignment.
stack_train_xgb_10['nn'] = np.log1p(stack_train_nn_10['nn'])
# NOTE: 'xgb' is transformed in place, so re-running this cell applies log1p
# to that column a second time.
stack_train_xgb_10['xgb'] = np.log1p(stack_train_xgb_10['xgb'])
In [33]:
# Show the first five rows after the log1p transform.
stack_train_xgb_10.head(n=5)
Out[33]:
In [34]:
# Keep only the first 3538385 rows.
# NOTE(review): the row count is hard-coded; presumably it marks a boundary
# inside the submission file -- confirm where this number comes from.
stack_train_xgb_10 = stack_train_xgb_10.iloc[:3538385]
# print(...) with a single argument behaves identically on Python 2 and 3.
print(stack_train_xgb_10.shape)
stack_train_xgb_10.head()
Out[34]: