In [1]:
import numpy as np
import pandas as pd
import theano
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from keras.regularizers import l2, activity_l2
from sklearn import cross_validation
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import gc


Using gpu device 0: GeForce GTX 970 (CNMeM is enabled with initial size: 75.0% of memory, cuDNN 4007)
Using Theano backend.

read in the week-10 data


In [2]:
predictors_target_10 = ['agen_for_log_de', 'ruta_for_log_de', 'cliente_for_log_de',
       'producto_for_log_de', 'agen_ruta_for_log_de',
       'agen_cliente_for_log_de', 'agen_producto_for_log_de',
       'ruta_cliente_for_log_de', 'ruta_producto_for_log_de',
       'cliente_producto_for_log_de', 'cliente_for_log_sum', 'corr',
       't_min_1', 't_min_2', 't_min_3', 't_min_4', 't_min_5', 't1_min_t2',
       't1_min_t3', 't1_min_t4', 't1_min_t5', 't2_min_t3', 't2_min_t4',
       't2_min_t5', 't3_min_t4', 't3_min_t5', 't4_min_t5', 'LR_prod',
       'LR_prod_corr', 't_m_5_cum', 't_m_4_cum', 't_m_3_cum',
       't_m_2_cum', 't_m_1_cum', 'NombreCliente', 'weight',
       'weight_per_piece', 'pieces','target']

In [3]:
predictors_10 = ['agen_for_log_de', 'ruta_for_log_de', 'cliente_for_log_de',
       'producto_for_log_de', 'agen_ruta_for_log_de',
       'agen_cliente_for_log_de', 'agen_producto_for_log_de',
       'ruta_cliente_for_log_de', 'ruta_producto_for_log_de',
       'cliente_producto_for_log_de', 'cliente_for_log_sum', 'corr',
       't_min_1', 't_min_2', 't_min_3', 't_min_4', 't_min_5', 't1_min_t2',
       't1_min_t3', 't1_min_t4', 't1_min_t5', 't2_min_t3', 't2_min_t4',
       't2_min_t5', 't3_min_t4', 't3_min_t5', 't4_min_t5', 'LR_prod',
       'LR_prod_corr', 't_m_5_cum', 't_m_4_cum', 't_m_3_cum',
       't_m_2_cum', 't_m_1_cum', 'NombreCliente', 'weight',
       'weight_per_piece', 'pieces']

In [4]:
dtypes = {'agen_for_log_de':'float32',
        'ruta_for_log_de':'float32',
        'cliente_for_log_de':'float32',
        'producto_for_log_de':'float32',
        'agen_ruta_for_log_de':'float32',
        'agen_cliente_for_log_de':'float32',
        'agen_producto_for_log_de':'float32',
        'ruta_cliente_for_log_de':'float32',
        'ruta_producto_for_log_de':"float32",
        'cliente_producto_for_log_de':'float32',
        'cliente_for_log_sum':'float32',
        'corr':'float32',
        't_min_1':'float32',
        't_min_2':'float32',
        't_min_3':'float32',
        't_min_4':'float32',
        't_min_5':'float32',
        't1_min_t2':'float32',
        't1_min_t3':'float32',
        't1_min_t4':'float32',
        't1_min_t5':'float32',
        't2_min_t3':'float32',
        't2_min_t4':'float32',
        't2_min_t5':'float32',
        't3_min_t4':'float32',
        't3_min_t5':'float32',
        't4_min_t5':'float32',
        'LR_prod':'float32',
        'LR_prod_corr':'float32',
        'target':'float32',
        't_m_5_cum':'float32',
        't_m_4_cum' :'float32',
        't_m_3_cum':'float32',
        't_m_2_cum':'float32',
        't_m_1_cum':'float32',
        'NombreCliente':'int32',
        'weight':'float32',
        'weight_per_piece':'float32',
        'pieces':'float32'}

In [ ]:
f = lambda x : (x-x.mean())/x.std(ddof=0)
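
f standardizes each column (z-score using the population standard deviation, ddof=0). A quick illustration of what apply(f, axis=0) does below, on a toy frame:

demo = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0]})
demo.apply(f, axis=0)
# every column now has mean 0 and (population) std 1:
#           a         b
# 0 -1.224745 -1.224745
# 1  0.000000  0.000000
# 2  1.224745  1.224745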

In [6]:
def normalize_dataset_10(train_dataset,test_dataset):
    train_dataset_normalize = train_dataset[predictors_10]
    train_dataset_normalize['label'] = 0    
    
    test_dataset_normalize = test_dataset[predictors_10]
    test_dataset_normalize['label'] = 1
    
    whole_dataset = pd.concat([train_dataset_normalize,test_dataset_normalize],copy = False)
    whole_dataset_normalize = whole_dataset.apply(f,axis = 0)
    
    train_dataset_normalize = whole_dataset_normalize.loc[whole_dataset['label'] == 0]
    test_dataset_normalize = whole_dataset_normalize.loc[whole_dataset['label']==1]
    
    train_dataset_normalize.drop(['label'],axis = 1,inplace = True)
    test_dataset_normalize.drop(['label'],axis =1,inplace = True)
    
    train_dataset_normalize['target'] = train_dataset['target']
    
#     target = train_dataset['target']
    return train_dataset_normalize,test_dataset_normalize

In [7]:
train_pivot_xgb_time1 = pd.read_csv('train_pivot_xgb_time1.csv',
                                    usecols = predictors_target_10,dtype = dtypes)
train_pivot_xgb_time1.reset_index(drop = True,inplace = True)

In [8]:
train_pivot_56789_to_10 = pd.read_pickle('train_pivot_56789_to_10_new.pickle')
train_pivot_56789_to_10.reset_index(drop = True,inplace = True)

In [9]:
train_dataset_10_normalize, test_dataset_10_normalize = normalize_dataset_10(train_pivot_xgb_time1,
                                                                          train_pivot_56789_to_10)


/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:15: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:17: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
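
The SettingWithCopyWarning above comes from adding the 'label' column on a slice of the original frame inside normalize_dataset_10; the results are still correct here. A minimal sketch of how the warning could be avoided, taking explicit copies (behaviour otherwise unchanged):

train_dataset_normalize = train_dataset[predictors_10].copy()   # explicit copy silences the warning
train_dataset_normalize['label'] = 0

test_dataset_normalize = test_dataset[predictors_10].copy()
test_dataset_normalize['label'] = 1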

In [23]:
# train_pivot_xgb_time1 = train_pivot_xgb_time1.sample(1000) 
# train_pivot_xgb_time1.reset_index(drop = True,inplace = True)

create NN training data for model stacking


  • 5-fold out-of-fold predictions

In [13]:
train_dataset_10_normalize.fillna(-1,inplace = True)

In [14]:
k_fold = cross_validation.KFold(n=train_dataset_10_normalize.shape[0], n_folds=5)

a = np.zeros(shape=(train_dataset_10_normalize.shape[0],1))
stack_submission_nn_10 = pd.DataFrame(a,columns=['predict'])

In [15]:
for train_indices, test_indices in k_fold:

    # create model
    model = Sequential()
    model.add(Dense(128, input_dim=38, init='normal', activation='relu',W_regularizer=l2(0.00000001)))
    # model.add(Dropout(0.3))
    model.add(Dense(64, init='normal', activation='relu'))
    # model.add(Dropout(0.3))
    model.add(Dense(32, init='normal', activation='relu'))
    # model.add(Dropout(0.3))
    model.add(Dense(8, init='normal', activation='relu'))
    # model.add(Dropout(0.3))
    model.add(Dense(1, init='normal',activation = 'linear'))
    
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    # fit model
    model.fit(train_dataset_10_normalize.loc[train_indices,predictors_10].as_matrix(),
              train_dataset_10_normalize.loc[train_indices,'target'].as_matrix(),
              nb_epoch=100, shuffle = True, batch_size=128,validation_split = 0,verbose = 2)
    
    print 'model fit finished'
    # out-of-fold predictions: fill the held-out fold rows for stacking
    stack_submission_nn_10.loc[test_indices] = model.predict(train_dataset_10_normalize.\
                                                                       loc[test_indices,predictors_10].as_matrix(),
                                                         batch_size=128, verbose=2)
    print 'model predict finished'


Epoch 1/100
93s - loss: 0.2085
Epoch 2/100
93s - loss: 0.2031
Epoch 3/100
93s - loss: 0.2020
...
Epoch 100/100
92s - loss: 0.1988
model fit finished
model predict finished
Epoch 1/100
91s - loss: 0.2076
Epoch 2/100
91s - loss: 0.2024
Epoch 3/100
92s - loss: 0.2011
...
Epoch 100/100
91s - loss: 0.1981
model fit finished
model predict finished
Epoch 1/100
92s - loss: 0.2072
Epoch 2/100
92s - loss: 0.2017
Epoch 3/100
91s - loss: 0.2006
...
Epoch 100/100
92s - loss: 0.1973
model fit finished
model predict finished
Epoch 1/100
93s - loss: 0.2093
Epoch 2/100
93s - loss: 0.2037
Epoch 3/100
93s - loss: 0.2024
...
Epoch 100/100
93s - loss: 0.1986
model fit finished
model predict finished
Epoch 1/100
108s - loss: 0.2066
Epoch 2/100
108s - loss: 0.2016
Epoch 3/100
108s - loss: 0.2002
...
Epoch 100/100
108s - loss: 0.1970
model fit finished
model predict finished

In [16]:
stack_submission_nn_10.head()


Out[16]:
predict
0 3.470119
1 2.667063
2 1.882208
3 3.318728
4 4.153247

In [17]:
stack_submission_nn_10.to_pickle('stack_train_nn_10.pickle')

model stacking training-data preparation finished



In [9]:
train_dataset_10_normalize.columns.values


Out[9]:
array(['agen_for_log_de', 'ruta_for_log_de', 'cliente_for_log_de',
       'producto_for_log_de', 'agen_ruta_for_log_de',
       'agen_cliente_for_log_de', 'agen_producto_for_log_de',
       'ruta_cliente_for_log_de', 'ruta_producto_for_log_de',
       'cliente_producto_for_log_de', 'cliente_for_log_sum', 'corr',
       't_min_1', 't_min_2', 't_min_3', 't_min_4', 't_min_5', 't1_min_t2',
       't1_min_t3', 't1_min_t4', 't1_min_t5', 't2_min_t3', 't2_min_t4',
       't2_min_t5', 't3_min_t4', 't3_min_t5', 't4_min_t5', 'LR_prod',
       'LR_prod_corr', 't_m_5_cum', 't_m_4_cum', 't_m_3_cum', 't_m_2_cum',
       't_m_1_cum', 'NombreCliente', 'weight', 'weight_per_piece',
       'pieces', 'target'], dtype=object)

In [10]:
test_dataset_10_normalize.shape


Out[10]:
(3538385, 38)

In [11]:
train_nn_time1 = train_dataset_10_normalize[predictors_10].copy()
label_nn_time1 = train_dataset_10_normalize['target'].copy()

In [12]:
train_nn_time1.fillna(-1,inplace = True)
# train_nn_time1.fillna(0,inplace = True)

In [13]:
train_nn_time1 = train_nn_time1.as_matrix()
label_nn_time1 = label_nn_time1.as_matrix()

In [18]:
test_dataset_10_normalize.fillna(-1,inplace = True)

In [19]:
test_nn_time1 = test_dataset_10_normalize.as_matrix()

In [14]:
from sklearn.utils import shuffle
train_nn_time1, label_nn_time1 = shuffle(train_nn_time1, label_nn_time1, random_state=42)

In [99]:
# create model
model = Sequential()
model.add(Dense(128, input_dim=38, init='normal', activation='relu',W_regularizer=l2(0.00000001)))
# model.add(Dropout(0.3))
model.add(Dense(64, init='normal', activation='relu'))
# model.add(Dropout(0.3))
model.add(Dense(32, init='normal', activation='relu'))
# model.add(Dropout(0.3))
model.add(Dense(8, init='normal', activation='relu'))
# model.add(Dropout(0.3))
model.add(Dense(1, init='normal',activation = 'linear'))
# Compile model
# load_model replaces the freshly built layers above: training resumes from the saved weights
model = load_model('model_nn_10_after_l2reg.h5')
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(train_nn_time1, label_nn_time1, nb_epoch=72, shuffle = True, 
          batch_size=128,validation_split = 0.2,verbose = 2)
time.sleep(0.1)


Train on 16614921 samples, validate on 4153731 samples
Epoch 1/72
99s - loss: 0.1997 - val_loss: 0.2002
Epoch 2/72
98s - loss: 0.1997 - val_loss: 0.1989
Epoch 3/72
98s - loss: 0.1997 - val_loss: 0.1985
...
Epoch 72/72
95s - loss: 0.1996 - val_loss: 0.1994
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-99-20fd7a5b3078> in <module>()
     16 model.fit(train_nn_time1, label_nn_time1, nb_epoch=72, shuffle = True, 
     17           batch_size=128,validation_split = 0.2,verbose = 2)
---> 18 time.sleep(0.1)

NameError: name 'time' is not defined
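
The NameError above is harmless in this run: the fit had already finished, and only the trailing sleep call failed because the time module was never imported. Importing it first (or simply dropping the call) avoids the traceback:

import time  # the missing import behind the NameError above
time.sleep(0.1)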

In [100]:
model.save('model_nn_10_after_l2reg.h5')

In [75]:
sumbission_nn_10 = model.predict(test_nn_time1, batch_size=128, verbose=0)

In [76]:
submission_nn_10_whole = pd.DataFrame()

In [77]:
submission_nn_10_whole['id'] = train_pivot_56789_to_10['id'].copy()
submission_nn_10_whole['predict'] = pd.DataFrame(sumbission_nn_10)

In [78]:
submission_nn_10_whole.head()


Out[78]:
id predict
0 1569352 1.748155
1 6667200 3.628597
2 1592616 2.999023
3 3909690 4.208099
4 3659672 3.577081

In [74]:
from keras.models import load_model

model = load_model('model_nn_10_after_l2reg.h5')

simple merge: NN predictions for week 10 combined with XGBoost predictions for week 11



In [27]:
%ls


1_xgboost.ipynb                   submission_10_new.csv
3_prediction.ipynb                submission_11.csv
4_keras_nn.ipynb                  submission_11_new.csv
5_random_forest.ipynb             submission_nn_xgb
6_random_forest.ipynb             submission_xgb_10.pickle
agencia_for_cliente_producto.csv  submission_xgb_2.csv
canal_for_cliente_producto.csv    submission_xgb.csv
model_nn_10.h5                    submission_xgb_nn_10.pickle
model_nn_10_whole.h5              submission_xgb_with_nn.csv
origin/                           train_pivot_3456_to_8.csv
pivot_test.pickle                 train_pivot_56789_to_10_new.pickle
pivot_train_with_nan.pickle       train_pivot_56789_to_10.pickle
pivot_train_with_zero.pickle      train_pivot_6789_to_11_new.pickle
preprocessed_products.csv         train_pivot_6789_to_11.pickle
ruta_for_cliente_producto.csv     train_pivot_xgb_time1.csv
submission_10.csv                 train_pivot_xgb_time2.csv

In [79]:
submission_xgb_11 = pd.read_csv('submission_11_new.csv',index_col = 0)
submission_xgb_11['predict'] = submission_xgb_11[['predict_' + str(i) for i in range(20)]].mean(axis=1)
submission_xgb_11.drop(['predict_' + str(i) for i in range(20)],axis =1,inplace = True)
submission_xgb_11.head()


Out[79]:
id predict
0 1547831 4.406201
1 6825659 3.053817
2 5853787 2.684612
3 2316053 1.259826
4 900676 2.301486

In [88]:
submission_xgb_11['predict'].describe()


Out[88]:
count    3.460866e+06
mean     1.575306e+00
std      7.042961e-01
min     -5.776052e-01
25%      1.066204e+00
50%      1.410357e+00
75%      1.900692e+00
max      7.862914e+00
Name: predict, dtype: float64

In [80]:
np.expm1(0.1)


Out[80]:
0.10517091807564763
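
expm1 is the inverse of the log1p transform used on the target earlier in the pipeline (implied by the apply(np.expm1) step below), so it maps predictions back to the raw demand scale. A quick round-trip check:

x = np.array([0.0, 1.0, 7.0])
np.allclose(np.expm1(np.log1p(x)), x)   # True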

In [90]:
submission_nn = pd.concat([submission_nn_10_whole,submission_xgb_11],axis = 0,copy = True)

In [91]:
mask = submission_nn[submission_nn['predict'] < 0].index
submission_nn.loc[mask, 'predict'] = 0.001
submission_nn['predict'].describe()


Out[91]:
count    6.999251e+06
mean     1.583278e+00
std      7.158656e-01
min      7.535815e-05
25%      1.066688e+00
50%      1.416326e+00
75%      1.915690e+00
max      8.414549e+00
Name: predict, dtype: float64

In [92]:
submission_nn['predict'] = submission_nn['predict'].apply(np.expm1)
submission_nn.rename(columns = {'predict':'Demanda_uni_equil'},inplace = True)

In [84]:
submission_nn.head()


Out[84]:
id Demanda_uni_equil
0 1569352 4.743992
1 6667200 36.659952
2 1592616 19.065922
3 3909690 66.228609
4 3659672 34.768986

In [93]:
submission_nn['Demanda_uni_equil'] = submission_nn['Demanda_uni_equil'].round(1)

In [94]:
submission_nn['Demanda_uni_equil'].describe()


Out[94]:
count    6.999251e+06
mean     6.320197e+00
std      1.891876e+01
min      0.000000e+00
25%      1.900000e+00
50%      3.100000e+00
75%      5.800000e+00
max      4.511200e+03
Name: Demanda_uni_equil, dtype: float64

In [95]:
submission_nn.to_csv('submission_nn_2.csv',index = False)

In [87]:
-4.000000e-01


Out[87]:
-0.4

In [66]:
mask = submission_nn_10[submission_nn_10['predict'] < 0].index
submission_nn_10.loc[mask, 'predict'] = 0
submission_nn_10['predict'].describe()


Out[66]:
count    3.538385e+06
mean     1.565618e+00
std      6.791419e-01
min      0.000000e+00
25%      1.078582e+00
50%      1.403452e+00
75%      1.885645e+00
max      1.081926e+01
Name: predict, dtype: float64

In [84]:
submission_nn_10.sort(['id'],inplace = True)
submission_nn_10.reset_index(inplace = True,drop = True)


/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':

In [87]:
submission_nn_10.head()


Out[87]:
predict id
0 1.246212 2
1 1.864159 7
2 1.587973 8
3 1.098800 11
4 1.758513 13

try model stacking: collect XGBoost and NN predictions for week 10 as stacking features



In [21]:
%ls


1_xgboost.ipynb                   submission_10_new.csv
3_prediction.ipynb                submission_11.csv
4_keras_nn.ipynb                  submission_11_new.csv
agencia_for_cliente_producto.csv  submission_xgb_2.csv
canal_for_cliente_producto.csv    submission_xgb.csv
model_nn_10.h5                    submission_xgb_with_nn.csv
origin/                           train_pivot_3456_to_8.csv
pivot_test.pickle                 train_pivot_56789_to_10_new.pickle
pivot_train_with_nan.pickle       train_pivot_56789_to_10.pickle
pivot_train_with_zero.pickle      train_pivot_6789_to_11_new.pickle
preprocessed_products.csv         train_pivot_6789_to_11.pickle
ruta_for_cliente_producto.csv     train_pivot_xgb_time1.csv
submission_10.csv                 train_pivot_xgb_time2.csv

In [97]:
submission_xgb_10 = pd.read_csv('submission_10_new.csv',index_col = 0)
submission_xgb_10['predict'] = submission_xgb_10[['predict_' + str(i) for i in range(20)]].mean(axis=1)
submission_xgb_10.drop(['predict_' + str(i) for i in range(20)],axis =1,inplace = True)

In [98]:
submission_xgb_10.sort(['id'],inplace = True)
submission_xgb_10.reset_index(inplace = True,drop = True)


/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':

In [99]:
submission_xgb_10.head()


Out[99]:
id predict
0 2 1.179090
1 7 1.726247
2 8 1.560164
3 11 1.080983
4 13 1.802365

In [102]:
submission_xgb_10['nn_predict'] = submission_nn_10['predict']

In [130]:
submission_xgb_10['combine_predict'] =  submission_xgb_10['predict']*0.8 + submission_xgb_10['nn_predict'] *0.2
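
The combined prediction is a fixed 0.8/0.2 convex combination of the XGBoost and NN predictions. A hedged sketch of how such a weight could instead be tuned on out-of-fold predictions; oof_xgb, oof_nn and y_true are assumed arrays, not variables from this notebook:

# hypothetical weight search on out-of-fold predictions (oof_xgb, oof_nn, y_true assumed)
best_w, best_rmse = None, np.inf
for w in np.linspace(0, 1, 21):
    blend = w * oof_xgb + (1 - w) * oof_nn
    rmse = np.sqrt(np.mean((blend - y_true) ** 2))
    if rmse < best_rmse:
        best_w, best_rmse = w, rmse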

In [131]:
submission_xgb_10.head()


Out[131]:
id predict nn_predict combine_predict
0 2 1.179090 1.246212 1.192515
1 7 1.726247 1.864159 1.753829
2 8 1.560164 1.587973 1.565726
3 11 1.080983 1.098800 1.084546
4 13 1.802365 1.758513 1.793594

In [140]:
submission_xgb_10.drop(['combine_predict'],axis =1, inplace = True)
submission_xgb_10.rename(columns = {'predict': 'xgb_predict'},inplace = True)
submission_xgb_10.head()


Out[140]:
id xgb_predict nn_predict
0 2 1.179090 1.246212
1 7 1.726247 1.864159
2 8 1.560164 1.587973
3 11 1.080983 1.098800
4 13 1.802365 1.758513

In [142]:
submission_xgb_10.to_pickle('submission_xgb_nn_10.pickle')

In [128]:
submission_xgb_10.shape


Out[128]:
(3538385, 4)

In [132]:
submission_10 = submission_xgb_10[['id','combine_predict']].copy()
submission_10['combine_predict'] = submission_10['combine_predict'].apply(np.expm1)
submission_10.rename(columns = {'combine_predict':'Demanda_uni_equil'},inplace = True)

In [133]:
submission_10.head()


Out[133]:
id Demanda_uni_equil
0 2 2.295357
1 7 4.776682
2 8 3.786146
3 11 1.958096
4 13 5.011020

In [134]:
submission_xgb_11 = pd.read_csv('submission_11_new.csv',index_col = 0)

In [135]:
submission_xgb_11['Demanda_uni_equil'] = submission_xgb_11[['predict_' + str(i) for i in range(20)]].mean(axis=1)
submission_xgb_11.drop(['predict_' + str(i) for i in range(20)],axis =1,inplace = True)
submission_xgb_11['Demanda_uni_equil'] = submission_xgb_11['Demanda_uni_equil'].apply(np.expm1)
submission_xgb_11.head()


Out[135]:
id Demanda_uni_equil
0 1547831 80.957476
1 6825659 20.196086
2 5853787 13.652510
3 2316053 2.524808
4 900676 8.989011

In [136]:
submission_final  = pd.concat([submission_10,submission_xgb_11],axis = 0)
submission_final.head()


Out[136]:
id Demanda_uni_equil
0 2 2.295357
1 7 4.776682
2 8 3.786146
3 11 1.958096
4 13 5.011020

In [137]:
submission_final['Demanda_uni_equil'] = submission_final['Demanda_uni_equil'].round(1)

In [138]:
submission_final.to_csv('submission_nn_xgb',index = False)
