notebook.community

Edit and run



In [2]:

    
import pandas as pd
import numpy as np
import os
import math
import graphlab
import graphlab as gl
import graphlab.aggregate as agg
from graphlab import SArray



In [2]:

    
'''钢炮'''
path = '/home/zongyi/bimbo_data/'
train = gl.SFrame.read_csv(path + 'train_lag5_w8.csv', verbose=False)









    



/usr/local/lib/python2.7/dist-packages/requests/packages/urllib3/util/ssl_.py:315: SNIMissingWarning: An HTTPS request has been made, but the SNI (Subject Name Indication) extension to TLS is not available on this platform. This may cause the server to present an incorrect TLS certificate, which can cause validation failures. For more information, see https://urllib3.readthedocs.org/en/latest/security.html#snimissingwarning.
  SNIMissingWarning
/usr/local/lib/python2.7/dist-packages/requests/packages/urllib3/util/ssl_.py:120: InsecurePlatformWarning: A true SSLContext object is not available. This prevents urllib3 from configuring SSL appropriately and may cause certain SSL connections to fail. For more information, see https://urllib3.readthedocs.org/en/latest/security.html#insecureplatformwarning.
  InsecurePlatformWarning






    



This non-commercial license of GraphLab Create for academic use is assigned to zong-yi.liu@irit.fr and will expire on July 13, 2017.






    



[INFO] graphlab.cython.cy_server: GraphLab Create v2.0.1 started. Logging: /tmp/graphlab_server_1472154436.log



In [3]:

    
town = gl.SFrame.read_csv(path + 'towns.csv', verbose=False)
train = train.join(town, on=['Agencia_ID','Producto_ID'], how='left')
train = train.fillna('t_c',1)
train = train.fillna('tcc',0)
train = train.fillna('tp_sum',0)
train = train.fillna('n_t',0)
# del train['Town']



In [4]:

    
del train['id']
del train['Venta_uni_hoy']
del train['Venta_hoy']
del train['Dev_uni_proxima']
del train['Dev_proxima']
del train['Demanda_uni_equil']



In [5]:

    
relag_train = gl.SFrame.read_csv(path + 're_lag_train.csv', verbose=False)
train = train.join(relag_train, on=['Cliente_ID','Producto_ID','Semana'], how='left')
train = train.fillna('re_lag1',0)
train = train.fillna('re_lag2',0)
train = train.fillna('re_lag3',0)
train = train.fillna('re_lag4',0)
train = train.fillna('re_lag5',0)
train['re_sum'] = (train['re_lag1'] + train['re_lag2'] + train['re_lag3'] + train['re_lag4'] + train['re_lag5'])/5
del relag_train



In [6]:

    
pd = gl.SFrame.read_csv(path + 'products.csv', verbose=False)
train = train.join(pd, on=['Producto_ID'], how='left')
train = train.fillna('prom',0)
train = train.fillna('weight',0)
train = train.fillna('pieces',1)
train = train.fillna('w_per_piece',0)
train = train.fillna('healthy',0)
train = train.fillna('drink',0)
del pd



In [7]:

    
client = gl.SFrame.read_csv(path + 'clients.csv', verbose=False)
train = train.join(client, on=['Cliente_ID'], how='left')
del client



In [8]:

    
del train['Semana']
del train['Canal_ID']
# del train['tcc']
del train['re_lag1']
del train['re_lag2']
del train['re_lag3']
del train['re_lag4']
del train['re_lag5']
del train['prom']
del train['healthy']
del train['drink']
del train['brand']
# del train['week_times']



In [9]:

    
print train.column_names()
print len(train.column_names())









    



['Agencia_ID', 'Ruta_SAK', 'Cliente_ID', 'Producto_ID', 'Demada_log', 'lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'week_times', 'lag_sum', 'prior_sum', 'n_a', 'n_r', 'n_c', 'n_p', 'Town', 't_c', 'tcc', 'tp_sum', 'n_t', 're_sum', 'p_clt', 'weight', 'pieces', 'w_per_piece', 'price', 'c_clt']
29



In [10]:

    
train.save(path+'train_fs_w8.csv',format='csv')



In [ ]:



In [32]:

    
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('darkgrid', {'grid.color': '.8','grid.linestyle': u'--'}) 
%matplotlib inline

figsize(12, 6)
plt.bar(w['id'], w['count'], tick_label=w['name'])

plt.xticks(rotation=45)









    Out[32]:





(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35]), <a list of 36 Text xticklabel objects>)



In [ ]:



In [ ]:



In [17]:

    
path = '/home/zongyi/bimbo_data/'

test = gl.SFrame.read_csv(path + 'test_lag5_w9.csv', verbose=False)



In [18]:

    
town = gl.SFrame.read_csv(path + 'towns.csv', verbose=False)
test = test.join(town, on=['Agencia_ID','Producto_ID'], how='left')
test = test.fillna('t_c',1)
test = test.fillna('tcc',0)
test = test.fillna('tp_sum',0)
test = test.fillna('n_t',0)
# del test['Town']



In [19]:

    
relag_test = gl.SFrame.read_csv(path + 're_lag_test.csv', verbose=False)
test = test.join(relag_test, on=['Cliente_ID','Producto_ID','Semana'], how='left')
test = test.fillna('re_lag1',0)
test = test.fillna('re_lag2',0)
test = test.fillna('re_lag3',0)
test = test.fillna('re_lag4',0)
test = test.fillna('re_lag5',0)

def f(x):
    if x['Semana']==10:
        a = (x['re_lag1'] + x['re_lag2'] + x['re_lag3'] + x['re_lag4'] + x['re_lag5'])/5
    if x['Semana']==11:
        a = (x['re_lag2'] + x['re_lag3'] + x['re_lag4'] + x['re_lag5'])/4   
    return a
test['re_sum'] = test[['Semana','re_lag1', 're_lag2', 're_lag3','re_lag4','re_lag5']].apply(f)

# test['re_sum'] = (test['re_lag1'] + test['re_lag2'] + test['re_lag3'] + test['re_lag4'] + test['re_lag5'])/5

del test['re_lag1']
del test['re_lag2']
del test['re_lag3']
del test['re_lag4']
del test['re_lag5']



In [20]:

    
pd = gl.SFrame.read_csv(path + 'products.csv', verbose=False)
test = test.join(pd, on=['Producto_ID'], how='left')
test = test.fillna('prom',0)
test = test.fillna('weight',0)
test = test.fillna('pieces',1)
test = test.fillna('w_per_piece',0)
test = test.fillna('healthy',0)
test = test.fillna('drink',0)
del pd



In [21]:

    
client = gl.SFrame.read_csv(path + 'clients.csv', verbose=False)
test = test.join(client, on=['Cliente_ID'], how='left')
del client



In [28]:

    
# del test['Semana']
del test['Canal_ID']
# del test['tcc']
# del test['re_lag1']
# del test['re_lag2']
# del test['re_lag3']
# del test['re_lag4']
# del test['re_lag5']
del test['prom']
del test['healthy']
del test['drink']
del test['brand']



In [22]:

    
print test.column_names()
print len(test.column_names())









    



['id', 'Semana', 'Agencia_ID', 'Canal_ID', 'Ruta_SAK', 'Cliente_ID', 'Producto_ID', 'lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag_sum', 'prior_sum', 'week_times', 'n_a', 'n_r', 'n_c', 'n_p', 'Town', 't_c', 'tcc', 'tp_sum', 'n_t', 're_sum', 'p_clt', 'prom', 'brand', 'weight', 'pieces', 'w_per_piece', 'healthy', 'drink', 'price', 'c_clt']
35



In [23]:

    
test.save(path+'test_fs_w9.csv',format='csv')



In [ ]:



In [ ]: