In [3]:
import pandas as pd
import numpy as np
import os
import math
import graphlab
import graphlab as gl
import graphlab.aggregate as agg
In [4]:
# Locate the data directory and load the raw tables.
#
# This notebook has been run on two machines, each with its own absolute data
# directory. Instead of keeping one load cell per machine (where whichever
# cell does not match the current machine fails), pick the first candidate
# directory that actually exists.
#
# Keep the trailing slash on every candidate: later cells build file names by
# plain string concatenation (path + 'some_file.csv').
DATA_DIR_CANDIDATES = [
    '/home/zongyi/bimbo_data/',                       # workstation (nicknamed "steel cannon" in the original note)
    '/Users/zonemercy/jupyter_notebook/bimbo_data/',  # Mac
]

path = next((d for d in DATA_DIR_CANDIDATES if os.path.isdir(d)), None)
if path is None:
    # Fail early with a readable message rather than a CSV-parsing error.
    raise IOError('No data directory found; tried: %s'
                  % ', '.join(DATA_DIR_CANDIDATES))

# sf   : contents of train.csv
# town : contents of town_state.csv
sf = gl.SFrame.read_csv(path + 'train.csv', verbose=False)
town = gl.SFrame.read_csv(path + 'town_state.csv', verbose=False)
In [5]:
# Per-row return rate:
#   Dev_uni_proxima / (Dev_uni_proxima + Demanda_uni_equil)
# NOTE(review): when both columns are 0 this divides 0 by 0; confirm how the
# SFrame represents that result and how agg.MEAN treats it.
returned_units = sf['Dev_uni_proxima']
demand_units = sf['Demanda_uni_equil']
sf['return_rate'] = returned_units / (returned_units + demand_units)

# Average the return rate over every (Semana, Cliente_ID, Producto_ID) key.
# The aggregated column is named 're_lag'; the week shifting that turns it
# into lag features happens in the next cell.
re_lag = sf.groupby(
    key_columns=['Semana', 'Cliente_ID', 'Producto_ID'],
    operations={'re_lag': agg.MEAN('return_rate')},
)
In [6]:
# Build lagged return-rate features.
#
# `lag` keeps the per-(week, client, product) mean return rate, while `re_lag`
# is reduced to the bare key columns. On each pass the week stored in `lag` is
# moved forward by one more week and outer-joined back onto `re_lag`, so the
# k-th joined column holds the value observed k weeks earlier for the same
# client/product. The outer join also adds keys that exist only in the
# shifted table.
#
# The join leaves the lag columns with the names the library assigns on a
# name collision; the rename cell below expects
# 're_lag', 're_lag.1', ..., 're_lag.4'.
#
# NOTE: this cell rewrites `re_lag` in place, so it is not re-runnable on its
# own; re-run the groupby cell that creates `re_lag` first.
N_LAGS = 5
JOIN_KEYS = ['Cliente_ID', 'Producto_ID', 'Semana']

lag = re_lag.copy()
re_lag.remove_column('re_lag')

for shift in range(1, N_LAGS + 1):
    # Vectorized SArray arithmetic instead of a per-row Python lambda.
    lag['Semana'] = lag['Semana'] + 1
    re_lag = re_lag.join(lag, on=JOIN_KEYS, how='outer')
In [13]:
# Give the joined lag columns sequential names: the k-th joined column
# becomes 're_lag<k>'.
lag_column_names = {
    're_lag': 're_lag1',
    're_lag.1': 're_lag2',
    're_lag.2': 're_lag3',
    're_lag.3': 're_lag4',
    're_lag.4': 're_lag5',
}
# Kept as the last expression so the cell still displays the returned frame.
re_lag.rename(lag_column_names)
Out[13]:
In [14]:
# Training split: rows whose week (Semana) is strictly between 5 and 10.
is_train_week = (re_lag['Semana'] > 5) & (re_lag['Semana'] < 10)
re_train = re_lag[is_train_week]
In [15]:
# Write the training lag features next to the raw data as CSV.
train_csv = os.path.join(path, 're_lag_train.csv')
re_train.save(train_csv, format='csv')
In [16]:
# Test split: rows whose week (Semana) is strictly between 9 and 12.
# NOTE(review): presumably these weeks are absent from train.csv and exist
# here only because of the outer joins with the shifted lag table; confirm.
is_test_week = (re_lag['Semana'] > 9) & (re_lag['Semana'] < 12)
re_test = re_lag[is_test_week]
In [17]:
# Write the test lag features next to the raw data as CSV.
test_csv = os.path.join(path, 're_lag_test.csv')
re_test.save(test_csv, format='csv')