In [ ]:
import pandas as pd
import numpy as np
import os
import math
import graphlab
import graphlab as gl
import graphlab.aggregate as agg

In [ ]:
# Directory that holds the competition CSV files.
path = '~/GBRT/'

# Load the training data.
sf = gl.SFrame.read_csv(path + 'train.csv', verbose=False)

# Add log(demand + 1) as a new column.
# NOTE: the column name is spelled 'Demada_log' (sic); the prior-sum
# aggregation further down reads it by that exact name, so keep it as is.
demand = sf['Demanda_uni_equil']
sf['Demada_log'] = demand.apply(lambda units: math.log(units + 1))

In [ ]:
# Prior log-demand sums per (Cliente_ID, Producto_ID), labelled for weeks 10 and 11.
#   week 10 -> sum over training rows with Semana > 4
#   week 11 -> sum over training rows with Semana > 5
# `sf` is deliberately NOT rebound here: filtering it in place made this cell
# non-idempotent (a second run would compute sum10 from Semana > 5 only).


def _prior_sum_for_week(frame, after_week, target_week):
    """Aggregate 'Demada_log' per (Cliente_ID, Producto_ID).

    Parameters
    ----------
    frame : SFrame with columns 'Semana', 'Cliente_ID', 'Producto_ID', 'Demada_log'.
    after_week : only rows with Semana strictly greater than this are summed.
    target_week : constant written to the result's 'Semana' column so the
        result can be joined onto rows of that week.

    Returns
    -------
    SFrame with columns 'Cliente_ID', 'Producto_ID', 'prior_sum', 'Semana'.
    """
    recent = frame[frame['Semana'] > after_week]
    totals = recent.groupby(key_columns=['Cliente_ID', 'Producto_ID'],
                            operations={'prior_sum': agg.SUM('Demada_log')})
    totals['Semana'] = target_week
    return totals


sum10 = _prior_sum_for_week(sf, 4, 10)
sum11 = _prior_sum_for_week(sf, 5, 11)

# Stack both weeks into a single lookup table keyed by (Semana, Cliente_ID, Producto_ID).
sum1011 = sum10.append(sum11)

In [ ]:
# Refresh the 'prior_sum' feature of the test feature file.
# NOTE: the file is read and then overwritten in place at the same location.
test_csv = path + 'test_lag5_w8.csv'
test = gl.SFrame.read_csv(test_csv, verbose=False)

# Drop a stale 'prior_sum' column if the file has one; a file without the
# column would otherwise make the removal raise and stop the notebook.
if 'prior_sum' in test.column_names():
    del test['prior_sum']

# Left join keeps every test row; (week, client, product) combinations that
# have no entry in sum1011 come back as missing and are set to 0.
test = test.join(sum1011, on=['Semana', 'Cliente_ID', 'Producto_ID'], how='left')
test = test.fillna('prior_sum', 0)

test.save(test_csv, format='csv')