In [26]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
In [27]:
train['Demanda_uni_equil_log'] = np.log1p(train['Demanda_uni_equil'])
In [ ]:
In [28]:
products = pd.read_csv('products.csv')
clients = pd.read_csv('clients.csv')
del clients['Unnamed: 0']
del products['Unnamed: 0']
del products['NombreProducto']
del products['short_name']
In [29]:
products.head()
Out[29]:
In [30]:
clients.head()
Out[30]:
In [31]:
towns = pd.read_csv('town_state.csv')
train = pd.merge(train, towns, how='left', on=['Agencia_ID'])
test = pd.merge(test, towns, how='left', on=['Agencia_ID'])
client_count = train.groupby(['Town'])['Cliente_ID'].count().reset_index()
client_count.rename(columns={'Cliente_ID': 'Cliente_ID_town_count'}, inplace=True)
train = pd.merge(train, client_count, how='left', on=['Town'])
test = pd.merge(test, client_count, how='left', on=['Town'])
del train['Town']
del train['State']
del test['Town']
del test['State']
del towns
In [32]:
train.head()
Out[32]:
In [33]:
mean_prod_client = train.groupby(['Semana', 'Producto_ID', 'Cliente_ID'])['Demanda_uni_equil_log'].mean().reset_index()
#mean_prod_client = train.groupby(['Semana', 'Producto_ID', 'Cliente_ID'])['Demanda_uni_equil_log'].median().reset_index()
gb_week = mean_prod_client.groupby(['Semana'])
gb_week_test = test.groupby(['Semana'])
In [34]:
week3 = gb_week.get_group(3)
week4 = gb_week.get_group(4)
week5 = gb_week.get_group(5)
week6 = gb_week.get_group(6)
week7 = gb_week.get_group(7)
week8 = gb_week.get_group(8)
week9 = gb_week.get_group(9)
In [35]:
week10 = gb_week_test.get_group(10)
week11 = gb_week_test.get_group(11)
In [36]:
week10 = pd.merge(week10, week9[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week10.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week10 = pd.merge(week10, week8[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week10.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week10 = pd.merge(week10, week7[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week10.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)
In [37]:
week11 = pd.merge(week11, week10[['Producto_ID', 'Cliente_ID', 'lag3']], how='left', on=['Producto_ID', 'Cliente_ID'])
week11.rename(columns={'lag3': 'lag1'}, inplace=True)
week11 = pd.merge(week11, week9[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week11.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week11 = pd.merge(week11, week8[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week11.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)
In [38]:
week9.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week9 = pd.merge(week9, week8[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week9.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week9 = pd.merge(week9, week7[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week9.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week9 = pd.merge(week9, week6[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week9.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)
In [39]:
week8.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week8 = pd.merge(week8, week7[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week8.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week8 = pd.merge(week8, week6[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week8.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week8 = pd.merge(week8, week5[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week8.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)
In [40]:
week7.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week7 = pd.merge(week7, week6[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week7.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week7 = pd.merge(week7, week5[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week7.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week7 = pd.merge(week7, week4[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week7.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)
In [41]:
week6.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week6 = pd.merge(week6, week5[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week6.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week6 = pd.merge(week6, week4[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week6.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week6 = pd.merge(week6, week3[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week6.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)
In [42]:
train_lag = week6.append(week7, ignore_index=True).append(week8, ignore_index=True).append(week9, ignore_index=True)
In [43]:
test_lag = week10.append(week11, ignore_index=True)
In [44]:
del train['Demanda_uni_equil_log']
del train['Demanda_uni_equil']
del train['Venta_hoy']
del train['Venta_uni_hoy']
del train['Dev_proxima']
del train['Dev_uni_proxima']
In [45]:
train_lag = pd.merge(train_lag, train, how='left', on=['Semana','Producto_ID', 'Cliente_ID'])
In [46]:
train_lag = pd.merge(train_lag, products, how='left', on=['Producto_ID'])
train_lag = pd.merge(train_lag, clients, how='left', on=['Cliente_ID'])
In [47]:
test_lag = pd.merge(test_lag, products, how='left', on=['Producto_ID'])
test_lag = pd.merge(test_lag, clients, how='left', on=['Cliente_ID'])
In [48]:
del train_lag['brand']
train_lag.fillna(0, inplace=True)
del test_lag['brand']
test_lag.fillna(0, inplace=True)
In [49]:
train_lag.to_csv('new_train_mean_cl.csv')
In [50]:
test_lag.to_csv('new_test_mean_cl.csv')
In [51]:
test_lag
Out[51]:
In [ ]:
In [ ]: