In [26]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [27]:
train['Demanda_uni_equil_log'] = np.log1p(train['Demanda_uni_equil'])

In [ ]:


In [28]:
products = pd.read_csv('products.csv')
clients = pd.read_csv('clients.csv')
del clients['Unnamed: 0']
del products['Unnamed: 0']
del products['NombreProducto']
del products['short_name']

In [29]:
products.head()


Out[29]:
Producto_ID price brand weight pieces cluster_nombre drink w_per_piece
0 9 82.500000 NES 750.0 1.0 93 0 750.00
1 41 17.992632 BIM 480.0 6.0 62 0 80.00
2 53 14.300088 LON 170.0 1.0 93 0 170.00
3 72 3.697305 TR 45.0 4.0 44 0 11.25
4 73 21.360519 BIM 540.0 1.0 75 0 540.00

In [30]:
clients.head()


Out[30]:
Cliente_ID OXXO ARTELI ALSUPER BODEGA CALIMAX XICANS ABARROTES CARNICERIA FRUTERIA ... ELEVEN HOTEL HOSPITAL CAFE FARMACIA CREME SUPER COMOD MODELOR UNKN
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
1 1 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 2 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
3 3 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
4 4 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

5 rows × 21 columns


In [31]:
towns = pd.read_csv('town_state.csv')
train = pd.merge(train, towns, how='left', on=['Agencia_ID'])
test = pd.merge(test, towns, how='left', on=['Agencia_ID'])
client_count = train.groupby(['Town'])['Cliente_ID'].count().reset_index()
client_count.rename(columns={'Cliente_ID': 'Cliente_ID_town_count'}, inplace=True)
train = pd.merge(train, client_count, how='left', on=['Town'])
test = pd.merge(test, client_count, how='left', on=['Town'])
del train['Town']
del train['State']
del test['Town']
del test['State']
del towns

In [32]:
train.head()


Out[32]:
Semana Agencia_ID Canal_ID Ruta_SAK Cliente_ID Producto_ID Venta_uni_hoy Venta_hoy Dev_uni_proxima Dev_proxima Demanda_uni_equil Demanda_uni_equil_log Cliente_ID_town_count
0 3 1110 7 3301 15766 1212 3 25.14 0 0.0 3 1.386294 327534
1 3 1110 7 3301 15766 1216 4 33.52 0 0.0 4 1.609438 327534
2 3 1110 7 3301 15766 1238 4 39.32 0 0.0 4 1.609438 327534
3 3 1110 7 3301 15766 1240 4 33.52 0 0.0 4 1.609438 327534
4 3 1110 7 3301 15766 1242 3 22.92 0 0.0 3 1.386294 327534

In [33]:
mean_prod_client = train.groupby(['Semana', 'Producto_ID', 'Cliente_ID'])['Demanda_uni_equil_log'].mean().reset_index()
#mean_prod_client = train.groupby(['Semana', 'Producto_ID', 'Cliente_ID'])['Demanda_uni_equil_log'].median().reset_index()
gb_week = mean_prod_client.groupby(['Semana'])
gb_week_test = test.groupby(['Semana'])

In [34]:
week3 = gb_week.get_group(3)
week4 = gb_week.get_group(4)
week5 = gb_week.get_group(5)
week6 = gb_week.get_group(6)
week7 = gb_week.get_group(7)
week8 = gb_week.get_group(8)
week9 = gb_week.get_group(9)

In [35]:
week10 = gb_week_test.get_group(10)
week11 = gb_week_test.get_group(11)

In [36]:
week10 = pd.merge(week10, week9[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week10.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week10 = pd.merge(week10, week8[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week10.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week10 = pd.merge(week10, week7[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week10.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)

In [37]:
week11 = pd.merge(week11, week10[['Producto_ID', 'Cliente_ID', 'lag3']], how='left', on=['Producto_ID', 'Cliente_ID'])
week11.rename(columns={'lag3': 'lag1'}, inplace=True)
week11 = pd.merge(week11, week9[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week11.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week11 = pd.merge(week11, week8[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week11.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)

In [38]:
week9.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week9 = pd.merge(week9, week8[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week9.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week9 = pd.merge(week9, week7[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week9.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week9 = pd.merge(week9, week6[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week9.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)

In [39]:
week8.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week8 = pd.merge(week8, week7[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week8.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week8 = pd.merge(week8, week6[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week8.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week8 = pd.merge(week8, week5[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week8.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)

In [40]:
week7.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week7 = pd.merge(week7, week6[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week7.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week7 = pd.merge(week7, week5[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week7.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week7 = pd.merge(week7, week4[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week7.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)

In [41]:
week6.rename(columns={'Demanda_uni_equil_log': 'Demanda_uni_equil_log0'}, inplace=True)
week6 = pd.merge(week6, week5[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week6.rename(columns={'Demanda_uni_equil_log': 'lag1'}, inplace=True)
week6 = pd.merge(week6, week4[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week6.rename(columns={'Demanda_uni_equil_log': 'lag2'}, inplace=True)
week6 = pd.merge(week6, week3[['Producto_ID', 'Cliente_ID', 'Demanda_uni_equil_log']], how='left', on=['Producto_ID', 'Cliente_ID'])
week6.rename(columns={'Demanda_uni_equil_log': 'lag3'}, inplace=True)

In [42]:
train_lag = week6.append(week7, ignore_index=True).append(week8, ignore_index=True).append(week9, ignore_index=True)

In [43]:
test_lag = week10.append(week11, ignore_index=True)

In [44]:
del train['Demanda_uni_equil_log']
del train['Demanda_uni_equil']
del train['Venta_hoy']
del train['Venta_uni_hoy']
del train['Dev_proxima']
del train['Dev_uni_proxima']

In [45]:
train_lag = pd.merge(train_lag, train, how='left', on=['Semana','Producto_ID', 'Cliente_ID'])

In [46]:
train_lag = pd.merge(train_lag, products, how='left', on=['Producto_ID'])
train_lag = pd.merge(train_lag, clients, how='left', on=['Cliente_ID'])

In [47]:
test_lag = pd.merge(test_lag, products, how='left', on=['Producto_ID'])
test_lag = pd.merge(test_lag, clients, how='left', on=['Cliente_ID'])

In [48]:
del train_lag['brand']
train_lag.fillna(0, inplace=True)
del test_lag['brand']
test_lag.fillna(0, inplace=True)

In [49]:
train_lag.to_csv('new_train_mean_cl.csv')

In [50]:
test_lag.to_csv('new_test_mean_cl.csv')

In [51]:
test_lag


Out[51]:
id Semana Agencia_ID Canal_ID Ruta_SAK Cliente_ID Producto_ID Cliente_ID_town_count lag1 lag2 ... ELEVEN HOTEL HOSPITAL CAFE FARMACIA CREME SUPER COMOD MODELOR UNKN
0 2 10 2045 1 2831 4549769 32940 249200 1.098612 1.098612 ... 0 0 0 0 0 0 0 0 0 1
1 7 10 1612 1 2837 4414012 35305 231566 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
2 8 10 1349 1 1223 397854 1240 220034 1.386294 1.609438 ... 0 0 0 0 0 0 0 0 0 1
3 11 10 1336 1 1069 4387996 2233 408793 1.098612 1.098612 ... 0 0 0 0 0 0 0 0 0 1
4 13 10 1217 1 1143 4446449 1240 265360 1.609438 1.609438 ... 0 0 0 0 0 0 0 0 0 1
5 14 10 1312 1 2056 69110 43274 610362 1.098612 1.609438 ... 0 0 0 0 0 0 0 0 0 1
6 18 10 1629 1 1612 2325923 4270 596040 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7 22 10 1477 4 4714 4670562 35525 544060 2.197225 2.197225 ... 0 0 0 0 0 0 0 0 0 1
8 26 10 3214 1 1627 4721398 36920 920007 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
9 31 10 1614 1 4501 1042331 1278 569527 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 0
10 32 10 1112 1 1013 331630 3631 383484 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
11 37 10 1259 6 3229 4682651 42434 778114 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
12 38 10 2037 1 2805 2194171 31507 209411 1.609438 0.000000 ... 0 0 0 0 0 0 0 0 0 1
13 41 10 1636 1 1209 2427417 1125 324093 2.397895 0.000000 ... 0 0 0 0 0 0 0 0 0 0
14 43 10 1387 1 2014 409049 43274 337827 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
15 44 10 1657 4 6643 1965412 30314 596040 2.564949 0.000000 ... 0 0 0 0 0 0 0 0 0 1
16 48 10 4066 2 1520 1397467 34794 321529 1.945910 1.945910 ... 0 0 0 0 0 0 0 0 0 1
17 53 10 1236 1 1123 911568 1220 444222 0.693147 0.000000 ... 0 0 0 0 0 0 0 0 0 1
18 54 10 1120 1 2119 669062 35305 509488 2.197225 2.079442 ... 0 0 0 0 0 0 0 0 0 1
19 56 10 1626 1 2106 2028381 30532 172339 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
20 58 10 1117 1 1082 1544467 2233 778433 2.484907 1.609438 ... 0 0 0 0 0 0 0 0 0 1
21 59 10 1359 11 3957 2068213 6469 458710 1.386294 1.609438 ... 0 0 0 0 0 0 0 0 0 1
22 60 10 1224 1 1616 80637 325 753265 0.693147 1.098612 ... 0 0 0 0 0 0 0 0 0 1
23 62 10 1462 1 2105 1263527 30549 224026 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
24 63 10 1311 1 2805 337708 31471 515693 1.791759 0.000000 ... 0 0 0 0 0 0 0 0 0 1
25 65 10 1111 1 1201 50107 1232 475052 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
26 67 10 1534 1 2011 4516053 43064 248328 0.693147 0.000000 ... 0 0 0 0 0 0 0 0 0 1
27 72 10 25759 1 1121 47663 43203 214801 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
28 75 10 1331 1 1110 2203774 1216 244697 0.693147 0.693147 ... 0 0 0 0 0 0 0 0 0 1
29 78 10 2237 1 1241 1477534 1250 553984 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7401335 6999201 11 1213 1 1202 1370327 1242 511746 0.000000 2.079442 ... 0 0 0 0 0 0 0 0 0 1
7401336 6999202 11 1626 1 2843 1732798 43307 172339 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401337 6999203 11 1556 1 5034 2327849 40886 278298 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401338 6999204 11 1315 1 1155 873324 1242 580696 2.079442 1.945910 ... 0 0 0 0 0 0 0 0 0 1
7401339 6999206 11 2085 2 1508 127018 34786 299688 0.000000 4.290459 ... 0 0 0 0 0 0 1 0 0 0
7401340 6999208 11 2019 1 1201 2226663 35651 316070 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401341 6999209 11 1116 1 1606 797334 4245 778114 0.000000 1.386294 ... 0 0 0 0 0 0 0 0 0 1
7401342 6999210 11 2230 1 2855 2016664 43307 350967 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401343 6999213 11 1152 4 1852 2222650 37086 327534 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 0
7401344 6999214 11 2218 1 1106 464390 1212 227590 0.000000 1.609438 ... 0 0 0 0 0 0 0 0 0 1
7401345 6999215 11 2036 1 2839 1961384 37361 317867 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401346 6999218 11 1628 1 2872 84886 37058 341523 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401347 6999219 11 1140 1 1240 2428803 46772 420232 0.000000 1.609438 ... 0 0 0 0 0 0 0 0 0 1
7401348 6999221 11 1613 1 1024 2392365 1109 314742 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 0
7401349 6999224 11 2032 1 2056 241145 43316 455143 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401350 6999226 11 1245 1 2804 1419434 35305 524874 0.000000 1.098612 ... 0 0 0 0 0 0 0 0 0 1
7401351 6999227 11 3215 1 4459 58834 43069 778433 2.079442 2.302585 ... 0 0 0 0 0 0 0 0 0 1
7401352 6999230 11 2031 1 1051 4477847 1146 320754 0.000000 4.369448 ... 0 0 0 0 0 0 0 0 0 1
7401353 6999231 11 1636 1 1101 1740273 1230 324093 0.000000 0.000000 ... 0 0 0 0 0 0 1 0 0 0
7401354 6999234 11 4049 1 1141 1067683 37403 429162 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401355 6999235 11 2032 1 1155 241197 35651 455143 0.000000 2.079442 ... 0 0 0 0 0 0 0 0 0 0
7401356 6999236 11 24049 1 4502 4416986 2233 106285 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401357 6999239 11 1427 1 2801 1373090 43307 252685 2.197225 1.098612 ... 0 0 0 0 0 0 0 0 0 0
7401358 6999240 11 1620 1 1242 1611046 1230 227833 0.000000 0.693147 ... 0 0 0 0 0 0 0 0 0 1
7401359 6999243 11 1631 1 2806 1526987 43285 447269 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 0
7401360 6999244 11 1420 1 1119 1621931 43206 380796 3.806662 4.043051 ... 0 0 0 0 0 0 0 0 0 1
7401361 6999246 11 2057 1 1153 4379638 1232 746338 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1
7401362 6999248 11 1622 1 2869 192749 30532 372456 0.000000 1.386294 ... 0 0 0 0 0 0 0 0 0 0
7401363 6999249 11 1636 1 4401 286071 35107 324093 0.000000 1.386294 ... 0 0 0 0 0 0 0 0 0 1
7401364 6999250 11 1625 1 1259 978760 1232 260163 0.000000 0.000000 ... 0 0 0 0 0 0 0 0 0 1

7401365 rows × 37 columns


In [ ]:


In [ ]: