In [2]:
import pandas as pd
import numpy as np
import os
import math
import graphlab
import graphlab as gl
import graphlab.aggregate as agg
from graphlab import SArray

In [3]:
'''钢炮'''
path = '/home/zongyi/bimbo_data/'

In [4]:
train = gl.SFrame.read_csv(path + 'train_lag5.csv', verbose=False)


This non-commercial license of GraphLab Create for academic use is assigned to zong-yi.liu@irit.fr and will expire on July 13, 2017.
[INFO] graphlab.cython.cy_server: GraphLab Create v2.0.1 started. Logging: /tmp/graphlab_server_1471507912.log

In [5]:
town = gl.SFrame.read_csv(path + 'towns.csv', verbose=False)
train = train.join(town, on=['Agencia_ID','Producto_ID'], how='left')
train = train.fillna('t_c',1)
train = train.fillna('tcc',0)
train = train.fillna('tp_sum',0)
del train['Town']

In [6]:
del train['id']
del train['Venta_uni_hoy']
del train['Venta_hoy']
del train['Dev_uni_proxima']
del train['Dev_proxima']
del train['Demanda_uni_equil']

In [7]:
# relag_train = gl.SFrame.read_csv(path + 're_lag_train.csv', verbose=False)
# train = train.join(relag_train, on=['Cliente_ID','Producto_ID','Semana'], how='left')
# train = train.fillna('re_lag1',0)
# train = train.fillna('re_lag2',0)
# train = train.fillna('re_lag3',0)
# train = train.fillna('re_lag4',0)
# train = train.fillna('re_lag5',0)
# del relag_train

In [8]:
# pd = gl.SFrame.read_csv(path + 'products.csv', verbose=False)
# train = train.join(pd, on=['Producto_ID'], how='left')
# train = train.fillna('prom',0)
# train = train.fillna('weight',0)
# train = train.fillna('pieces',1)
# train = train.fillna('w_per_piece',0)
# train = train.fillna('healthy',0)
# train = train.fillna('drink',0)
# del train['brand']
# del train['NombreProducto']
# del pd

In [9]:
# client = gl.SFrame.read_csv(path + 'clients.csv', verbose=False)
# train = train.join(client, on=['Cliente_ID'], how='left')
# del client

In [10]:
# cluster = gl.SFrame.read_csv(path + 'prod_cluster.csv', verbose=False)
# cluster = cluster[['Producto_ID','cluster']]
# train = train.join(cluster, on=['Producto_ID'], how='left')

In [44]:
train


Out[44]:
Semana Agencia_ID Canal_ID Ruta_SAK Cliente_ID Producto_ID Demada_log lag1 lag2 lag3
9 2234 1 1234 2308240 1240 2.19722 1.79176 0.693147 1.94591
9 1121 1 1415 1758210 35651 2.3979 1.94591 2.56495 3.4012
9 2032 1 1267 3554707 1240 1.38629 1.60944 1.60944 1.38629
9 2220 1 1227 2492643 1109 1.09861 1.09861 0.693147 1.09861
9 1612 1 1139 1965168 972 1.60944 1.09861 0.693147 1.38629
9 2230 1 1002 2482931 3631 2.48491 2.3979 2.56495 2.56495
9 1121 1 1049 2449109 1125 2.19722 3.29584 3.04452 1.94591
9 1121 1 1418 169387 1240 1.38629 1.60944 2.3979 1.38629
9 4041 1 1256 205279 1240 1.09861 1.09861 1.09861 1.09861
9 3215 1 4435 1280994 43285 1.79176 2.3979 1.79176 2.70805
lag4 lag5 week_times lag_sum prior_sum n_a n_r n_c n_p t_c tcc tp_sum
0.0 0.0 1 4.43082 4.43082 21065.6 22685.1 21.2 306665.0 2 1474 7032.04
2.48491 2.56495 1 12.9619 12.9619 77207.6 11443.1 29.0 192223.0 2 5404 27611.6
1.79176 1.94591 1 8.34284 10.1346 58890.3 18231.6 14.0 306665.0 5 4551 28371.8
1.09861 0.0 1 3.98898 3.98898 24685.3 32862.7 9.57143 193724.0 4 1894 7265.43
1.09861 1.79176 1 6.06842 7.16704 29846.9 11812.4 25.7143 86259.0 4 2315 3132.52
2.30259 1.09861 1 10.929 13.3269 47042.3 37611.6 51.2857 113887.0 4 3509 1317.56
3.17805 2.77259 1 14.2369 17.495 77207.6 4978.14 42.0 150267.0 2 5404 19233.5
0.0 0.0 1 5.39363 5.39363 77207.6 12292.0 27.2857 306665.0 2 5404 23690.2
1.38629 1.09861 1 5.78073 7.16704 42256.7 29639.1 19.4286 306665.0 4 3356 19100.2
2.3979 2.83321 1 12.1288 12.1288 28063.9 2389.0 44.4286 172779.0 5 7784 36823.3
[20815581 rows x 22 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.

In [12]:
# Make a train-test split
train_data, test_data = train.random_split(0.999)

# Create a model.
model = gl.boosted_trees_regression.create(train_data, target='Demada_log',
                                           step_size=0.1,
                                           max_iterations=500,
                                           max_depth = 10,
                                          metric='rmse',
                                          random_seed=395,
                                          column_subsample=0.7,
                                          row_subsample=0.85,
                                          validation_set=test_data,
                                          model_checkpoint_path=path,
                                          model_checkpoint_interval=500)


Boosted trees regression:
--------------------------------------------------------
Number of examples          : 20794971
Number of features          : 21
Number of unpacked features : 21
+-----------+--------------+---------------+-----------------+
| Iteration | Elapsed Time | Training-rmse | Validation-rmse |
+-----------+--------------+---------------+-----------------+
| 1         | 62.903776    | 1.264849      | 1.274738        |
| 2         | 126.663157   | 1.160038      | 1.169487        |
| 3         | 191.931803   | 1.067362      | 1.076570        |
| 4         | 258.258780   | 0.986312      | 0.995179        |
| 5         | 318.288369   | 0.915581      | 0.924253        |
| 6         | 385.540477   | 0.852039      | 0.860266        |
| 7         | 452.344752   | 0.797251      | 0.805138        |
| 8         | 514.916891   | 0.749145      | 0.756583        |
| 9         | 581.087198   | 0.707737      | 0.714993        |
| 10        | 644.435950   | 0.672528      | 0.679509        |
| 11        | 711.694113   | 0.641653      | 0.648163        |
| 12        | 777.060168   | 0.615807      | 0.622150        |
| 13        | 843.987115   | 0.593331      | 0.599248        |
| 14        | 907.749734   | 0.575079      | 0.580770        |
| 15        | 972.747105   | 0.559523      | 0.565087        |
| 16        | 1036.379700  | 0.546221      | 0.551397        |
| 17        | 1098.973543  | 0.535189      | 0.540215        |
| 18        | 1163.603514  | 0.526107      | 0.530995        |
| 19        | 1232.930808  | 0.517305      | 0.521901        |
| 20        | 1299.263837  | 0.510672      | 0.515194        |
| 21        | 1367.519968  | 0.504288      | 0.508471        |
| 22        | 1436.786112  | 0.498989      | 0.502852        |
| 23        | 1502.339895  | 0.494468      | 0.498113        |
| 24        | 1569.068817  | 0.491130      | 0.494777        |
| 25        | 1633.156835  | 0.488182      | 0.491687        |
| 26        | 1703.049990  | 0.485723      | 0.489253        |
| 27        | 1772.055390  | 0.483073      | 0.486534        |
| 28        | 1839.690682  | 0.481129      | 0.484571        |
| 29        | 1905.750034  | 0.479545      | 0.482894        |
| 30        | 1970.341474  | 0.477900      | 0.481195        |
| 31        | 2034.847046  | 0.476596      | 0.479843        |
| 32        | 2106.960313  | 0.474826      | 0.477928        |
| 33        | 2177.918723  | 0.473837      | 0.476849        |
| 34        | 2244.860949  | 0.472711      | 0.475615        |
| 35        | 2310.819131  | 0.471808      | 0.474680        |
| 36        | 2375.655160  | 0.471108      | 0.474022        |
| 37        | 2443.135788  | 0.470235      | 0.473106        |
| 38        | 2513.791354  | 0.469486      | 0.472298        |
| 39        | 2584.486540  | 0.468832      | 0.471624        |
| 40        | 2650.127260  | 0.468238      | 0.471090        |
| 41        | 2714.607438  | 0.467793      | 0.470626        |
| 42        | 2780.178805  | 0.467184      | 0.470038        |
| 43        | 2846.151795  | 0.466673      | 0.469497        |
| 44        | 2915.442029  | 0.466121      | 0.468946        |
| 45        | 2984.101222  | 0.465557      | 0.468400        |
| 46        | 3050.775974  | 0.465232      | 0.468101        |
| 47        | 3119.499574  | 0.464794      | 0.467601        |
| 48        | 3185.940088  | 0.464256      | 0.467042        |
| 49        | 3255.253738  | 0.463992      | 0.466795        |
| 50        | 3323.755584  | 0.463321      | 0.466061        |
| 51        | 3392.728217  | 0.462910      | 0.465656        |
| 52        | 3460.315961  | 0.462594      | 0.465320        |
| 53        | 3529.795202  | 0.462267      | 0.465005        |
| 54        | 3596.227444  | 0.462066      | 0.464842        |
| 55        | 3664.384613  | 0.461732      | 0.464504        |
| 56        | 3737.408819  | 0.461341      | 0.464170        |
| 57        | 3807.117592  | 0.461077      | 0.463961        |
| 58        | 3875.325048  | 0.460891      | 0.463825        |
| 59        | 3939.882567  | 0.460767      | 0.463727        |
| 60        | 4005.621921  | 0.460431      | 0.463395        |
| 61        | 4073.339561  | 0.460225      | 0.463222        |
| 62        | 4136.870802  | 0.460064      | 0.463091        |
| 63        | 4206.569038  | 0.459773      | 0.462768        |
| 64        | 4270.029934  | 0.459462      | 0.462380        |
| 65        | 4342.338976  | 0.459084      | 0.462029        |
| 66        | 4411.449270  | 0.458901      | 0.461903        |
| 67        | 4477.415124  | 0.458777      | 0.461762        |
| 68        | 4540.242808  | 0.458479      | 0.461477        |
| 69        | 4607.664606  | 0.458237      | 0.461231        |
| 70        | 4680.793168  | 0.458054      | 0.461032        |
| 71        | 4746.282736  | 0.457836      | 0.460816        |
| 72        | 4814.935070  | 0.457560      | 0.460583        |
| 73        | 4884.784213  | 0.457370      | 0.460340        |
| 74        | 4954.117698  | 0.457219      | 0.460195        |
| 75        | 5021.902098  | 0.457079      | 0.460103        |
| 76        | 5087.275899  | 0.456901      | 0.459977        |
| 77        | 5157.800524  | 0.456756      | 0.459869        |
| 78        | 5227.139935  | 0.456647      | 0.459789        |
| 79        | 5296.592216  | 0.456516      | 0.459705        |
| 80        | 5362.198006  | 0.456287      | 0.459434        |
| 81        | 5427.423481  | 0.456152      | 0.459336        |
| 82        | 5494.201216  | 0.455928      | 0.459137        |
| 83        | 5560.899866  | 0.455828      | 0.459045        |
| 84        | 5632.537896  | 0.455632      | 0.458873        |
| 85        | 5698.646595  | 0.455499      | 0.458761        |
| 86        | 5767.518612  | 0.455293      | 0.458517        |
| 87        | 5835.322259  | 0.455120      | 0.458369        |
| 88        | 5906.141549  | 0.454999      | 0.458305        |
| 89        | 5974.746843  | 0.454844      | 0.458141        |
| 90        | 6039.300471  | 0.454715      | 0.457997        |
| 91        | 6107.997196  | 0.454590      | 0.457877        |
| 92        | 6172.238346  | 0.454505      | 0.457833        |
| 93        | 6239.465270  | 0.454375      | 0.457715        |
| 94        | 6306.627001  | 0.454212      | 0.457543        |
| 95        | 6376.039243  | 0.454097      | 0.457434        |
| 96        | 6445.004887  | 0.453976      | 0.457315        |
| 97        | 6507.373179  | 0.453888      | 0.457245        |
| 98        | 6578.855689  | 0.453747      | 0.457120        |
| 99        | 6646.555526  | 0.453620      | 0.456989        |
| 100       | 6712.880093  | 0.453559      | 0.456916        |
| 101       | 6783.710727  | 0.453409      | 0.456724        |
| 102       | 6851.554027  | 0.453295      | 0.456635        |
| 103       | 6916.954929  | 0.453127      | 0.456495        |
| 104       | 6989.260729  | 0.453031      | 0.456438        |
| 105       | 7058.250103  | 0.452940      | 0.456363        |
| 106       | 7124.664365  | 0.452839      | 0.456278        |
| 107       | 7192.842522  | 0.452744      | 0.456193        |
| 108       | 7260.723621  | 0.452637      | 0.456114        |
| 109       | 7328.379300  | 0.452484      | 0.455950        |
| 110       | 7399.625441  | 0.452343      | 0.455821        |
| 111       | 7469.093185  | 0.452275      | 0.455776        |
| 112       | 7538.995144  | 0.452169      | 0.455675        |
| 113       | 7607.134608  | 0.452106      | 0.455604        |
| 114       | 7677.830400  | 0.451961      | 0.455474        |
| 115       | 7748.755051  | 0.451876      | 0.455413        |
| 116       | 7816.791871  | 0.451756      | 0.455269        |
| 117       | 7889.409678  | 0.451679      | 0.455225        |
| 118       | 7958.653055  | 0.451592      | 0.455188        |
| 119       | 8027.651558  | 0.451517      | 0.455122        |
| 120       | 8097.563339  | 0.451442      | 0.455075        |
| 121       | 8170.713060  | 0.451347      | 0.454997        |
| 122       | 8239.486764  | 0.451278      | 0.454969        |
| 123       | 8307.886574  | 0.451156      | 0.454891        |
| 124       | 8379.406048  | 0.451088      | 0.454838        |
| 125       | 8446.172614  | 0.450983      | 0.454735        |
| 126       | 8512.953583  | 0.450950      | 0.454719        |
| 127       | 8585.721491  | 0.450836      | 0.454620        |
| 128       | 8658.484979  | 0.450760      | 0.454570        |
| 129       | 8727.702644  | 0.450685      | 0.454500        |
| 130       | 8800.963598  | 0.450615      | 0.454448        |
| 131       | 8870.711542  | 0.450547      | 0.454403        |
| 132       | 8939.807324  | 0.450495      | 0.454361        |
| 133       | 9009.653707  | 0.450441      | 0.454329        |
| 134       | 9078.846840  | 0.450379      | 0.454267        |
| 135       | 9150.150109  | 0.450271      | 0.454226        |
| 136       | 9223.898536  | 0.450191      | 0.454132        |
| 137       | 9293.092760  | 0.450105      | 0.454073        |
| 138       | 9364.084321  | 0.450057      | 0.454014        |
| 139       | 9430.967812  | 0.449978      | 0.453938        |
| 140       | 9499.681126  | 0.449938      | 0.453919        |
| 141       | 9572.760669  | 0.449873      | 0.453875        |
| 142       | 9641.046831  | 0.449793      | 0.453821        |
| 143       | 9705.176879  | 0.449735      | 0.453786        |
| 144       | 9775.887699  | 0.449668      | 0.453751        |
| 145       | 9846.113759  | 0.449585      | 0.453648        |
| 146       | 9914.964987  | 0.449492      | 0.453569        |
| 147       | 9984.075321  | 0.449439      | 0.453520        |
| 148       | 10051.602071 | 0.449385      | 0.453467        |
| 149       | 10121.875106 | 0.449318      | 0.453426        |
| 150       | 10187.410400 | 0.449243      | 0.453359        |
| 151       | 10253.521525 | 0.449186      | 0.453296        |
| 152       | 10322.814885 | 0.449095      | 0.453234        |
| 153       | 10392.209117 | 0.448999      | 0.453183        |
| 154       | 10456.985629 | 0.448951      | 0.453146        |
| 155       | 10527.765498 | 0.448886      | 0.453079        |
| 156       | 10597.718274 | 0.448839      | 0.453054        |
| 157       | 10668.884201 | 0.448796      | 0.453028        |
| 158       | 10734.201539 | 0.448750      | 0.452998        |
| 159       | 10803.979190 | 0.448691      | 0.452966        |
| 160       | 10870.829597 | 0.448615      | 0.452917        |
| 161       | 10942.051384 | 0.448586      | 0.452903        |
| 162       | 11005.274538 | 0.448538      | 0.452866        |
| 163       | 11077.364411 | 0.448466      | 0.452804        |
| 164       | 11150.940257 | 0.448406      | 0.452747        |
| 165       | 11216.802439 | 0.448364      | 0.452735        |
| 166       | 11284.851767 | 0.448306      | 0.452700        |
| 167       | 11352.682388 | 0.448239      | 0.452657        |
| 168       | 11418.807725 | 0.448190      | 0.452614        |
| 169       | 11488.670049 | 0.448145      | 0.452594        |
| 170       | 11558.404479 | 0.448096      | 0.452550        |
| 171       | 11625.048368 | 0.448053      | 0.452531        |
| 172       | 11689.656795 | 0.448000      | 0.452460        |
| 173       | 11759.753712 | 0.447925      | 0.452430        |
| 174       | 11832.383882 | 0.447881      | 0.452406        |
| 175       | 11900.678617 | 0.447834      | 0.452368        |
| 176       | 11972.707555 | 0.447782      | 0.452340        |
| 177       | 12040.494257 | 0.447740      | 0.452304        |
| 178       | 12107.562017 | 0.447674      | 0.452240        |
| 179       | 12176.500390 | 0.447622      | 0.452201        |
| 180       | 12245.041581 | 0.447585      | 0.452159        |
| 181       | 12314.251174 | 0.447544      | 0.452128        |
| 182       | 12379.839845 | 0.447477      | 0.452057        |
| 183       | 12446.778225 | 0.447442      | 0.452055        |
| 184       | 12515.466147 | 0.447399      | 0.451958        |
| 185       | 12585.289226 | 0.447352      | 0.451930        |
| 186       | 12651.715484 | 0.447290      | 0.451872        |
| 187       | 12718.040768 | 0.447244      | 0.451871        |
| 188       | 12784.383311 | 0.447211      | 0.451855        |
| 189       | 12852.624878 | 0.447128      | 0.451801        |
| 190       | 12922.563646 | 0.447076      | 0.451772        |
| 191       | 12990.361687 | 0.447037      | 0.451732        |
| 192       | 13058.687734 | 0.447002      | 0.451715        |
| 193       | 13130.347920 | 0.446964      | 0.451688        |
| 194       | 13197.602942 | 0.446908      | 0.451623        |
| 195       | 13263.658270 | 0.446874      | 0.451606        |
| 196       | 13331.456953 | 0.446841      | 0.451597        |
| 197       | 13400.463554 | 0.446809      | 0.451575        |
| 198       | 13467.904431 | 0.446740      | 0.451534        |
| 199       | 13534.433200 | 0.446683      | 0.451489        |
| 200       | 13604.180718 | 0.446615      | 0.451416        |
| 201       | 13671.640008 | 0.446579      | 0.451406        |
| 202       | 13735.301453 | 0.446536      | 0.451382        |
| 203       | 13799.819517 | 0.446458      | 0.451295        |
| 204       | 13869.931670 | 0.446423      | 0.451274        |
| 205       | 13941.214084 | 0.446374      | 0.451260        |
| 206       | 14006.911546 | 0.446336      | 0.451214        |
| 207       | 14074.039295 | 0.446304      | 0.451191        |
| 208       | 14144.797477 | 0.446275      | 0.451179        |
| 209       | 14214.864536 | 0.446215      | 0.451129        |
| 210       | 14282.683034 | 0.446172      | 0.451103        |
| 211       | 14355.331564 | 0.446134      | 0.451104        |
| 212       | 14422.972058 | 0.446104      | 0.451082        |
| 213       | 14488.140232 | 0.446048      | 0.451019        |
| 214       | 14552.389952 | 0.445989      | 0.450979        |
| 215       | 14623.199648 | 0.445962      | 0.450960        |
| 216       | 14690.228384 | 0.445932      | 0.450941        |
| 217       | 14757.116522 | 0.445902      | 0.450903        |
| 218       | 14821.728835 | 0.445862      | 0.450862        |
| 219       | 14890.358436 | 0.445831      | 0.450836        |
| 220       | 14961.543949 | 0.445791      | 0.450799        |
| 221       | 15030.585218 | 0.445761      | 0.450786        |
| 222       | 15096.979502 | 0.445721      | 0.450755        |
| 223       | 15166.522764 | 0.445692      | 0.450740        |
| 224       | 15233.676469 | 0.445668      | 0.450738        |
| 225       | 15301.948471 | 0.445641      | 0.450722        |
| 226       | 15370.367362 | 0.445592      | 0.450696        |
| 227       | 15442.696970 | 0.445563      | 0.450659        |
| 228       | 15513.323734 | 0.445511      | 0.450612        |
| 229       | 15584.119429 | 0.445483      | 0.450590        |
| 230       | 15653.690744 | 0.445446      | 0.450566        |
| 231       | 15725.632749 | 0.445411      | 0.450542        |
| 232       | 15792.050899 | 0.445375      | 0.450500        |
| 233       | 15859.671382 | 0.445357      | 0.450498        |
| 234       | 15933.066907 | 0.445335      | 0.450507        |
| 235       | 16005.377416 | 0.445305      | 0.450482        |
| 236       | 16077.787475 | 0.445279      | 0.450476        |
| 237       | 16147.149674 | 0.445243      | 0.450445        |
| 238       | 16213.369780 | 0.445212      | 0.450444        |
| 239       | 16283.522455 | 0.445174      | 0.450431        |
| 240       | 16356.425548 | 0.445134      | 0.450409        |
| 241       | 16425.925734 | 0.445101      | 0.450374        |
| 242       | 16495.011344 | 0.445066      | 0.450360        |
| 243       | 16560.875670 | 0.445026      | 0.450331        |
| 244       | 16626.960274 | 0.444990      | 0.450299        |
| 245       | 16696.616722 | 0.444942      | 0.450262        |
| 246       | 16765.458459 | 0.444901      | 0.450222        |
| 247       | 16831.969865 | 0.444883      | 0.450219        |
| 248       | 16901.414791 | 0.444863      | 0.450221        |
| 249       | 16971.635103 | 0.444834      | 0.450203        |
| 250       | 17042.240501 | 0.444777      | 0.450185        |
| 251       | 17111.412826 | 0.444749      | 0.450167        |
| 252       | 17177.808606 | 0.444721      | 0.450117        |
| 253       | 17253.339823 | 0.444676      | 0.450084        |
| 254       | 17317.344777 | 0.444646      | 0.450085        |
| 255       | 17382.519191 | 0.444619      | 0.450090        |
| 256       | 17450.244867 | 0.444597      | 0.450078        |
| 257       | 17519.171068 | 0.444551      | 0.450058        |
| 258       | 17584.483833 | 0.444526      | 0.450037        |
| 259       | 17652.886886 | 0.444498      | 0.450015        |
| 260       | 17721.763173 | 0.444468      | 0.450006        |
| 261       | 17786.937140 | 0.444450      | 0.450009        |
| 262       | 17858.227156 | 0.444425      | 0.449985        |
| 263       | 17927.277959 | 0.444382      | 0.449964        |
| 264       | 17994.387536 | 0.444332      | 0.449920        |
| 265       | 18059.970729 | 0.444304      | 0.449891        |
| 266       | 18130.835975 | 0.444278      | 0.449859        |
| 267       | 18196.326692 | 0.444257      | 0.449851        |
| 268       | 18262.920902 | 0.444229      | 0.449824        |
| 269       | 18332.638978 | 0.444179      | 0.449804        |
| 270       | 18401.281498 | 0.444153      | 0.449780        |
| 271       | 18470.420268 | 0.444134      | 0.449761        |
| 272       | 18541.593706 | 0.444114      | 0.449752        |
| 273       | 18613.484351 | 0.444084      | 0.449737        |
| 274       | 18684.546780 | 0.444044      | 0.449711        |
| 275       | 18752.543092 | 0.444013      | 0.449718        |
| 276       | 18822.972625 | 0.443987      | 0.449706        |
| 277       | 18890.481870 | 0.443960      | 0.449680        |
| 278       | 18960.770026 | 0.443921      | 0.449649        |
| 279       | 19030.692963 | 0.443865      | 0.449610        |
| 280       | 19097.515596 | 0.443852      | 0.449597        |
| 281       | 19163.588742 | 0.443814      | 0.449579        |
| 282       | 19231.651298 | 0.443792      | 0.449531        |
| 283       | 19299.642457 | 0.443780      | 0.449528        |
| 284       | 19363.030034 | 0.443762      | 0.449517        |
| 285       | 19429.873732 | 0.443738      | 0.449496        |
| 286       | 19492.942687 | 0.443706      | 0.449482        |
| 287       | 19563.696339 | 0.443687      | 0.449463        |
| 288       | 19633.235021 | 0.443671      | 0.449460        |
| 289       | 19700.805665 | 0.443647      | 0.449446        |
| 290       | 19770.882514 | 0.443598      | 0.449412        |
| 291       | 19836.725054 | 0.443576      | 0.449421        |
| 292       | 19907.435156 | 0.443534      | 0.449408        |
| 293       | 19976.360334 | 0.443497      | 0.449372        |
| 294       | 20043.994010 | 0.443468      | 0.449392        |
| 295       | 20113.280432 | 0.443434      | 0.449377        |
| 296       | 20176.151258 | 0.443411      | 0.449369        |
| 297       | 20238.089702 | 0.443382      | 0.449345        |
| 298       | 20308.436104 | 0.443344      | 0.449326        |
| 299       | 20381.115256 | 0.443306      | 0.449314        |
| 300       | 20445.741237 | 0.443273      | 0.449297        |
| 301       | 20515.532759 | 0.443248      | 0.449275        |
| 302       | 20579.737960 | 0.443228      | 0.449268        |
| 303       | 20646.464989 | 0.443170      | 0.449221        |
| 304       | 20714.944225 | 0.443156      | 0.449204        |
| 305       | 20785.292694 | 0.443132      | 0.449199        |
| 306       | 20857.992526 | 0.443099      | 0.449186        |
| 307       | 20927.068915 | 0.443074      | 0.449153        |
| 308       | 20993.627740 | 0.443053      | 0.449159        |
| 309       | 21062.004836 | 0.443031      | 0.449137        |
| 310       | 21131.029303 | 0.443016      | 0.449133        |
| 311       | 21196.760865 | 0.442985      | 0.449127        |
| 312       | 21260.530556 | 0.442972      | 0.449129        |
| 313       | 21326.656751 | 0.442948      | 0.449121        |
| 314       | 21393.704325 | 0.442924      | 0.449114        |
| 315       | 21460.058090 | 0.442896      | 0.449117        |
| 316       | 21526.637395 | 0.442880      | 0.449108        |
| 317       | 21597.098311 | 0.442859      | 0.449093        |
| 318       | 21665.751469 | 0.442833      | 0.449085        |
| 319       | 21732.063577 | 0.442815      | 0.449066        |
| 320       | 21800.395029 | 0.442794      | 0.449061        |
| 321       | 21866.137545 | 0.442765      | 0.449025        |
| 322       | 21931.475776 | 0.442730      | 0.448990        |
| 323       | 21997.460717 | 0.442706      | 0.448981        |
| 324       | 22064.745573 | 0.442674      | 0.448936        |
| 325       | 22130.842410 | 0.442668      | 0.448934        |
| 326       | 22195.461332 | 0.442650      | 0.448927        |
| 327       | 22261.705771 | 0.442623      | 0.448920        |
| 328       | 22326.309649 | 0.442597      | 0.448905        |
| 329       | 22398.189119 | 0.442574      | 0.448879        |
| 330       | 22463.579648 | 0.442556      | 0.448876        |
| 331       | 22528.647820 | 0.442520      | 0.448850        |
| 332       | 22598.338149 | 0.442483      | 0.448813        |
| 333       | 22668.051021 | 0.442464      | 0.448798        |
| 334       | 22739.796408 | 0.442444      | 0.448812        |
| 335       | 22804.690009 | 0.442422      | 0.448801        |
| 336       | 22875.040649 | 0.442397      | 0.448791        |
| 337       | 22944.610456 | 0.442374      | 0.448774        |
| 338       | 23016.539010 | 0.442345      | 0.448760        |
| 339       | 23085.615761 | 0.442321      | 0.448752        |
| 340       | 23159.122392 | 0.442298      | 0.448737        |
| 341       | 23229.615847 | 0.442261      | 0.448694        |
| 342       | 23299.628348 | 0.442233      | 0.448668        |
| 343       | 23371.824758 | 0.442201      | 0.448661        |
| 344       | 23438.950870 | 0.442181      | 0.448654        |
| 345       | 23505.871055 | 0.442155      | 0.448639        |
| 346       | 23573.891682 | 0.442136      | 0.448624        |
| 347       | 23643.573895 | 0.442110      | 0.448628        |
| 348       | 23708.114411 | 0.442083      | 0.448614        |
| 349       | 23776.301909 | 0.442059      | 0.448607        |
| 350       | 23843.575101 | 0.442025      | 0.448594        |
| 351       | 23913.092317 | 0.441998      | 0.448591        |
| 352       | 23980.951090 | 0.441976      | 0.448597        |
| 353       | 24049.175251 | 0.441956      | 0.448568        |
| 354       | 24121.562212 | 0.441929      | 0.448549        |
| 355       | 24192.089028 | 0.441907      | 0.448545        |
| 356       | 24258.500843 | 0.441893      | 0.448542        |
| 357       | 24326.724139 | 0.441871      | 0.448534        |
| 358       | 24397.354799 | 0.441854      | 0.448535        |
| 359       | 24467.717053 | 0.441836      | 0.448528        |
| 360       | 24537.399165 | 0.441801      | 0.448495        |
| 361       | 24604.343114 | 0.441771      | 0.448488        |
| 362       | 24671.998842 | 0.441756      | 0.448482        |
| 363       | 24739.525812 | 0.441740      | 0.448487        |
| 364       | 24804.455700 | 0.441728      | 0.448474        |
| 365       | 24873.405326 | 0.441707      | 0.448465        |
| 366       | 24941.890149 | 0.441691      | 0.448468        |
| 367       | 25009.040307 | 0.441665      | 0.448436        |
| 368       | 25075.490877 | 0.441650      | 0.448437        |
| 369       | 25145.702571 | 0.441615      | 0.448403        |
| 370       | 25213.165786 | 0.441579      | 0.448373        |
| 371       | 25282.420197 | 0.441546      | 0.448346        |
| 372       | 25348.838570 | 0.441523      | 0.448335        |
| 373       | 25417.911080 | 0.441517      | 0.448334        |
| 374       | 25487.022127 | 0.441499      | 0.448323        |
| 375       | 25551.998150 | 0.441488      | 0.448324        |
| 376       | 25618.035956 | 0.441472      | 0.448325        |
| 377       | 25684.381754 | 0.441450      | 0.448313        |
| 378       | 25755.733001 | 0.441423      | 0.448305        |
| 379       | 25826.040767 | 0.441384      | 0.448266        |
| 380       | 25895.289623 | 0.441365      | 0.448278        |
| 381       | 25963.883992 | 0.441344      | 0.448288        |
| 382       | 26031.984012 | 0.441318      | 0.448272        |
| 383       | 26102.600428 | 0.441285      | 0.448259        |
| 384       | 26168.043779 | 0.441278      | 0.448249        |
| 385       | 26236.159553 | 0.441254      | 0.448227        |
| 386       | 26303.052766 | 0.441240      | 0.448221        |
| 387       | 26375.311790 | 0.441222      | 0.448219        |
| 388       | 26444.861985 | 0.441202      | 0.448215        |
| 389       | 26513.452317 | 0.441187      | 0.448227        |
| 390       | 26581.399293 | 0.441170      | 0.448206        |
| 391       | 26647.515880 | 0.441142      | 0.448210        |
| 392       | 26719.836958 | 0.441127      | 0.448201        |
| 393       | 26786.240090 | 0.441113      | 0.448192        |
| 394       | 26856.241378 | 0.441092      | 0.448191        |
| 395       | 26928.738812 | 0.441070      | 0.448182        |
| 396       | 26996.432320 | 0.441038      | 0.448161        |
| 397       | 27062.646773 | 0.441018      | 0.448142        |
| 398       | 27130.164348 | 0.440988      | 0.448117        |
| 399       | 27202.326940 | 0.440966      | 0.448106        |
| 400       | 27269.355123 | 0.440921      | 0.448059        |
| 401       | 27333.538317 | 0.440891      | 0.448040        |
| 402       | 27399.562962 | 0.440866      | 0.448036        |
| 403       | 27464.335171 | 0.440851      | 0.448035        |
| 404       | 27533.287827 | 0.440834      | 0.448017        |
| 405       | 27602.588023 | 0.440815      | 0.448025        |
| 406       | 27668.094657 | 0.440805      | 0.448018        |
| 407       | 27737.214630 | 0.440780      | 0.448018        |
| 408       | 27803.000809 | 0.440765      | 0.448016        |
| 409       | 27872.747768 | 0.440745      | 0.448019        |
| 410       | 27941.836637 | 0.440718      | 0.448003        |
| 411       | 28006.048566 | 0.440701      | 0.447984        |
| 412       | 28079.125848 | 0.440679      | 0.447975        |
| 413       | 28145.963411 | 0.440666      | 0.447977        |
| 414       | 28215.740796 | 0.440649      | 0.447965        |
| 415       | 28282.046763 | 0.440629      | 0.447969        |
| 416       | 28349.899204 | 0.440614      | 0.447957        |
| 417       | 28418.699521 | 0.440579      | 0.447942        |
| 418       | 28487.602831 | 0.440550      | 0.447926        |
| 419       | 28552.645598 | 0.440543      | 0.447927        |
| 420       | 28620.871202 | 0.440512      | 0.447891        |
| 421       | 28687.159272 | 0.440500      | 0.447886        |
| 422       | 28754.309338 | 0.440477      | 0.447875        |
| 423       | 28820.306588 | 0.440459      | 0.447873        |
| 424       | 28886.322605 | 0.440423      | 0.447865        |
| 425       | 28955.928346 | 0.440409      | 0.447816        |
| 426       | 29022.189721 | 0.440395      | 0.447813        |
| 427       | 29091.765413 | 0.440380      | 0.447805        |
| 428       | 29161.499084 | 0.440347      | 0.447803        |
| 429       | 29232.317971 | 0.440327      | 0.447795        |
| 430       | 29304.857563 | 0.440310      | 0.447790        |
| 431       | 29371.825772 | 0.440294      | 0.447784        |
| 432       | 29443.098361 | 0.440263      | 0.447771        |
| 433       | 29512.967418 | 0.440248      | 0.447766        |
| 434       | 29584.068248 | 0.440220      | 0.447755        |
| 435       | 29654.080104 | 0.440198      | 0.447725        |
| 436       | 29724.757025 | 0.440172      | 0.447716        |
| 437       | 29793.032737 | 0.440155      | 0.447708        |
| 438       | 29859.082379 | 0.440142      | 0.447705        |
| 439       | 29931.692346 | 0.440117      | 0.447698        |
| 440       | 30001.338215 | 0.440088      | 0.447701        |
| 441       | 30070.843802 | 0.440066      | 0.447699        |
| 442       | 30135.419684 | 0.440050      | 0.447708        |
| 443       | 30203.222748 | 0.440031      | 0.447681        |
| 444       | 30272.139966 | 0.440009      | 0.447662        |
| 445       | 30336.127617 | 0.439990      | 0.447654        |
| 446       | 30405.434460 | 0.439973      | 0.447666        |
| 447       | 30475.044304 | 0.439943      | 0.447653        |
| 448       | 30545.894720 | 0.439923      | 0.447645        |
| 449       | 30612.181260 | 0.439904      | 0.447645        |
| 450       | 30681.804359 | 0.439869      | 0.447615        |
| 451       | 30753.602188 | 0.439842      | 0.447608        |
| 452       | 30825.594469 | 0.439830      | 0.447604        |
| 453       | 30900.057342 | 0.439809      | 0.447590        |
| 454       | 30968.700433 | 0.439790      | 0.447539        |
| 455       | 31038.018817 | 0.439775      | 0.447516        |
| 456       | 31104.133761 | 0.439750      | 0.447478        |
| 457       | 31174.509307 | 0.439734      | 0.447472        |
| 458       | 31244.396028 | 0.439705      | 0.447478        |
| 459       | 31314.851790 | 0.439688      | 0.447473        |
| 460       | 31383.175244 | 0.439667      | 0.447460        |
| 461       | 31448.706864 | 0.439657      | 0.447459        |
| 462       | 31511.684279 | 0.439643      | 0.447451        |
| 463       | 31582.539583 | 0.439635      | 0.447449        |
| 464       | 31648.601473 | 0.439613      | 0.447450        |
| 465       | 31716.167806 | 0.439585      | 0.447446        |
| 466       | 31779.973007 | 0.439576      | 0.447443        |
| 467       | 31846.837857 | 0.439558      | 0.447433        |
| 468       | 31912.610820 | 0.439542      | 0.447435        |
| 469       | 31985.329886 | 0.439527      | 0.447432        |
| 470       | 32050.686671 | 0.439512      | 0.447420        |
| 471       | 32119.455413 | 0.439489      | 0.447400        |
| 472       | 32184.999486 | 0.439470      | 0.447393        |
| 473       | 32249.816554 | 0.439451      | 0.447370        |
| 474       | 32317.901612 | 0.439433      | 0.447361        |
| 475       | 32390.244312 | 0.439409      | 0.447359        |
| 476       | 32455.766584 | 0.439399      | 0.447354        |
| 477       | 32525.583357 | 0.439376      | 0.447345        |
| 478       | 32592.419104 | 0.439364      | 0.447345        |
| 479       | 32663.819054 | 0.439344      | 0.447297        |
| 480       | 32736.159029 | 0.439324      | 0.447294        |
| 481       | 32805.372440 | 0.439293      | 0.447273        |
| 482       | 32876.115138 | 0.439276      | 0.447255        |
| 483       | 32943.792259 | 0.439256      | 0.447231        |
| 484       | 33012.775315 | 0.439229      | 0.447218        |
| 485       | 33082.018096 | 0.439218      | 0.447214        |
| 486       | 33154.866399 | 0.439193      | 0.447192        |
| 487       | 33225.811296 | 0.439157      | 0.447181        |
| 488       | 33297.765905 | 0.439139      | 0.447167        |
| 489       | 33368.806775 | 0.439111      | 0.447171        |
| 490       | 33438.886534 | 0.439090      | 0.447165        |
| 491       | 33502.197317 | 0.439080      | 0.447168        |
| 492       | 33562.995510 | 0.439060      | 0.447146        |
| 493       | 33635.289893 | 0.439043      | 0.447140        |
| 494       | 33700.659719 | 0.439024      | 0.447117        |
| 495       | 33764.842126 | 0.439015      | 0.447120        |
| 496       | 33832.099138 | 0.438998      | 0.447116        |
| 497       | 33898.686136 | 0.438979      | 0.447107        |
| 498       | 33965.185898 | 0.438964      | 0.447108        |
| 499       | 34032.710430 | 0.438947      | 0.447101        |
| 500       | 34102.757059 | 0.438932      | 0.447099        |
Checkpointing to /home/zongyi/bimbo_data/model_checkpoint_500
+-----------+--------------+---------------+-----------------+

In [71]:
model1 = gl.boosted_trees_regression.create(train, target='Demada_log',
                                           step_size=0.1,
                                           max_iterations=4,
                                           max_depth = 10,
                                          metric='rmse',
                                          random_seed=395,
                                          column_subsample=0.7,
                                          row_subsample=0.85,
                                          validation_set=None,
                                          resume_from_checkpoint=path+'model_checkpoint_4',
                                           model_checkpoint_path=path,
                                          model_checkpoint_interval=2)


Resuming from checkpoint at /home/zongyi/bimbo_data/model_checkpoint_4
Boosted trees regression:
--------------------------------------------------------
Number of examples          : 20815581
Number of features          : 21
Number of unpacked features : 21
Resumed training from checkpoint at iteration 4 which is greater than or equal to max_iterations 4

In [13]:
model


Out[13]:
Class                          : BoostedTreesRegression

Schema
------
Number of examples             : 20794971
Number of feature columns      : 21
Number of unpacked features    : 21

Settings
--------
Number of trees                : 500
Max tree depth                 : 10
Training time (sec)            : 34110.3241
Training rmse                  : 0.4389
Validation rmse                : 0.4471

In [15]:
w = model.get_feature_importance()

In [16]:
w = w.add_row_number()

In [17]:
w


Out[17]:
id name index count
0 Ruta_SAK None 39491
1 Cliente_ID None 32859
2 n_c None 31315
3 Producto_ID None 29303
4 tp_sum None 26693
5 n_p None 26106
6 n_r None 25619
7 lag_sum None 24065
8 Agencia_ID None 23724
9 prior_sum None 22175
[21 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.

In [21]:
from IPython.core.pylabtools import figsize
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('darkgrid', {'grid.color': '.8','grid.linestyle': u'--'}) 
%matplotlib inline

figsize(12, 6)
plt.bar(w['id'], w['count'], tick_label=w['name'])

plt.xticks(rotation=45)


Out[21]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20]), <a list of 21 Text xticklabel objects>)

In [20]:
# Save predictions to an SArray
predictions = model.predict(train)

# Evaluate the model and save the results into a dictionary
results = model.evaluate(train)
print results


{'max_error': 6.300516724586487, 'rmse': 0.4389403189567331}

In [36]:
model.summary()


Class                          : BoostedTreesRegression

Schema
------
Number of examples             : 17797989
Number of feature columns      : 21
Number of unpacked features    : 21

Settings
--------
Number of trees                : 200
Max tree depth                 : 10
Training time (sec)            : 11956.8374
Training rmse                  : 0.4465
Validation rmse                : 0.4507


In [ ]:


In [23]:
test = gl.SFrame.read_csv(path + 'test_lag5.csv', verbose=False)
test = test.join(town, on=['Agencia_ID','Producto_ID'], how='left')
del test['Town']
test = test.fillna('t_c',1)
test = test.fillna('tcc',0)
test = test.fillna('tp_sum',0)

In [24]:
test


Out[24]:
id Semana Agencia_ID Canal_ID Ruta_SAK Cliente_ID Producto_ID lag1 lag2 lag3
4721633 10 4037 1 1153 2398808 43202 0.0 0.0 1.60944
6035419 10 2239 1 1157 1529806 1230 1.09861 1.38629 0.0
3462602 10 1366 1 1051 1368806 1146 1.09861 0.693147 0.693147
855102 10 1911 1 1156 867160 1232 1.38629 0.0 0.0
3242700 11 1952 11 5721 1126647 47612 0.0 3.09104 2.99573
2421613 10 2653 1 2117 954439 35305 2.77259 3.04452 2.56495
1573296 10 1427 1 1102 4561415 43197 1.38629 1.79176 1.94591
1619375 10 1629 1 1013 169508 1146 2.07944 2.19722 1.79176
3539201 11 1120 1 1453 4602755 1230 0.0 1.38629 1.38629
1631781 10 1385 4 6617 20130 2025 2.56495 0.0 2.56495
lag4 lag5 lag_sum prior_sum week_times n_a n_r n_c n_p t_c tcc
1.94591 1.38629 4.94164 4.94164 1 65135.3 27403.3 36.1429 44202.3 4 5437
0.0 0.0 2.48491 3.17805 1 29145.6 23702.4 27.8571 97279.0 4 2239
1.38629 0.693147 4.56435 5.2575 1 18224.3 21835.9 12.5714 209801.0 2 1275
0.693147 1.38629 3.46574 6.68461 1 115183.0 26237.0 42.8571 210297.0 3 8302
2.07944 0.0 8.16622 8.16622 1 22129.6 487.286 3.33333 1682.71 3 1598
0.0 0.0 8.38206 8.38206 1 67966.0 10179.9 20.7143 134427.0 2 4757
1.94591 1.94591 9.01578 10.9617 1 29824.4 50102.3 18.5714 37476.6 5 2526
2.07944 2.19722 10.3451 14.4394 1 71909.4 26384.6 54.7143 209801.0 4 5960
1.09861 1.38629 5.2575 6.35611 1 66669.6 5889.57 46.8571 97279.0 4 5094
2.48491 2.07944 9.69425 14.7441 1 5707.29 5323.86 10.8571 11829.3 6 4087
tp_sum
22072.1
7288.35
6922.05
18695.9
1831.61
9574.76
10830.0
15834.2
7680.14
2287.19
[6999251 rows x 22 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.

In [25]:
ids = test['id']

In [26]:
del test['id']

In [27]:
demand_log = model.predict(test)

In [30]:
sub = gl.SFrame({'id':ids,'Demanda_uni_equil':demand_log})

In [33]:
import math
sub['Demanda_uni_equil'] = sub['Demanda_uni_equil'].apply(lambda x: math.expm1(max(0, x)))

In [34]:
sub


Out[34]:
Demanda_uni_equil id
3.39354070541 4721633
2.12662481892 6035419
1.70040726436 3462602
3.01457863433 855102
9.90822214871 3242700
12.0511999688 2421613
3.74027486912 1573296
8.52467448859 1619375
1.88249081059 3539201
10.1654762286 1631781
[6999251 rows x 2 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.

In [35]:
sub.save(path+'gbrt_sub3.csv',format='csv')

In [43]:
math.expm1(math.log1p(2))


Out[43]:
1.9999999999999996

In [40]:



Out[40]:
1.0986122886681098

In [ ]: