In [ ]:


In [2]:
import pandas as pd
import numpy as np

In [3]:
%run -i readData.py


Shape aisles: (134, 2)
Shape departments: (21, 2)
Shape order_products__prior: (32434489, 4)
Shape order_products__train: (1384617, 4)
Shape orders: (3421083, 7)
Shape products: (49688, 4)

In [14]:
allDsCombine = [aisles, departments, order_products__prior,order_products__train, orders, products]

In [31]:
#print all datasets
for ds in allDsCombine:
    print(ds.head(n=10))
    print("-"*100)


    aisle_id                         aisle
0          1         prepared soups salads
1          2             specialty cheeses
2          3           energy granola bars
3          4                 instant foods
4          5    marinades meat preparation
5          6                         other
6          7                 packaged meat
7          8               bakery desserts
8          9                   pasta sauce
9         10              kitchen supplies
10        11              cold flu allergy
11        12                   fresh pasta
12        13                prepared meals
13        14        tofu meat alternatives
14        15              packaged seafood
15        16                   fresh herbs
16        17            baking ingredients
17        18  bulk dried fruits vegetables
18        19                 oils vinegars
19        20                  oral hygiene
----------------------------------------------------------------------------------------------------
    department_id       department
0               1           frozen
1               2            other
2               3           bakery
3               4          produce
4               5          alcohol
5               6    international
6               7        beverages
7               8             pets
8               9  dry goods pasta
9              10             bulk
10             11    personal care
11             12     meat seafood
12             13           pantry
13             14        breakfast
14             15     canned goods
15             16       dairy eggs
16             17        household
17             18           babies
18             19           snacks
19             20             deli
----------------------------------------------------------------------------------------------------
    order_id  product_id  add_to_cart_order  reordered
0          2       33120                  1          1
1          2       28985                  2          1
2          2        9327                  3          0
3          2       45918                  4          1
4          2       30035                  5          0
5          2       17794                  6          1
6          2       40141                  7          1
7          2        1819                  8          1
8          2       43668                  9          0
9          3       33754                  1          1
10         3       24838                  2          1
11         3       17704                  3          1
12         3       21903                  4          1
13         3       17668                  5          1
14         3       46667                  6          1
15         3       17461                  7          1
16         3       32665                  8          1
17         4       46842                  1          0
18         4       26434                  2          1
19         4       39758                  3          1
----------------------------------------------------------------------------------------------------
    order_id  product_id  add_to_cart_order  reordered
0          1       49302                  1          1
1          1       11109                  2          1
2          1       10246                  3          0
3          1       49683                  4          0
4          1       43633                  5          1
5          1       13176                  6          0
6          1       47209                  7          0
7          1       22035                  8          1
8         36       39612                  1          0
9         36       19660                  2          1
10        36       49235                  3          0
11        36       43086                  4          1
12        36       46620                  5          1
13        36       34497                  6          1
14        36       48679                  7          1
15        36       46979                  8          1
16        38       11913                  1          0
17        38       18159                  2          0
18        38        4461                  3          0
19        38       21616                  4          1
----------------------------------------------------------------------------------------------------
    order_id  user_id eval_set  order_number  order_dow  order_hour_of_day  \
0    2539329        1    prior             1          2                  8   
1    2398795        1    prior             2          3                  7   
2     473747        1    prior             3          3                 12   
3    2254736        1    prior             4          4                  7   
4     431534        1    prior             5          4                 15   
5    3367565        1    prior             6          2                  7   
6     550135        1    prior             7          1                  9   
7    3108588        1    prior             8          1                 14   
8    2295261        1    prior             9          1                 16   
9    2550362        1    prior            10          4                  8   
10   1187899        1    train            11          4                  8   
11   2168274        2    prior             1          2                 11   
12   1501582        2    prior             2          5                 10   
13   1901567        2    prior             3          1                 10   
14    738281        2    prior             4          2                 10   
15   1673511        2    prior             5          3                 11   
16   1199898        2    prior             6          2                  9   
17   3194192        2    prior             7          2                 12   
18    788338        2    prior             8          1                 15   
19   1718559        2    prior             9          2                  9   

    days_since_prior_order  
0                      NaN  
1                     15.0  
2                     21.0  
3                     29.0  
4                     28.0  
5                     19.0  
6                     20.0  
7                     14.0  
8                      0.0  
9                     30.0  
10                    14.0  
11                     NaN  
12                    10.0  
13                     3.0  
14                     8.0  
15                     8.0  
16                    13.0  
17                    14.0  
18                    27.0  
19                     8.0  
----------------------------------------------------------------------------------------------------
    product_id                                       product_name  aisle_id  \
0            1                         Chocolate Sandwich Cookies        61   
1            2                                   All-Seasons Salt       104   
2            3               Robust Golden Unsweetened Oolong Tea        94   
3            4  Smart Ones Classic Favorites Mini Rigatoni Wit...        38   
4            5                          Green Chile Anytime Sauce         5   
5            6                                       Dry Nose Oil        11   
6            7                     Pure Coconut Water With Orange        98   
7            8                  Cut Russet Potatoes Steam N' Mash       116   
8            9                  Light Strawberry Blueberry Yogurt       120   
9           10     Sparkling Orange Juice & Prickly Pear Beverage       115   
10          11                                  Peach Mango Juice        31   
11          12                         Chocolate Fudge Layer Cake       119   
12          13                                  Saline Nasal Mist        11   
13          14                     Fresh Scent Dishwasher Cleaner        74   
14          15                           Overnight Diapers Size 6        56   
15          16                      Mint Chocolate Flavored Syrup       103   
16          17                                  Rendered Duck Fat        35   
17          18                Pizza for One Suprema  Frozen Pizza        79   
18          19   Gluten Free Quinoa Three Cheese & Mushroom Blend        63   
19          20     Pomegranate Cranberry & Aloe Vera Enrich Drink        98   

    department_id  
0              19  
1              13  
2               7  
3               1  
4              13  
5              11  
6               7  
7               1  
8              16  
9               7  
10              7  
11              1  
12             11  
13             17  
14             18  
15             19  
16             12  
17              1  
18              9  
19              7  
----------------------------------------------------------------------------------------------------

In [60]:
#count number of data set sizes

orders.groupby(orders['eval_set']).size()


Out[60]:
eval_set
prior    3214874
test       75000
train     131209
dtype: int64

Find out if there is any products from the train sample that has never been bought before


In [68]:
products[~products['product_id'].isin(order_products__prior['product_id'])]


Out[68]:
product_id product_name aisle_id department_id
3629 3630 Protein Granola Apple Crisp 57 14
3717 3718 Wasabi Cheddar Spreadable Cheese 21 16
7044 7045 Unpeeled Apricot Halves in Heavy Syrup 88 13
25382 25383 Chocolate Go Bites 61 19
27498 27499 Non-Dairy Coconut Seven Layer Bar 100 21
36232 36233 Water With Electrolytes 100 21
37702 37703 Ultra Sun Blossom Liquid 90 loads Fabric Enhan... 75 17
43724 43725 Sweetart Jelly Beans 100 21
45970 45971 12 Inch Taper Candle White 101 17
46624 46625 Single Barrel Kentucky Straight Bourbon Whiskey 31 7
49539 49540 Pure Squeezed Lemonade 31 7

In [ ]: