In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Lookup dicts keyed by id: aisle_id -> `aisle` value, department_id -> `department` value.
aisles = pd.read_csv('data/aisles.csv', index_col='aisle_id')['aisle'].to_dict()
depart = pd.read_csv('data/departments.csv', index_col='department_id')['department'].to_dict()

# Products indexed by product_id; the two id columns are swapped for the values
# they look up. `dict.__getitem__` is used so an id that is missing from a
# lookup table raises KeyError rather than silently producing NaN.
products_df = pd.read_csv('data/products.csv', index_col='product_id')
products_df = products_df.assign(
    aisle=products_df['aisle_id'].map(aisles.__getitem__),
    department=products_df['department_id'].map(depart.__getitem__),
).drop(columns=['aisle_id', 'department_id'])
In [5]:
# Read the three order tables, each indexed by its `order_id` column.
# The generator yields the frames in the same order as the names on the left.
prior_df, train_df, orders_df = (
    pd.read_csv(csv_path, index_col='order_id')
    for csv_path in (
        'data/order_products__prior.csv',
        'data/order_products__train.csv',
        'data/orders.csv',
    )
)
In [6]:
# Sum of `days_since_prior_order` per user (NaN entries are skipped by `sum`).
gb1 = orders_df.groupby('user_id').days_since_prior_order.sum()
# Number of users sharing each distinct total.
data = gb1.value_counts()
# Very wide canvas: the plot draws one bar per distinct total.
plt.figure(figsize=(200,20))
# x/y must be passed by keyword: seaborn >= 0.12 rejects them positionally.
sns.barplot(x=data.index, y=data.values)
Out[6]:
In [7]:
# Number of orders per `eval_set` value. The column is passed as `x=` because
# the only positional argument seaborn >= 0.12 accepts is `data`.
sns.countplot(x=orders_df.eval_set)
Out[7]:
In [38]:
# Order counts with `order_dow` as rows and `order_hour_of_day` as columns.
# `unstack` builds the same table as reset_index().pivot(...) without relying on
# positional pivot arguments (keyword-only since pandas 2.0) or on the
# auto-generated column name 0. Combinations with no orders stay NaN.
hdf = orders_df.groupby(['order_dow', 'order_hour_of_day']).size().unstack('order_hour_of_day')
sns.heatmap(hdf)
Out[38]:
In [73]:
# Count the rows flagged `reordered == 1` for each product_id.
s = prior_df[prior_df.reordered == 1].groupby('product_id').size()
# `Series.order` was removed from pandas; `sort_values` is its replacement.
s = s.sort_values(ascending=False)
# Normalise to each product's share of all counted rows (vectorised sum
# instead of the Python builtin iterating over the Series).
s = s / s.sum()
In [102]:
# Display the `order_hour_of_day` column of the orders table.
orders_df['order_hour_of_day']
Out[102]:
In [ ]: