In [1]:
import pandas as pd
import numpy as np
In [2]:
# Load the tab-separated sales export into a DataFrame.
sales = pd.read_csv(
    'foodmart.sales.tsv',
    sep='\t',
    header=0,         # the first line of the file holds the column names
    parse_dates=[2],  # parse the third column (index 2) as datetimes;
                      # presumably the `date` column used later - confirm
)
In [3]:
# Preview the first five rows of the raw sales table.
sales.head(n=5)
Out[3]:
In [4]:
# Load the tab-separated product catalogue; the first line holds the
# column names.
products = pd.read_csv(
    'foodmart.products.tsv',
    sep='\t',
    header=0,
)
In [5]:
# Preview the first five rows of the product catalogue.
products.head(n=5)
Out[5]:
In [6]:
# Attach the product name to every sales row via `product_id`.
# The inner join keeps only sales whose product_id appears in `products`.
#
# This cell rebinds `sales` to a value derived from itself. Dropping any
# `product_name` column left over from a previous run keeps the cell
# idempotent: without it, re-running the cell would produce
# `product_name_x` / `product_name_y` and break the pivot on
# `product_name` further down. On a fresh kernel the drop is a no-op
# (errors='ignore'), assuming the raw sales file has no `product_name`
# column of its own.
sales = (
    sales
    .drop(columns=['product_name'], errors='ignore')
    .merge(
        products[['product_id', 'product_name']],
        on=['product_id'],
        how='inner',
    )
)
In [7]:
# Preview the sales table after the product names were merged in.
sales.head(n=5)
Out[7]:
In [8]:
# Reshape to a wide matrix: one row per (date, store_id) pair, one column
# per product name, cells taken from the `sales` column. No aggfunc is
# passed, so pandas' default aggregation is applied when several rows share
# a cell. Combinations with no sales rows are filled with 0.
sparse_sales = sales.pivot_table(
    values='sales',
    index=['date', 'store_id'],
    columns=['product_name'],
    fill_value=0,
)
In [9]:
# Preview the first five rows of the wide (date, store) x product matrix.
sparse_sales.head(n=5)
Out[9]:
In [10]:
# Pairwise correlation between product columns, computed across the
# (date, store_id) rows. 'pearson' is the pandas default, spelled out here.
sales_correlation = sparse_sales.corr(method='pearson')
In [11]:
# Preview the first five rows of the product-by-product correlation matrix.
sales_correlation.head(n=5)
Out[11]:
In [14]:
# Products whose sales are least correlated with the chosen product:
# take that product's column of the correlation matrix, sort ascending,
# and show the five lowest values.
product_name = 'American Chicken Hot Dogs'
(
    sales_correlation
    .loc[:, [product_name]]
    .sort_values(by=product_name, ascending=True)
    .head(n=5)
)
Out[14]:
In [15]:
# Lowest correlation found in each product's column of the matrix,
# stored as a one-column frame named 'min'; show the five smallest.
min_corr = sales_correlation.min().to_frame(name='min')
min_corr.sort_values(by='min', ascending=True).head(n=5)
Out[15]:
In [16]:
# Highest correlation of each product with the others, stored as a
# one-column frame named 'max'; show the five largest.
#
# Entries equal to 1 are masked out before taking the row-wise maximum so
# that each product's self-correlation on the diagonal does not win. As in
# the original filter, any off-diagonal value that is exactly 1 is masked
# too.
#
# The previous form, np.max(filter(...)), only worked on Python 2. On
# Python 3 `filter` returns a lazy iterator that np.max does not reduce,
# so the cell never produced numeric maxima. The vectorized form below
# works on Python 3 and skips NaN correlations instead of propagating them.
max_corr = (
    sales_correlation
    .where(sales_correlation != 1.)
    .max(axis=1)
    .to_frame(name='max')
)
max_corr.sort_values(by='max', ascending=False).head()
Out[16]:
In [17]:
# Products whose sales are most correlated with the chosen product:
# take that product's column of the correlation matrix, sort descending,
# and show the five highest values (the product itself is included).
product_name = 'Plato French Roast Coffee'
(
    sales_correlation
    .loc[:, [product_name]]
    .sort_values(by=product_name, ascending=False)
    .head(n=5)
)
Out[17]: