In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplt
from IPython.display import Image
import seaborn as sns

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [3]:
# Cap the number of DataFrame rows rendered in cell output
pd.set_option('display.max_rows', 15)

Kaggle Walmart Sales Data: load the stores, weekly sales (train), and features tables.


In [4]:
# Load the three CSV tables in one pass; order matches the names on the left.
stores, sales, features = (
    pd.read_csv(csv_path)
    for csv_path in (
        'kaggle_walmart_data/stores.csv',
        'kaggle_walmart_data/train.csv',
        'kaggle_walmart_data/features.csv',
    )
)

In [5]:
# Report how many weekly sales records were loaded
print('Rows of Weekly Sales: ', sales.shape[0])


Rows of Weekly Sales:  421570

In [6]:
# Column dtypes as loaded from CSV (Date is still a plain object column here)
sales.dtypes


Out[6]:
Store             int64
Dept              int64
Date             object
Weekly_Sales    float64
IsHoliday          bool
dtype: object

In [7]:
# Preview the first five rows of the weekly sales table
sales.head()


Out[7]:
Store Dept Date Weekly_Sales IsHoliday
0 1 1 2010-02-05 24924.50 False
1 1 1 2010-02-12 46039.49 True
2 1 1 2010-02-19 41595.55 False
3 1 1 2010-02-26 19403.54 False
4 1 1 2010-03-05 21827.90 False

In [8]:
# Convert the Date column from strings to pandas datetimes
sales['Date'] = pd.to_datetime(sales['Date'])

In [9]:
# Re-check dtypes: Date should now be datetime64[ns] after the conversion above
sales.dtypes


Out[9]:
Store                    int64
Dept                     int64
Date            datetime64[ns]
Weekly_Sales           float64
IsHoliday                 bool
dtype: object

In [10]:
def topn(group, field, n=5):
    """Return the ``n`` rows of ``group`` with the largest values in ``field``.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame to rank (typically one group passed in by ``groupby().apply``).
    field : str
        Name of the column to rank by.
    n : int, default 5
        Number of rows to keep; fewer are returned if ``group`` is shorter.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``field`` in descending order, truncated to ``n`` rows,
        with the original index labels preserved.
    """
    # sort_index(by=...) was removed from pandas; sort_values is the replacement.
    ranked = group.sort_values(by=field, ascending=False)
    # Positional slice, same semantics as the original ``[:n]``.
    return ranked.iloc[:n]

# Three highest Weekly_Sales rows for every store
sales.groupby('Store').apply(topn, field='Weekly_Sales', n=3)


Out[10]:
Store Dept Date Weekly_Sales IsHoliday
Store
1 7741 1 72 2011-11-25 203670.47 True
7689 1 72 2010-11-26 196810.42 True
9325 1 92 2011-12-23 194839.60 False
2 17896 2 72 2010-11-26 285353.53 True
11148 2 7 2010-12-24 264837.42 False
17948 2 72 2011-11-25 263558.03 True
3 25459 3 38 2010-02-05 155897.94 False
... ... ... ... ... ... ...
43 403842 43 92 2011-09-09 101792.33 True
44 408236 44 38 2012-10-26 66629.98 False
408232 44 38 2012-09-28 65599.95 False
408204 44 38 2012-03-16 64719.71 False
45 419193 45 72 2010-11-26 240758.86 True
419245 45 72 2011-11-25 198041.11 True
412837 45 7 2010-12-24 166757.11 False

135 rows × 5 columns


In [14]:
# Monthly totals for store 1. The `how=` argument of resample() was removed
# from pandas, so the aggregation is now an explicit .sum() on the resampler.
# A MonthEnd offset object is passed instead of the 'M' string alias so the
# cell does not depend on which month-end alias the installed pandas accepts.
# NOTE(review): every remaining column is summed (Store, Dept, IsHoliday too);
# only Weekly_Sales looks meaningful as a monthly total.
sales_monthly_store1 = (
    sales[sales.Store == 1]
    .set_index('Date')
    .sort_index()
    .resample(pd.offsets.MonthEnd())
    .sum()
)

In [16]:
# Plot store 1's monthly sales totals. The original cell referenced
# `sales_monthly`, which is not defined anywhere in the visible notebook and
# would fail on a fresh kernel.
# NOTE(review): assumes the store-1 frame was the intended series — confirm.
sales_monthly_store1.Weekly_Sales.plot(title='Store 1 monthly sales')


Out[16]:
<matplotlib.axes.AxesSubplot at 0x10ef94710>

In [ ]: