In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Load the training data from the CSV file under data/.
train = pd.read_csv('data/train.csv')
# Preview the first five rows. Ending the cell on a bare expression lets the
# notebook render the frame as a rich table instead of print()'s plain text.
train.head()
In [3]:
# Convert the 'Date' column to pandas datetimes so the `.dt` accessor
# (dayofweek / month / year) can be used on it.
train['Date'] = pd.to_datetime(train['Date'])

# Quick check on the first five rows: pandas encodes dayofweek as
# Monday=0 ... Sunday=6.
train['Date'].head().dt.dayofweek
Out[3]:
In [13]:
# Derive calendar features from the 'Date' column (needs a datetime dtype
# for the `.dt` accessor to work).
# NOTE(review): if train.csv already ships a 'DayOfWeek' column, the next line
# silently overwrites it with pandas' Monday=0 ... Sunday=6 encoding -- confirm
# that this is intended.
train['DayOfWeek'] = train['Date'].dt.dayofweek
train['Month'] = train['Date'].dt.month
train['Year'] = train['Date'].dt.year

# Mean of 'Sales' for each calendar month, drawn as a labelled bar chart.
avg_month = train[['Sales', 'Month']].groupby('Month').mean()
avg_month.plot(kind='bar', title='Average sales by month', legend=False).set(
    xlabel='Month', ylabel='Mean sales')

# Mean of 'Sales' for each day of the week, drawn as a labelled bar chart.
# The trailing semicolon keeps the notebook from echoing the return value.
avg_day = train[['Sales', 'DayOfWeek']].groupby('DayOfWeek').mean()
avg_day.plot(kind='bar', title='Average sales by day of week', legend=False).set(
    xlabel='DayOfWeek', ylabel='Mean sales');
Out[13]:
In [17]:
# Build one row per (Year, Store) pair with one column per day of the week.
# pivot_table aggregates with the mean unless told otherwise, so each entry
# is the mean of 'Sales' for that store, year and weekday.
sale_dayofweek = train.pivot_table(
    values='Sales',
    index=['Year', 'Store'],
    columns=['DayOfWeek'],
)

# Same layout, but with one column per calendar month.
sale_month = train.pivot_table(
    values='Sales',
    index=['Year', 'Store'],
    columns=['Month'],
)

# Preview the first five (Year, Store) rows of the monthly table.
sale_month.head()
Out[17]:
In [18]:
# Each row of the pivot tables is one (Year, Store) pair, so every box shows
# how that pair-level mean of 'Sales' is spread for a given weekday / month.
# Titles and axis labels are set so each figure can be read on its own.
sale_dayofweek.plot(kind='box', title='Mean sales per (Year, Store) by day of week').set(
    xlabel='DayOfWeek', ylabel='Mean sales')

# The trailing semicolon keeps the notebook from echoing the return value.
sale_month.plot(kind='box', title='Mean sales per (Year, Store) by month').set(
    xlabel='Month', ylabel='Mean sales');
Out[18]:
In [ ]: