In [1]:
import numpy as np
import pandas as pd
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
In [3]:
# Load the Walmart stock prices from CSV.
# The date column could instead be parsed straight into the index at read time:
# df = pd.read_csv('time_data/walmart_stock.csv', index_col='Date', parse_dates=True)
# Here the file is read as-is; the date handling is done step by step below.
df = pd.read_csv(filepath_or_buffer='time_data/walmart_stock.csv')
In [4]:
# Preview the first rows of the freshly loaded frame.
df.head()
Out[4]:
Create a date index from the date column
In [5]:
# Convert the whole 'Date' column in a single vectorized call instead of
# invoking pd.to_datetime once per row through .apply().
df['Date'] = pd.to_datetime(df['Date'])
In [6]:
# Preview again after the 'Date' column has been passed through pd.to_datetime.
df.head()
Out[6]:
In [7]:
# Make 'Date' the index. The returned frame is rebound to `df` rather than
# mutating in place with inplace=True, which keeps the step chainable.
df = df.set_index('Date')
In [8]:
# Preview again now that 'Date' has been set as the index.
df.head()
Out[8]:
Alias | Description |
---|---|
B | business day frequency |
C | custom business day frequency (experimental) |
D | calendar day frequency |
W | weekly frequency |
M | month end frequency |
SM | semi-month end frequency (15th and end of month) |
BM | business month end frequency |
CBM | custom business month end frequency |
MS | month start frequency |
SMS | semi-month start frequency (1st and 15th) |
BMS | business month start frequency |
CBMS | custom business month start frequency |
Q | quarter end frequency |
BQ | business quarter end frequency |
QS | quarter start frequency |
BQS | business quarter start frequency |
A | year end frequency |
BA | business year end frequency |
AS | year start frequency |
BAS | business year start frequency |
BH | business hour frequency |
H | hourly frequency |
T, min | minutely frequency |
S | secondly frequency |
L, ms | milliseconds |
U, us | microseconds |
N | nanoseconds |
In [9]:
# Inspect the frame's index, which was built from the 'Date' column by set_index.
df.index
Out[9]:
Call `resample` with the `rule` parameter, then call an aggregation function on the result. Resampling groups the rows into time bins, so an aggregation (mean, sum, count, etc.) is needed to combine the rows in each bin into a single value.
In [10]:
# Average each column over year-end ('A') bins.
df.resample('A').mean()
Out[10]:
In [11]:
def first_day(entry):
    """
    Return the first element of a period, regardless of sampling rate.

    Parameters
    ----------
    entry : pandas Series/DataFrame or any indexable sequence
        The values that fall inside one resampling bin.

    Returns
    -------
    The first element by position, or NaN when `entry` is empty.
    """
    # An empty bin has no first element; return NaN instead of raising.
    if len(entry) == 0:
        return float('nan')
    # On pandas objects, plain [0] is a label lookup when the index holds
    # integers (and a deprecated positional fallback otherwise), so it can
    # return the wrong row. .iloc is always positional.
    if hasattr(entry, 'iloc'):
        return entry.iloc[0]
    # Plain sequences (lists, tuples, arrays) index by position already.
    return entry[0]
In [12]:
# First value of each column within every year-end ('A') bin.
df.resample('A').apply(first_day)
Out[12]:
In [13]:
# Bar chart of the mean closing price in each year-end bin.
yearly_close_mean = df['Close'].resample('A').mean()
ax = yearly_close_mean.plot.bar()
ax.set_title('Yearly Mean Close Price for Walmart')
Out[13]:
In [14]:
# Bar chart of the highest opening price in each month-end bin,
# drawn wide so the monthly tick labels stay readable.
monthly_open_max = df['Open'].resample('M').max()
ax = monthly_open_max.plot.bar(figsize=(16, 6))
ax.set_title('Monthly Max Opening Price for Walmart')
Out[14]:
That is it! Up next we'll learn about time shifts!