In [1]:
%pylab inline
pylab.style.use('ggplot')
import pandas as pd
import numpy as np
In [2]:
# Download daily Yahoo Finance price history for each ticker.
# The result maps ticker symbol -> whatever frame `get_data_yahoo` returns for it.
import pandas_datareader as web
tickers = ['AAPL', 'IBM', 'MSFT', 'GOOGL']
daily_data = {
    symbol: web.get_data_yahoo(symbol, start='1/1/2013', end='12/31/2016')
    for symbol in tickers
}
In [3]:
# Do we have duplicate dates?
# Prints one line per ticker: the symbol and whether its index contains repeated labels.
for ticker in tickers:
    print(ticker, daily_data[ticker].index.has_duplicates)
In [4]:
# Are number of observations same for all tickers?
# Prints one line per ticker: the symbol and the (rows, columns) shape of its frame.
for ticker in tickers:
    print(ticker, daily_data[ticker].shape)
In [5]:
# Get the AAPL dataset and print a summary of its index, columns and dtypes
aapl = daily_data['AAPL']
aapl.info()
In [6]:
# Is it sorted by date?
# `Index.is_monotonic` was only an alias of `is_monotonic_increasing` and has been
# removed from recent pandas releases, so the explicit property alone is used.
print(aapl.index.is_monotonic_increasing)
In [7]:
# Resample to business day
# First show the frequency currently attached to the index.
print(aapl.index.freq)
# This will introduce nans because some business days are missing at source
# (asfreq conforms the index to the new frequency and is called here without a fill method).
aapl = aapl.asfreq('B')
In [8]:
# Preview the first rows of the AAPL frame
aapl.head()
Out[8]:
In [9]:
# Preview the last rows of the AAPL frame
aapl.tail()
Out[9]:
In [10]:
# Take the open price: select the 'Open' column of the AAPL frame
aapl_open = aapl['Open']
In [11]:
# How many open prices are missing? The shape of the NaN-only selection gives the count.
aapl_open.loc[aapl_open.isnull()].shape
Out[11]:
In [31]:
# What are those days?
# Collect the dates whose open price is missing and show which weekday each falls on.
nan_days = aapl_open[aapl_open.isnull()].index
# `DatetimeIndex.weekday_name` was removed from pandas; `day_name()` is its replacement.
nan_days.day_name()
Out[31]:
In [13]:
# Keep only the days that actually have an open price
valid_aapl_open = aapl_open.dropna()
In [14]:
# Plain line chart of the open price over time
valid_aapl_open.plot(kind='line')
Out[14]:
In [15]:
# only for 2016
# truncate(before=...) drops every row dated before 2016-01-01; since the download
# requested data only through 12/31/2016, what remains is the 2016 data.
valid_aapl_open.truncate(before='2016-1-1').head()
Out[15]:
In [16]:
# Same 2016 selection, this time via a boolean mask on the index's year
in_2016 = valid_aapl_open.index.year == 2016
valid_aapl_open.loc[in_2016].head()
Out[16]:
In [17]:
# Between June 2015 and June 2016
# Rows before the `before` date and after the `after` date are dropped.
# NOTE(review): '2015-6' is presumably parsed as 2015-06-01 — confirm.
v = valid_aapl_open.truncate(before='2015-6', after='2016-6-30')
# Print both ends of the window to check the boundaries
print(v.head())
print(v.tail())
In [18]:
# Min and max of the open price over the whole sample, displayed as a (min, max) tuple
valid_aapl_open.min(), valid_aapl_open.max()
Out[18]:
In [19]:
# When did min and max occur?
# idxmin()/idxmax() return the index label of the extreme value; .date() is then
# called on that label to print only the calendar date.
print('Min occurred on: ', valid_aapl_open.idxmin().date())
print('Max occurred on: ', valid_aapl_open.idxmax().date())
In [20]:
# Monthly averages: group the observations by the 'M' frequency and take each group's mean
# NOTE(review): newer pandas releases prefer the 'ME' alias over 'M' — confirm against
# the installed version before changing.
valid_aapl_open.resample('M').mean().head()
Out[20]:
In [21]:
# Resample using medians: same monthly grouping, aggregated with the median
# NOTE(review): newer pandas releases prefer the 'ME' alias over 'M' — confirm against
# the installed version before changing.
valid_aapl_open.resample('M').median().head()
Out[21]:
In [22]:
# Plot monthly min/max of the open price
sampler = valid_aapl_open.resample('M')
# Pass a list of aggregation names: the dict form ({'min': np.min, ...}) used a Series
# "renaming" feature that pandas removed (it now raises SpecificationError).
# The list form yields one column per aggregation, named 'min' and 'max'.
monthly_min_max = sampler.agg(['min', 'max'])
monthly_min_max.plot(kind='line', title='AAPL open price monthly min/max')
Out[22]:
In [23]:
# Rolling statistics: mean of the open price over a sliding window of 5 observations
rolling = valid_aapl_open.rolling(window=5)
five_day_mean = rolling.mean()
five_day_mean.plot.line(title='AAPL open price: 5 day rolling average')
Out[23]:
In [24]:
# EWM: exponentially weighted moving average of the open price
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.ewm.html
# halflife=250 sets the decay in terms of half-life; min_periods=250 leaves the
# result as NaN until 250 observations are available.
ewm = valid_aapl_open.ewm(halflife=250, min_periods=250)
smoothed_mean = ewm.mean()
# Start the plot at the first non-NaN value so the warm-up period is not drawn
smoothed_mean[smoothed_mean.first_valid_index():].plot(kind='line', title='AAPL open price: 250 day EWMA')
Out[24]:
In [25]:
## Construct daily returns and risk model
# One 'Close' column per ticker, placed side by side (dict keys become column labels),
# then conformed to business-day frequency with gaps forward-filled down the rows.
close_prices = (
    pd.concat({ticker: df['Close'] for ticker, df in daily_data.items()}, axis=1)
    .asfreq('B')
    .ffill(axis=0)
)
In [26]:
# Preview the first rows of the combined close-price table
close_prices.head()
Out[26]:
In [56]:
# Day-over-day change in close price; the first row has no previous day to
# difference against, so it is dropped.
daily_changes = close_prices.diff(periods=1).iloc[1:]
daily_changes.head()
Out[56]:
In [62]:
# Preview the last rows of the daily price changes
daily_changes.tail()
Out[62]:
In [59]:
# Simple return = change / previous close. The divisor is a raw array (all rows
# but the last), so the division is positional rather than aligned on the date index.
daily_returns = daily_changes.div(close_prices.values[:-1])
In [63]:
# Preview the first rows of the daily returns
daily_returns.head()
Out[63]:
In [64]:
# Preview the last rows of the daily returns
daily_returns.tail()
Out[64]:
In [73]:
# One line chart per ticker, drawn as separate subplots
daily_returns.plot.line(subplots=True, figsize=(16, 16))
Out[73]:
In [67]:
# Exponentially weighted covariance of daily returns (risk model).
smoother = daily_returns.ewm(halflife=20, min_periods=20)
# On current pandas this is a DataFrame with a (date, ticker) MultiIndex: one
# ticker-by-ticker covariance matrix per date, stacked vertically. The previous
# code relied on the removed `Panel` type (`.items`, date-wise `.iloc`).
smoothed_cov = smoother.cov()
# Drop the first 20 dates; each date occupies one row per ticker.
n_tickers = daily_returns.shape[1]
smoothed_cov = smoothed_cov.iloc[20 * n_tickers:]
# Print the covariance matrix for each of the first five remaining dates
for day in smoothed_cov.index.get_level_values(0).unique()[:5]:
    print(smoothed_cov.loc[day])
In [ ]: