In [17]:
from matplotlib import pyplot as plt
from pandas_datareader import data
from sklearn.linear_model import LinearRegression
import datetime
import numpy as np
import pandas as pd
In [4]:
# Instruments to download: Apple, Microsoft, the S&P 500 index and
# Facebook (ticker 'FB').
tickers = [
    'AAPL',
    'MSFT',
    '^GSPC',
    'FB',
]
# Online source that pandas_datareader should query.
data_source = 'yahoo'
# Requested window: 2016-12-25 through 2017-12-25.
start_date = '2016-12-25'
end_date = '2017-12-25'
# A single DataReader call fetches all tickers for the window above.
panel_data = data.DataReader(
    name=tickers,
    data_source=data_source,
    start=start_date,
    end=end_date,
)
In [5]:
# Select the 'Close' prices from the downloaded data.
# Plain [] selection replaces the `.ix` indexer, which was deprecated in
# pandas 0.20 and removed in pandas 1.0.
close = panel_data['Close']
In [6]:
# Plot every column of `close` on one set of axes, with a grid.
ax = close.plot()
ax.grid(True)
plt.show()
In [7]:
# FB closing prices, ordered by ascending index (sort_index sorts ascending
# by default) so later positional use is well-defined.
fb_close = close['FB'].sort_index()
ax = fb_close.plot()
ax.grid(True)
plt.show()
In [9]:
# Fit a straight-line trend to the FB closing prices, using the observation
# position (0, 1, ..., s-1) as the single regressor.
model = LinearRegression(fit_intercept=True)
y = fb_close.values
s = fb_close.size # size
# Build the positions with NumPy rather than taking them from a pandas Index:
# multi-dimensional indexing such as `index[:, np.newaxis]` was deprecated in
# pandas 1.0 and raises in pandas >= 2.0.
x = np.arange(s)
# Original index values, used as the shared x-axis for both plots below.
x_shared = fb_close.index.values
# scikit-learn expects a 2-D (n_samples, n_features) design matrix.
model.fit(x[:, np.newaxis], y)
# Evaluate the fitted line at the same s positions it was trained on.
xfit = np.linspace(0, s-1, s)
yfit = model.predict(xfit[:, np.newaxis])
# data
plt.scatter(x_shared, y, s=2, color='red')
# prediction
plt.plot(x_shared, yfit)
plt.grid(True)
plt.show()
In [15]:
# Select the adjusted closing prices from the downloaded data.
# Plain [] selection replaces the `.ix` indexer, which was deprecated in
# pandas 0.20 and removed in pandas 1.0.
adj_close = panel_data['Adj Close']
# Summary statistics are the cell's displayed output.
adj_close.describe()
Out[15]:
In [22]:
# Every business day (freq='B') between start_date and end_date.
all_weekdays = pd.date_range(start_date, end_date, freq='B')
# Align the existing prices with that calendar by reindexing adj_close on it.
# Dates absent from the original data would otherwise appear as NaN rows;
# method='ffill' fills each such row with the latest earlier available price.
adj_close = adj_close.reindex(index=all_weekdays, method='ffill')
adj_close.describe()
Out[22]:
In [25]:
# Plot the FB column of the adjusted closing prices, with a grid.
ax = adj_close['FB'].plot()
ax.grid(True)
plt.show()
In [29]:
# Select the MSFT column as a Series. `.loc` replaces the `.ix` indexer, which
# was deprecated in pandas 0.20 and removed in pandas 1.0.
msft = adj_close.loc[:, 'MSFT']
# Rolling-mean window lengths, in rows of `msft`. Named once so the
# computation and the legend labels cannot drift apart.
short_window = 10
long_window = 50
# The first (window - 1) entries of each rolling mean are NaN because the
# window is not yet full.
short_rolling_msft = msft.rolling(window=short_window).mean()
long_rolling_msft = msft.rolling(window=long_window).mean()
# Plot the price together with both moving averages on a single axes.
fig, ax = plt.subplots()
ax.plot(msft.index, msft, label='MSFT')
ax.plot(short_rolling_msft.index, short_rolling_msft,
        label='{} days rolling'.format(short_window))
ax.plot(long_rolling_msft.index, long_rolling_msft,
        label='{} days rolling'.format(long_window))
ax.set_xlabel('Date')
ax.set_ylabel('Adjusted closing price ($)')
ax.legend()
plt.show()