In [ ]:
import pandas as pd
from pandas_datareader import data, wb
import datetime
# We will look at stock prices over the past year, starting at January 1, 2016
start = datetime.datetime(2016,1,1)
end = datetime.date.today()
# Let's get Apple stock data; Apple's ticker symbol is AAPL
# First argument is the series we want, second is the source ("yahoo" for Yahoo! Finance), third is the start date, fourth is the end date
apple = data.get_data_yahoo("AAPL", start, end)
apple.head()
N values: df.head(N)df['column_name'] or df.column_name.df[['col1', 'col2']]df.query('Date>2017')df[df.Close>140]df.describe()df.groupby(pd.TimeGrouper(freq='60D')).mean()
In [ ]:
apple.Low.head()
In [ ]:
apple[['Low','High']].head()
In [ ]:
apple.query('Date>2017').head()
In [ ]:
apple[apple.Close>140].head()
In [ ]:
apple.describe()
In [ ]:
apple.groupby(pd.TimeGrouper(freq='60D')).mean()
In [ ]:
import pylab as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (15, 9) # Change the size of plots
apple[['Adj Close','Low','High']].plot(grid = True)
In [ ]:
microsoft = data.get_data_yahoo("MSFT", start, end)
google = data.get_data_yahoo("GOOG", start, end)
# Below I create a DataFrame consisting of the adjusted closing price of these stocks, first by making a list of these objects and using the join method
stocks = pd.DataFrame({"AAPL": apple["Adj Close"],
"MSFT": microsoft["Adj Close"],
"GOOG": google["Adj Close"]})
stocks.head()
In [ ]:
# plot here
stocks.plot(grid = True,logy=True)
Read http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.rolling.html and apply a 20-day rolling averaging window to the apple adjusted close.
In [ ]:
# stocks['AAPL']...
ax = stocks['AAPL'].rolling(window=20, center=True).mean().plot(grid=True)
ax = stocks['AAPL'].plot(grid=True,ax=ax)
plt.legend()
We'd like to normalize the share price so that we can fit all of the stock trends on one plot. To do that let's normalize by the price on the start date for each stock. Check the http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html documentation to apply this scaling to each column.
Plot resulting profit time series.
In [ ]:
# apply
stock_return = stocks.apply(lambda x: x / x[0])
stock_return.head()
In [ ]:
stock_return.plot(grid = True)
So far we manually specified a few ticker symbols. In order to do larger scale analysis, we need to pull data for more tickers. To do that, we need to find a data source with the ticker names. Fortunately, Wikipedia has all of the S&P tickers in a html table at https://en.wikipedia.org/wiki/List_of_S%26P_500_companies. Use pd.read_html to grab this ticker data and then loop through all ticker and pull down the stock data for each ticker.
In [ ]:
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
sp500 = pd.read_html(url,header=0)[0]
In [ ]:
output = {}
for ticker in sp500['Ticker symbol'][:200]:
try:
output[ticker] = data.get_data_yahoo(ticker, start, end)['Adj Close']
except:
continue
df_sp500 = pd.DataFrame(output)
df_sp500.head()
In [ ]:
stock_return_sp500 = df_sp500.apply(lambda x: x / x[0])
stock_return_sp500.plot(grid = True)
plt.legend([])
In [ ]:
plt.hist(stock_return_sp500.ix[-1,:],100)
plt.grid(True)
plt.show()
In [ ]:
stock_return_sp500.ix[-1,stock_return_sp500.ix[-1,:]>2]