In [1]:
#Let's go ahead and start with some imports
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
from sklearn import linear_model
# For Visualization
import matplotlib.pyplot as plt
import seaborn as sns
#sns.set_style('whitegrid')
sns.set_style("ticks", {"xtick.major.size": 8, "ytick.major.size": 8})
%matplotlib inline
# For reading stock data from yahoo
from pandas.io.data import DataReader
# For time stamps
from datetime import datetime
# For division
from __future__ import division
In [2]:
# The tech stocks we'll use for this analysis
tech_list = ['FB','GOOG','MSFT','AMZN']
# Set up End and Start times for data grab
end = datetime.now()
start = datetime(end.year - 1,end.month,end.day)
# For loop for grabbing Yahoo Finance data and setting it as a DataFrame
for stock in tech_list:
    # Set the DataFrame as the stock ticker
    globals()[stock] = DataReader(stock,'yahoo',start,end)
#Creating a Date column
GOOG['Date']=GOOG.index
MSFT['Date']=MSFT.index
AMZN['Date']=AMZN.index
FB['Date']=FB.index
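Note: this notebook uses the legacy pandas.io.data module, which was removed from newer pandas releases. A minimal sketch of the same data grab, assuming the separate pandas_datareader package is installed and its 'yahoo' source is still reachable:
# Sketch only: assumes pandas_datareader is installed (pip install pandas_datareader)
# and that the 'yahoo' data source still works in your version.
from pandas_datareader import data as pdr
for stock in tech_list:
    globals()[stock] = pdr.DataReader(stock, 'yahoo', start, end)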
In [4]:
GOOG.describe()
Out[4]:
In [3]:
GOOG.info()
In [5]:
GOOG['Adj Close'].plot(legend=True,figsize=(10,4))
Out[5]:
In [6]:
FB['Volume'].plot(legend=True,figsize=(10,4))
Out[6]:
In [7]:
MSFT['Volume'].plot(legend=True,figsize=(10,4))
Out[7]:
In [8]:
AMZN['Volume'].plot(legend=True,figsize=(10,4))
Out[8]:
In [9]:
# Plot out several moving averages for FB
ma_day = [10,20,50]
for ma in ma_day:
    column_name = "MA for %s days" %(str(ma))
    FB[column_name] = pd.rolling_mean(FB['Adj Close'],ma)
FB[['Adj Close','MA for 10 days','MA for 20 days','MA for 50 days']].plot(subplots=False,figsize=(10,4))
Out[9]:
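Note: pd.rolling_mean was removed from later pandas versions, where rolling statistics became methods on the Series. A minimal sketch of the equivalent call (same column names assumed):
# Equivalent moving-average columns in newer pandas
for ma in ma_day:
    column_name = "MA for %s days" % (str(ma))
    FB[column_name] = FB['Adj Close'].rolling(window=ma).mean()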
In [10]:
ma_day = [10,20,50]
for ma in ma_day:
    column_name = "MA for %s days" %(str(ma))
    GOOG[column_name] = pd.rolling_mean(GOOG['Adj Close'],ma)
GOOG[['Adj Close','MA for 10 days','MA for 20 days','MA for 50 days']].plot(subplots=False,figsize=(10,4))
Out[10]:
In [11]:
ma_day = [10,20,50]
for ma in ma_day:
    column_name = "MA for %s days" %(str(ma))
    MSFT[column_name] = pd.rolling_mean(MSFT['Adj Close'],ma)
MSFT[['Adj Close','MA for 10 days','MA for 20 days','MA for 50 days']].plot(subplots=False,figsize=(10,4))
Out[11]:
In [12]:
ma_day = [10,20,50]
for ma in ma_day:
    column_name = "MA for %s days" %(str(ma))
    AMZN[column_name] = pd.rolling_mean(AMZN['Adj Close'],ma)
AMZN[['Adj Close','MA for 10 days','MA for 20 days','MA for 50 days']].plot(subplots=False,figsize=(10,4))
Out[12]:
In [13]:
#daily return analysis
# We'll use pct_change to find the percent change for each day
FB['Daily Return'] = FB['Adj Close'].pct_change()
# Then we'll plot the daily return percentage
FB['Daily Return'].plot(figsize=(12,4),legend=True,linestyle='--',marker='o')
Out[13]:
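pct_change computes the simple daily return from consecutive adjusted closes,

$$ r_t = \frac{P_t - P_{t-1}}{P_{t-1}} $$

which is why the first row comes out as NaN: there is no prior day to compare against.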
In [14]:
#daily return analysis
# We'll use pct_change to find the percent change for each day
GOOG['Daily Return'] = GOOG['Adj Close'].pct_change()
# Then we'll plot the daily return percentage
GOOG['Daily Return'].plot(figsize=(12,4),legend=True,linestyle='--',marker='o')
Out[14]:
In [15]:
#daily return analysis
# We'll use pct_change to find the percent change for each day
AMZN['Daily Return'] = AMZN['Adj Close'].pct_change()
# Then we'll plot the daily return percentage
AMZN['Daily Return'].plot(figsize=(12,4),legend=True,linestyle='--',marker='o')
Out[15]:
In [16]:
#daily return analysis
# We'll use pct_change to find the percent change for each day
MSFT['Daily Return'] = MSFT['Adj Close'].pct_change()
# Then we'll plot the daily return percentage
MSFT['Daily Return'].plot(figsize=(12,4),legend=True,linestyle='--',marker='o')
Out[16]:
In [17]:
#an overall look at the average daily return using a histogram.
#We'll use seaborn to create both a histogram and kde plot on the same figure.
In [18]:
sns.distplot(FB['Daily Return'].dropna(),bins=100,color='purple')
Out[18]:
In [19]:
sns.distplot(AMZN['Daily Return'].dropna(),bins=100,color='Green')
Out[19]:
In [20]:
sns.distplot(MSFT['Daily Return'].dropna(),bins=100,color='red')
Out[20]:
In [21]:
sns.distplot(GOOG['Daily Return'].dropna(),bins=100,color='orange')
Out[21]:
In [22]:
# Printing the correlation coefficient between the GOOG and MSFT adjusted closes
np.corrcoef(GOOG['Adj Close'],MSFT['Adj Close'])
Out[22]:
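The same Pearson coefficient can also be computed directly on the pandas Series; a minimal equivalent:
# Pandas equivalent of the NumPy call above (Pearson correlation by default)
GOOG['Adj Close'].corr(MSFT['Adj Close'])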
In [23]:
# Plotting the relation between the adjusted close of the Google and Microsoft stocks
plt.scatter(GOOG['Adj Close'],MSFT['Adj Close'])
Out[23]:
In [24]:
#We are finding the linear regression between GOOG and MSFT
# Split the data into training sets
x_train=GOOG['Adj Close'][:200]
y_train=MSFT['Adj Close'][:200]
# Split the data into testing sets
x_test=GOOG['Adj Close'][200:]
y_test=MSFT['Adj Close'][200:]
# Create linear regression object
regr = linear_model.LinearRegression()
x_train=x_train.reshape((200,1))
y_train=y_train.reshape((200,1))
# Train the model using the training sets
regr.fit(x_train, y_train)
x_test=x_test.reshape((len(x_test),1))
y_test=y_test.reshape((len(y_test),1))
# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
% np.mean((regr.predict(x_test) - y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(x_test, y_test))
# Plot outputs
plt.scatter(x_test, y_test, color='black')
plt.plot(x_test, regr.predict(x_test), color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
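Note: later pandas versions removed Series.reshape, and later scikit-learn requires explicit 2-D feature arrays. A minimal sketch of the same train/test split under those versions (same variable names assumed):
# Sketch for newer pandas/scikit-learn: reshape the underlying NumPy arrays instead
x_train = GOOG['Adj Close'][:200].values.reshape(-1, 1)
y_train = MSFT['Adj Close'][:200].values.reshape(-1, 1)
x_test = GOOG['Adj Close'][200:].values.reshape(-1, 1)
y_test = MSFT['Adj Close'][200:].values.reshape(-1, 1)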
In [25]:
# Grab all the closing prices for the tech stock list into one DataFrame
closing_df = DataReader(['FB','GOOG','MSFT','AMZN'],'yahoo',start,end)['Adj Close']
In [26]:
closing_df.head()
Out[26]:
In [27]:
tech_rets = closing_df.pct_change()
In [28]:
# Compare the daily returns of Facebook and Google with a joint plot
sns.jointplot('FB','GOOG',tech_rets,kind='scatter',color='seagreen')
Out[28]:
In [29]:
# Use jointplot to compare the daily returns of Amazon and Microsoft
sns.jointplot('AMZN','MSFT',tech_rets,kind='scatter')
Out[29]:
In [30]:
# We can simply call pairplot on our DataFrame for an automatic visual analysis of all the comparisons
sns.pairplot(tech_rets.dropna())
Out[30]:
In [31]:
# sns.PairGrid() gives full control of the figure, including what kind of plots go
# in the diagonal, the upper triangle, and the lower triangle.
# Set up our figure by naming it returns_fig, and call PairGrid on the DataFrame
returns_fig = sns.PairGrid(tech_rets.dropna())
# Using map_upper we can specify what the upper triangle will look like.
returns_fig.map_upper(plt.scatter,color='purple')
# We can also define the lower triangle in the figure, including the plot type (kde) and the color map
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')
# Finally we'll define the diagonal as a series of histogram plots of the daily return
returns_fig.map_diag(plt.hist,bins=30)
Out[31]:
In [32]:
# Set up our figure by naming it returns_fig, and call PairGrid on the closing-price DataFrame
returns_fig = sns.PairGrid(closing_df)
# Using map_upper we can specify what the upper triangle will look like.
returns_fig.map_upper(plt.scatter,color='purple')
# We can also define the lower triangle in the figure, including the plot type (kde) and the color map
returns_fig.map_lower(sns.kdeplot,cmap='cool_d')
# Finally we'll define the diagonal as a series of histogram plots of the closing price
returns_fig.map_diag(plt.hist,bins=30)
Out[32]:
In [33]:
# Let's go ahead and use seaborn for a quick correlation plot of the daily returns
sns.corrplot(tech_rets.dropna(),annot=True)
Out[33]:
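Note: sns.corrplot was deprecated and later removed from seaborn. A minimal sketch of an equivalent correlation plot with the current API:
# Equivalent in newer seaborn: compute the correlation matrix with pandas
# and render it as an annotated heatmap.
sns.heatmap(tech_rets.dropna().corr(), annot=True)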
In [34]:
from sklearn.svm import SVR
def predict_price(dates, prices, x):
    # Convert the dates to ordinal integers and reshape into an n x 1 matrix
    dates = np.reshape([d.toordinal() for d in dates], (len(dates), 1))
    # Selecting our SVR model (RBF kernel)
    svr_rbf = SVR(kernel='rbf', C=1000, gamma=0.1)
    svr_rbf.fit(dates, prices) # fitting the data points to the model
    # plotting the initial datapoints
    plt.scatter(dates, prices, color='black', label='Data')
    # plotting the curve fitted by the RBF kernel
    plt.plot(dates, svr_rbf.predict(dates), color='green', label='RBF model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    # Predict the price for the single query date x
    return svr_rbf.predict(np.array([[x.toordinal()]]))[0]
# Selecting data for training the model
dates = GOOG['Date'][:250]
prices = GOOG['Adj Close'][:250]
# Calling the function and printing the predicted price for 2016-12-27
predicted_price = predict_price(dates, prices, datetime(2016, 12, 27))
print predicted_price
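An RBF kernel with gamma=0.1 is sensitive to the scale of its inputs, and raw ordinal dates are large numbers spanning a narrow range. A hedged sketch of the same fit with standardized features (reusing the dates and prices variables above; this is a suggestion, not part of the original analysis):
# Sketch only: wrap the SVR in a pipeline that standardizes the ordinal dates first.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
X = np.reshape([d.toordinal() for d in dates], (len(dates), 1))
scaled_svr = make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1000, gamma=0.1))
scaled_svr.fit(X, prices)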
In [35]:
GOOG.tail()
Out[35]:
In [36]:
def predict_price(dates, prices, x):
    # Convert the dates to ordinal integers and reshape into an n x 1 matrix
    dates = np.reshape([d.toordinal() for d in dates], (len(dates), 1))
    # Selecting our SVR model (RBF kernel)
    svr_rbf = SVR(kernel='rbf', C=1000, gamma=0.1)
    svr_rbf.fit(dates, prices) # fitting the data points to the model
    # plotting the initial datapoints
    plt.scatter(dates, prices, color='black', label='Data')
    # plotting the curve fitted by the RBF kernel
    plt.plot(dates, svr_rbf.predict(dates), color='green', label='RBF model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    return svr_rbf.predict(np.array([[x.toordinal()]]))[0]
dates = FB['Date'][:250]
prices = FB['Adj Close'][:250]
# Calling our function and printing the predicted price for 2016-12-27
predicted_price = predict_price(dates, prices, datetime(2016, 12, 27))
print predicted_price
In [37]:
FB.tail()
Out[37]:
In [38]:
def predict_price(dates, prices, x):
    # Convert the dates to ordinal integers and reshape into an n x 1 matrix
    dates = np.reshape([d.toordinal() for d in dates], (len(dates), 1))
    # Selecting our SVR model (RBF kernel)
    svr_rbf = SVR(kernel='rbf', C=1000, gamma=0.1)
    svr_rbf.fit(dates, prices) # fitting the data points to the model
    # plotting the initial datapoints
    plt.scatter(dates, prices, color='black', label='Data')
    # plotting the curve fitted by the RBF kernel
    plt.plot(dates, svr_rbf.predict(dates), color='green', label='RBF model')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    return svr_rbf.predict(np.array([[x.toordinal()]]))[0]
dates = MSFT['Date'][:250]
prices = MSFT['Adj Close'][:250]
# Calling our function and printing the predicted price for 2016-12-27
predicted_price = predict_price(dates, prices, datetime(2016, 12, 27))
print predicted_price
In [39]:
MSFT.tail()
Out[39]:
In [40]:
def predict_price(dates, prices, x):
    # Convert the dates to ordinal integers and reshape into an n x 1 matrix
    dates = np.reshape([d.toordinal() for d in dates], (len(dates), 1))
    # Defining the support vector regression model (RBF kernel)
    svr_rbf = SVR(kernel='rbf', C=1000, gamma=0.1)
    # Fitting the data points to the model
    svr_rbf.fit(dates, prices)
    # plotting the initial datapoints
    plt.scatter(dates, prices, color='black', label='Data')
    plt.plot(dates, svr_rbf.predict(dates), color='green', label='RBF model') # plotting the curve fitted by the RBF kernel
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()
    return svr_rbf.predict(np.array([[x.toordinal()]]))[0]
dates = AMZN['Date'][:250]
prices = AMZN['Adj Close'][:250]
# Calling our function and printing the predicted price for 2016-05-30
predicted_price = predict_price(dates, prices, datetime(2016, 5, 30))
print predicted_price
In [41]:
AMZN.tail()
Out[41]:
In [42]:
# Risk Analysis
# One of the most basic ways to use the information we've gathered on daily percentage returns
# is to compare the expected return with the standard deviation of the daily returns.
# Let's start by defining a new DataFrame as a cleaned version of the original tech_rets DataFrame
rets = tech_rets.dropna()
area = np.pi*20
plt.scatter(rets.mean(), rets.std(),alpha = 0.5,s =area)
# Set the x and y limits of the plot (optional, remove this if you don't see anything in your plot)
plt.ylim([0.01,0.025])
plt.xlim([-0.003,0.004])
#Set the plot axis titles
plt.xlabel('Expected returns')
plt.ylabel('Risk')
# Label the scatter plot points; for more info on how this is done, check out the link below
# http://matplotlib.org/users/annotations_guide.html
for label, x, y in zip(rets.columns, rets.mean(), rets.std()):
    plt.annotate(
        label,
        xy = (x, y), xytext = (50, 50),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        arrowprops = dict(arrowstyle = '-', connectionstyle = 'arc3,rad=-0.3'))
In [43]:
# Note the use of dropna() here, otherwise the NaN values can't be read by seaborn
sns.distplot(FB['Daily Return'].dropna(),bins=100,color='purple')
Out[43]:
In [44]:
# The 0.05 empirical quantile of the daily returns: with 95% confidence,
# the worst daily loss on FB should not exceed this fraction of the investment
rets['FB'].quantile(0.05)
Out[44]:
In [46]:
sns.distplot(MSFT['Daily Return'].dropna(),bins=100,color='Green')
Out[46]:
In [47]:
sns.distplot(AMZN['Daily Return'].dropna(),bins=100,color='Blue')
Out[47]:
In [49]:
sns.distplot(GOOG['Daily Return'].dropna(),bins=100,color='Grey')
Out[49]:
In [51]:
#### Value at Risk using the Monte Carlo method
# Set up our time horizon
days = 365
# Now our delta (one step as a fraction of the horizon)
dt = 1/days
# Now let's grab our mu (drift) from the expected daily return data we got for GOOG
mu = rets.mean()['GOOG']
# Now let's grab the volatility of the stock from the std() of the daily returns
sigma = rets.std()['GOOG']
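The simulation in the next cell follows the usual geometric-Brownian-motion discretization: each step the price moves by a deterministic drift term plus a normally distributed shock,

$$ S_{t+1} = S_t + S_t\left(\mu\,\Delta t + \sigma\sqrt{\Delta t}\,\epsilon_t\right), \qquad \epsilon_t \sim \mathcal{N}(0,1) $$

with mu and sigma taken from the GOOG daily returns above and Δt = 1/days.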
In [52]:
def stock_monte_carlo(start_price,days,mu,sigma):
    ''' This function takes in a starting stock price, days of simulation, mu and sigma, and returns a simulated price array'''
    # Define a price array
    price = np.zeros(days)
    price[0] = start_price
    # Shock and Drift
    shock = np.zeros(days)
    drift = np.zeros(days)
    # Run price array for number of days
    for x in xrange(1,days):
        # Calculate Shock (zero-mean random term; the drift is added separately below)
        shock[x] = np.random.normal(loc=0, scale=sigma * np.sqrt(dt))
        # Calculate Drift
        drift[x] = mu * dt
        # Calculate Price
        price[x] = price[x-1] + (price[x-1] * (drift[x] + shock[x]))
    return price
In [53]:
# Get start price from GOOG.head()
start_price = 569.85
for run in xrange(100):
    plt.plot(stock_monte_carlo(start_price,days,mu,sigma))
plt.xlabel("Days")
plt.ylabel("Price")
plt.title('Monte Carlo Analysis for Google')
Out[53]:
In [54]:
# Set a large number of runs
runs = 10000
# Create an empty matrix to hold the end price data
simulations = np.zeros(runs)
# Set the print options of numpy to only display 0-5 points from an array to suppress output
np.set_printoptions(threshold=5)
for run in xrange(runs):
    # Set the simulation data point as the last stock price for that run
    simulations[run] = stock_monte_carlo(start_price,days,mu,sigma)[days-1]
In [55]:
# Now we'll define q as the 1% empirical quantile, meaning 99% of the simulated end prices should fall above this value
q = np.percentile(simulations, 1)
# Now let's plot the distribution of the end prices
plt.hist(simulations,bins=200)
# Using plt.figtext to fill in some additional information onto the plot
# Starting Price
plt.figtext(0.6, 0.8, s="Start price: $%.2f" %start_price)
# Mean ending price
plt.figtext(0.6, 0.7, "Mean final price: $%.2f" % simulations.mean())
# Variance of the price (within 99% confidence interval)
plt.figtext(0.6, 0.6, "VaR(0.99): $%.2f" % (start_price - q,))
# Display 1% quantile
plt.figtext(0.15, 0.6, "q(0.99): $%.2f" % q)
# Plot a line at the 1% quantile result
plt.axvline(x=q, linewidth=4, color='r')
# Title
plt.title(u"Final price distribution for Google Stock after %s days" % days, weight='bold');