In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Tesla price history: 'Date' becomes the index and parse_dates=True asks
# pandas to parse that index as dates.
tesla = pd.read_csv('Tesla_Stock.csv', index_col='Date', parse_dates=True)
tesla.head()
Out[2]:
In [3]:
# Ford price history, indexed by the date-parsed 'Date' column.
ford = pd.read_csv(
    'Ford_Stock.csv',
    index_col='Date',
    parse_dates=True,
)
ford.head()
Out[3]:
In [4]:
# GM price history, indexed by the date-parsed 'Date' column.
gm = pd.read_csv('GM_Stock.csv', index_col='Date', parse_dates=True)
gm.head()
Out[4]:
In [5]:
# Overlay the 'Open' series of the three companies on one shared figure.
fig = plt.figure(figsize=(16, 8))
for company_label, stock_frame in (('Tesla', tesla), ('GM', gm), ('Ford', ford)):
    stock_frame['Open'].plot(label=company_label)
plt.title('Open Price')
plt.legend()
Out[5]:
In [6]:
# Overlay the 'Volume' series of the three companies on one shared figure.
fig = plt.figure(figsize=(16, 8))
tesla['Volume'].plot(label='Tesla')
# Legend labels are capitalised the same way as on the other comparison charts
# ('GM' / 'Ford' rather than 'gm' / 'ford').
gm['Volume'].plot(label='GM')
ford['Volume'].plot(label='Ford')
plt.title('Volume Traded')
plt.legend()
Out[6]:
In [7]:
# Index label (date) of the row where Ford's 'Volume' is largest.
# idxmax() yields the label; looking the row up with .loc and reading its
# .name hands the same label back.
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.idxmax.html
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.name.html
busiest_day = ford['Volume'].idxmax()
ford.loc[busiest_day].name
Out[7]:
In [8]:
# Date (index label) of Ford's largest traded volume.
# idxmax() is used instead of argmax(): in current pandas argmax() returns the
# integer position of the maximum, not its index label, so it would no longer
# show the date.
ford['Volume'].idxmax()
Out[8]:
In [12]:
# Same lookup spelled with an explicit column slice: select the row at the
# label of the maximum 'Volume' and read that row's name (its index label).
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.name.html
ford.loc[ford['Volume'].idxmax(), :].name
Out[12]:
In [9]:
# 'Total Traded' = opening price multiplied by volume, row by row.
tesla['Total Traded'] = tesla['Open'].mul(tesla['Volume'])
tesla.head()
Out[9]:
In [10]:
# Row-wise product of Ford's 'Open' and 'Volume', stored as 'Total Traded'.
ford_open, ford_volume = ford['Open'], ford['Volume']
ford['Total Traded'] = ford_open * ford_volume
ford.head()
Out[10]:
In [11]:
# Row-wise product of GM's 'Open' and 'Volume', stored as 'Total Traded'.
gm['Total Traded'] = gm['Volume'].rmul(gm['Open'])
gm.head()
Out[11]:
In [13]:
# Overlay the 'Total Traded' series of the three companies on one figure.
fig = plt.figure(figsize=(16, 8))
tesla['Total Traded'].plot(label='Tesla')
gm['Total Traded'].plot(label='GM')
ford['Total Traded'].plot(label='Ford')
# Title added so this chart is labelled like the other comparison charts.
plt.title('Total Traded')
plt.legend()
Out[13]:
In [14]:
# Index label (date) of the row where Tesla's 'Total Traded' is largest.
peak_traded_day = tesla['Total Traded'].idxmax()
tesla.loc[peak_traded_day].name
Out[14]:
In [15]:
# Date (index label) of Tesla's largest 'Total Traded' value.
# idxmax() is used instead of argmax(): in current pandas argmax() returns the
# integer position of the maximum, not its index label.
tesla['Total Traded'].idxmax()
Out[15]:
In [16]:
# Rolling means of GM's opening price over 50-row and 200-row windows,
# plotted together with the opening price itself.
gm_open = gm['Open']
gm['MA50'] = gm_open.rolling(window=50).mean()
gm['MA200'] = gm_open.rolling(window=200).mean()
ma_columns = ['Open', 'MA50', 'MA200']
gm[ma_columns].plot(figsize=(16, 8))
Out[16]:
In [17]:
from pandas.plotting import scatter_matrix

# Put the three 'Open' series side by side, one column per company.
# Each series is renamed before concatenation so the columns are labelled in a
# single step (the frame used to be built twice, with the first build thrown
# away and the second wrapped in a redundant pd.DataFrame call).
# https://stackoverflow.com/questions/30986989/reindex-a-dataframe-with-duplicate-index-values
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.rename.html
df = pd.concat(
    [
        tesla['Open'].rename('Tesla Open'),
        gm['Open'].rename('GM Open'),
        ford['Open'].rename('Ford Open'),
    ],
    axis=1,
)

# Pairwise scatter plots with histograms on the diagonal.
# hist_kwds = histogram keywords, passed on to the diagonal histograms.
# https://stackoverflow.com/questions/43801637/pandas-legend-for-scatter-matrix
scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='hist', hist_kwds={'bins': 50});
In [18]:
# https://matplotlib.org/examples/pylab_examples/finance_demo.html
from matplotlib.dates import DateFormatter, WeekdayLocator, DayLocator, MONDAY, date2num
from matplotlib.finance import candlestick_ohlc
# Creating a ford dataframe suitable as per our needs
ford_reset = ford.loc['2012-01'].reset_index()
ford_reset
Out[18]:
In [19]:
# Print a summary of ford_reset (columns, non-null counts and dtypes).
ford_reset.info()
In [20]:
# Convert every 'Date' entry to a matplotlib date number and keep the result
# in a new 'date_ax' column.
ford_reset['date_ax'] = ford_reset['Date'].map(date2num)
ford_reset
Out[20]:
In [21]:
# One (date_ax, Open, High, Low, Close) tuple per row of ford_reset, in row order.
list_of_cols = ['date_ax', 'Open', 'High', 'Low', 'Close']
ohlc_rows = ford_reset[list_of_cols].values
ford_values = list(map(tuple, ohlc_rows))
ford_values
Out[21]:
In [22]:
# Tick locators for a date axis.
mondays = WeekdayLocator(byweekday=MONDAY)  # major ticks on the mondays
alldays = DayLocator()                      # minor ticks on the days

# Tick label formats.
weekFormatter = DateFormatter(fmt='%b %d')  # e.g., Jan 12
dayFormatter = DateFormatter(fmt='%d')      # e.g., 12
In [24]:
# Candlestick chart built from the OHLC tuples in ford_values.
fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.2)

# Major ticks/labels from the Monday locator and week formatter, minor ticks
# from the every-day locator.
date_axis = ax.xaxis
date_axis.set_major_locator(mondays)
date_axis.set_minor_locator(alldays)
date_axis.set_major_formatter(weekFormatter)

candlestick_ohlc(ax, ford_values, width=0.6, colorup='g', colordown='r');
$ r_t = \frac{p_t}{p_{t-1}} -1$
This defines r_t (the return at time t) as the price at time t divided by the price at time t-1 (the previous day), minus 1. Basically this just tells you your percent gain (or loss) if you bought the stock on one day and then sold it the next day. While this isn't necessarily helpful for attempting to predict future values of the stock, it's very helpful in analyzing the volatility of the stock. If daily returns have a wide distribution, the stock is more volatile from one day to the next. Let's calculate the percent returns, plot them with a histogram, and decide which stock is the most stable!
In [25]:
# Daily return via shift(): each close divided by the previous row's close,
# minus 1. The first row has no predecessor, so its return is NaN.
tesla_close = tesla['Close']
tesla['returns'] = tesla_close.div(tesla_close.shift(1)).sub(1)
In [26]:
# Daily return via pct_change(): fractional change from the previous row.
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.pct_change.html
tesla['returns'] = tesla['Close'].pct_change(periods=1)
tesla.head()
Out[26]:
In [27]:
# Row-over-row fractional change of Ford's 'Close', stored as 'returns'.
ford_close = ford['Close']
ford['returns'] = ford_close.pct_change()
ford.head()
Out[27]:
In [28]:
# Row-over-row fractional change of GM's 'Close', stored as 'returns'.
gm['returns'] = gm['Close'].pct_change(periods=1)
gm.head()
Out[28]:
In [29]:
# Histogram of Ford's 'returns' column, 100 bins, with grid lines.
ford['returns'].plot(kind='hist', bins=100, grid=True)
Out[29]:
In [30]:
# Histogram of GM's 'returns' column, 100 bins, with grid lines.
gm['returns'].plot(kind='hist', bins=100, grid=True)
Out[30]:
In [31]:
# Histogram of Tesla's 'returns' column, 100 bins, with grid lines.
tesla['returns'].plot(kind='hist', bins=100, grid=True)
Out[31]:
In [32]:
# Draw the three 'returns' histograms one after another so they overlap;
# alpha=0.4 keeps the earlier ones visible through the later ones.
for company_label, stock_frame in (('Tesla', tesla), ('GM', gm), ('Ford', ford)):
    stock_frame['returns'].hist(bins=100, label=company_label, figsize=(10, 8), alpha=0.4)
plt.legend();
In [36]:
# One 'returns' column per company, labelled by company name, then a kernel
# density estimate of each column on a shared axis.
df = pd.concat(
    [
        tesla['returns'].rename('Tesla'),
        gm['returns'].rename('GM'),
        ford['returns'].rename('Ford'),
    ],
    axis=1,
)
df.plot.kde(figsize=(12, 6))
Out[36]:
In [35]:
# Box plot of every column of df.
df.plot(kind='box', figsize=(8, 12))
Out[35]:
In [37]:
# Pairwise scatter plots of the columns of df, with 50-bin histograms on the
# diagonal.
histogram_options = {'bins': 50}
scatter_matrix(df, alpha=0.2, figsize=(8, 8), diagonal='hist', hist_kwds=histogram_options);
In [38]:
# Scatter plot of the 'GM' column against the 'Ford' column of df.
df.plot.scatter(x='Ford', y='GM', alpha=0.5, figsize=(11, 8))
Out[38]:
Great! Now we can see which stock had the widest range of daily returns (you should have realized it was Tesla; our original stock price plot should also have made that obvious).
With daily cumulative returns, the question we are trying to answer is the following: if I invested $1 in the company at the beginning of the time series, how much would it be worth today? This is different from just the stock price on the current day, because it takes into account the daily returns. Keep in mind, our simple calculation here won't take into account stocks that pay a dividend. Let's look at some simple examples:
Let us say there is a stock 'ABC' that is being actively traded on an exchange. ABC has the following prices on the dates given:
Date Price
01/01/2018 10
01/02/2018 15
01/03/2018 20
01/04/2018 25
Daily Return: Daily return is the profit/loss made by the stock compared to the previous day. (This is what we just calculated above.) A value above one indicates a profit; similarly, a value below one indicates a loss. It is also expressed as a percentage to convey the information better. (When expressed as a percentage, a value above 0 means the stock has given you a profit; otherwise, a loss.) So for the above example the daily returns would be
Date Daily Return %Daily Return
01/01/2018 10/10 = 1 -
01/02/2018 15/10 = 3/2 50%
01/03/2018 20/15 = 4/3 33%
01/04/2018 25/20 = 5/4 20%
Cumulative Return: While daily returns are useful, they don't give the investor an immediate insight into the gains made to date, especially if the stock is very volatile. Cumulative return is computed relative to the day the investment is made. If the cumulative return is above one, you are making a profit; otherwise you are at a loss. So for the above example the cumulative returns are as follows
Date Cumulative Return %Cumulative Return
01/01/2018 10/10 = 1 100 %
01/02/2018 15/10 = 3/2 150 %
01/03/2018 20/10 = 2 200 %
01/04/2018 25/10 = 5/2 250 %
The formula for a cumulative daily return is:
$ i_t = (1+r_t) \times i_{t-1} $
Here we can see we are just multiplying our previous investment value $i_{t-1}$ by $1 + r_t$ (one plus our percent return for the day). Pandas makes this very simple to calculate with its cumprod() method, used in the following manner:
df[daily_cumulative_return] = ( 1 + df[pct_daily_return] ).cumprod()
In [40]:
# Running (cumulative) product of 1 + daily return.
tesla_growth_factor = tesla['returns'].add(1)
tesla['Cumulative Return'] = tesla_growth_factor.cumprod()
tesla.head()
Out[40]:
In [41]:
# Cumulative product of (daily return + 1) for Ford.
ford['Cumulative Return'] = ford['returns'].add(1).cumprod()
ford.head()
Out[41]:
In [42]:
# Cumulative product of (daily return + 1) for GM.
gm_growth_factor = gm['returns'] + 1
gm['Cumulative Return'] = gm_growth_factor.cumprod()
gm.head()
Out[42]:
In [43]:
# Overlay the 'Cumulative Return' series of the three companies on one figure.
fig = plt.figure(figsize=(16, 8))
for company_label, stock_frame in (('Tesla', tesla), ('GM', gm), ('Ford', ford)):
    stock_frame['Cumulative Return'].plot(label=company_label)
plt.title('Cumulative Return')
plt.legend()
Out[43]:
In [ ]: