Find Relationships between Stocks


In [24]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import seaborn as sns

%matplotlib inline

In [2]:
def years_before(day, years):
    """Return midnight on the calendar date ``years`` years before ``day``.

    Parameters
    ----------
    day : datetime
        Reference date; only its year, month and day are used.
    years : int
        Number of whole years to step back.

    Returns
    -------
    datetime
        Same month and day in the earlier year, at 00:00. When ``day`` is
        29 February and the earlier year is not a leap year, 28 February
        is returned instead of raising.

    Raises
    ------
    ValueError
        If the target year is outside the range ``datetime`` supports.
    """
    target_year = day.year - years
    try:
        return datetime(target_year, day.month, day.day)
    except ValueError:
        # 29 February is the only month/day pair that can be missing from
        # another year; anything else is a genuine error and is re-raised.
        if (day.month, day.day) != (2, 29):
            raise
        return datetime(target_year, 2, 28)


# set dates: a two-year window ending now
end = datetime.now()
start = years_before(end, 2)

1. Plot Multiple Stocks


In [19]:
def _download(symbol):
    """Fetch one ticker through the 'yahoo' reader for the start..end window."""
    return web.DataReader(symbol, 'yahoo', start, end)


# One download per ticker, in the same order as the columns assembled below.
FCT = _download("J69U.SI")
CMT = _download("C38U.SI")
SGX = _download("S68.SI")
SLA = _download("5CP.SI")
MIT = _download("ME8U.SI")
SING = _download("Z74.SI")
FIRST = _download("AW9U.SI")

# Combine the adjusted close ('Adj Close') of every stock into a single frame,
# one column per stock.
histories = [
    ('FCT', FCT), ('CMT', CMT), ('SGX', SGX), ('SLA', SLA),
    ('MIT', MIT), ('SING', SING), ('FIRST', FIRST),
]
df = pd.DataFrame({label: history['Adj Close'] for label, history in histories})
df.head(2)


Out[19]:
CMT FCT FIRST MIT SGX SING SLA
Date
2015-02-16 1.885 1.801 1.18166 1.33454 7.581 3.99 0.98637
2015-02-17 1.876 1.810 1.16853 1.33882 7.599 3.96 0.96733

In [20]:
# Draw every column of df as a line on one shared axis and label the y axis.
price_ax = df.plot(figsize=(10, 4))
price_ax.set_ylabel('Price')


Out[20]:
<matplotlib.text.Text at 0x11d20b150>

2. Normalising Multiple Stocks


In [21]:
# Rebase every stock so its first row equals 1.0, which makes stocks with very
# different price levels comparable on one chart.
# .iloc[0] selects the first row by position. The previous `x[0]` used an
# integer key on a date-labelled Series, which only worked through a
# positional fallback that recent pandas versions deprecate.
returnfstart = df.div(df.iloc[0])
# The horizontal line at 1 marks "unchanged since the first day".
returnfstart.plot(figsize=(10,4)).axhline(1, lw=1, color='black')
plt.ylabel('Return From Start Price')


Out[21]:
<matplotlib.text.Text at 0x11c76a210>

In [22]:
# Fractional change of each column from one row to the next; the first row is
# NaN because it has no previous row to compare against.
df2 = df.pct_change(periods=1)
df2.head(n=3)


Out[22]:
CMT FCT FIRST MIT SGX SING SLA
Date
2015-02-16 NaN NaN NaN NaN NaN NaN NaN
2015-02-17 -0.004775 0.004997 -0.011111 0.003207 0.002374 -0.007519 -0.019303
2015-02-18 0.028785 -0.009945 -0.003748 0.006386 0.003685 0.007576 0.015744

In [7]:
# Plot the row-to-row percentage changes with a zero reference line, so gains
# and losses are easy to tell apart.
returns_ax = df2.plot(figsize=(10, 4))
returns_ax.axhline(0, color='black', lw=1)
returns_ax.set_ylabel('Daily Percentage Return')


Out[7]:
<matplotlib.text.Text at 0x11b6c4f50>

3. Finding Risk vs Returns


In [23]:
# set size of circles
area = np.pi*50

# Summarise each stock's daily returns once and reuse the result below.
# note mean represents average returns, stdev represents volatility and risk
avg_return = df2.mean()
risk = df2.std()

plt.scatter(avg_return, risk, alpha=0.5, s=area)

# Label every point with its column name, connected by a right-angled leader line.
# see this link http://matplotlib.org/users/annotations_guide.html#plotting-guide-annotation
for label, x, y in zip(df2.columns, avg_return, risk):
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(50, 50),
                 textcoords='offset points',
                 ha='center',
                 va='bottom',
                 arrowprops=dict(arrowstyle='-', connectionstyle='angle, angleA=90, angleB=180, rad=5'))

plt.ylabel('Risk / Stdev / Volatility')
plt.xlabel('Avg Return / Mean of Daily Pct Change')

# set axes limits
# The fixed limits are kept as a minimum view, but the download window moves
# with today's date, so they are widened whenever a stock would otherwise fall
# outside them and be silently hidden. The constant is the first argument on
# purpose: min()/max() then keep it if the data-derived value is NaN.
x_pad = 0.05 * (avg_return.max() - avg_return.min())
y_pad = 0.05 * (risk.max() - risk.min())
plt.ylim([0, max(0.055, risk.max() + y_pad)])
plt.xlim([min(-0.0005, avg_return.min() - x_pad),
          max(0.001, avg_return.max() + x_pad)])


Out[23]:
(-0.0005, 0.001)

4. Plot Correlations


In [119]:
# Scatter of MIT against SLA with each stock's marginal distribution on the sides.
# x, y and data are passed by keyword: newer seaborn releases no longer accept
# them positionally, while the keyword form works on old and new versions alike.
sns.jointplot(x='MIT', y='SLA', data=df, kind='scatter', color='seagreen')


Out[119]:
<seaborn.axisgrid.JointGrid at 0x15a15c490>

In [120]:
plt.figure(figsize=(8,8))
# sns.corrplot has been removed from seaborn; an annotated heatmap of the
# correlation matrix is its replacement. Rows with any missing price are
# dropped first, as before. The colour scale is pinned to [-1, 1] so that
# zero correlation sits at the neutral midpoint of the diverging colormap.
sns.heatmap(df.dropna().corr(), annot=True, fmt='.2f',
            vmin=-1, vmax=1, cmap='coolwarm', square=True)


Out[120]:
<matplotlib.axes._subplots.AxesSubplot at 0x15a7eaf90>

In [124]:
# Pairwise plot grid over all stocks, using only the rows where no column is missing.
sns.pairplot(data=df.dropna(how='any'))


Out[124]:
<seaborn.axisgrid.PairGrid at 0x15f8f9a10>

In [99]:
# use pairgrid for full control of each plot
# PairGrid creates its own figure, so no plt.figure() call is made first: the
# previous one only produced an extra, empty figure in the cell output.
fig = sns.PairGrid(df.dropna())

# define top, bottom and diagonal plots
# NOTE(review): sns.distplot and the 'cool_d' colormap name may not be accepted
# by recent seaborn releases - confirm against the installed version.
fig.map_upper(plt.scatter, color='purple')   # upper triangle: raw scatter
fig.map_lower(sns.kdeplot, cmap='cool_d')    # lower triangle: density contours
fig.map_diag(sns.distplot, bins=30)          # diagonal: per-stock distribution


Out[99]:
<seaborn.axisgrid.PairGrid at 0x15104ccd0>
<matplotlib.figure.Figure at 0x15104ce10>