Risk


In [55]:
start = '2002-01-02'
end = '2015-11-09'
df = get_pricing('SPY', fields=['close_price'], start_date=start, end_date=end)

In [56]:
df.plot()


Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4376f07b10>

In [57]:
daily_returns = (df.close_price.shift(-1) - df.close_price) / df.close_price
df = df.ix[1:] # remove empty value
df['daily_returns'] = daily_returns

In [58]:
mean_annual_return = df.daily_returns.mean() * 252
annualized_std = df.daily_returns.std() * (252**.5)

In [59]:
mean_excess_return = mean_annual_return - .04
sharpe_ratio = mean_excess_return / annualized_std
opt_leverage = mean_excess_return / (annualized_std**2)

In [60]:
print sharpe_ratio


0.106973189664

In [61]:
print opt_leverage


0.545621394786

In [62]:
capital = 100000
purchase = int(capital * opt_leverage)
print "If the kelly optimal leverage is {} and you have {} to invest, you should \
buy (and/or borrow to buy) {} worth of SPY, under the assumption you believe the \
expected values of your returns and standard deviations (which \
assumes they are normally distributed)".format(opt_leverage, capital, purchase)


If the kelly optimal leverage is 0.545621394786 and you have 100000 to invest, you should buy (and/or borrow to buy) 54562 worth of SPY, under the assumption you believe the expected values of your returns and standard deviations (which assumes they are normally distributed)

Regression & Plotting


In [63]:
import pandas as pd
import matplotlib.pyplot as plt

In [64]:
start = '2003-09-22'
end = '2013-09-17'
prices = get_pricing('WFM', fields='price', start_date=start, end_date=end)
prices.plot()


Out[64]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4374dc1fd0>

In [65]:
prices.describe()


Out[65]:
count    2515.000000
mean       26.004168
std        12.064238
min         4.095000
25%        17.880000
50%        23.430000
75%        32.990000
max        58.150000
Name: Equity(8158 [WFM]), dtype: float64

In [66]:
pd.rolling_mean(prices, 30).plot()


Out[66]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4374cfb890>

In [67]:
import numpy as np
from statsmodels import regression
import statsmodels.api as sm

In [68]:
prices.index


Out[68]:
DatetimeIndex(['2003-09-22', '2003-09-23', '2003-09-24', '2003-09-25',
               '2003-09-26', '2003-09-29', '2003-09-30', '2003-10-01',
               '2003-10-02', '2003-10-03', 
               ...
               '2013-09-04', '2013-09-05', '2013-09-06', '2013-09-09',
               '2013-09-10', '2013-09-11', '2013-09-12', '2013-09-13',
               '2013-09-16', '2013-09-17'],
              dtype='datetime64[ns]', length=2515, freq=None, tz='UTC')

In [69]:
prices.values


Out[69]:
array([ 13.638,  13.539,  13.308, ...,  57.11 ,  57.71 ,  58.15 ])

In [70]:
def linreg(X, Y):
    # fit
    X = sm.add_constant(X)
    model = regression.linear_model.OLS(Y, X).fit()
    a = model.params[0]
    b = model.params[1]
    X = X[:, 1]

    # summarize and plot
    X2 = np.linspace(X.min(), X.max(), 100)
    Y_hat = X2 * b + a
    plt.scatter(X, Y, alpha=0.3) # Plot the raw data
    plt.plot(X2, Y_hat, 'r', alpha=0.9);  # Add the regression line, colored in red
    plt.xlabel('SPY')
    plt.ylabel('WFM')
    return model.summary()

In [71]:
start = '2002-01-02'
end = '2015-11-09'
asset = get_pricing('WFM', fields='price', start_date=start, end_date=end)
benchmark = get_pricing('SPY', fields='price', start_date=start, end_date=end)

# We have to take the percent changes to get to returns
# Get rid of the first (0th) element because it is NAN
r_a = asset.pct_change()[1:]
r_b = benchmark.pct_change()[1:]

linreg(r_b.values, r_a.values)


Out[71]:
OLS Regression Results
Dep. Variable: y R-squared: 0.249
Model: OLS Adj. R-squared: 0.249
Method: Least Squares F-statistic: 1155.
Date: Wed, 18 Nov 2015 Prob (F-statistic): 7.51e-219
Time: 22:08:03 Log-Likelihood: 8492.7
No. Observations: 3488 AIC: -1.698e+04
Df Residuals: 3486 BIC: -1.697e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
const 0.0004 0.000 0.986 0.324 -0.000 0.001
x1 0.9880 0.029 33.979 0.000 0.931 1.045
Omnibus: 2047.688 Durbin-Watson: 1.992
Prob(Omnibus): 0.000 Jarque-Bera (JB): 327316.963
Skew: 1.755 Prob(JB): 0.00
Kurtosis: 50.327 Cond. No. 81.0

Data


In [72]:
from odo import odo
import pandas as pd
from quantopian.interactive.data.eventvestor import clinical_trials_free as clinical_trials

In [73]:
# Blaze
clinical_trials.dshape


Out[73]:
dshape("""var * {
  event_id: ?float64,
  asof_date: datetime,
  trade_date: ?datetime,
  symbol: ?string,
  event_type: ?string,
  event_headline: ?string,
  clinical_phase: ?string,
  clinical_scope: ?string,
  clinical_result: ?string,
  product_name: ?string,
  event_rating: ?float64,
  timestamp: datetime,
  sid: ?int64
  }""")

In [74]:
clinical_trials.count()


Out[74]:
48448

In [75]:
clinical_trials[:3]


Out[75]:
event_id asof_date trade_date symbol event_type event_headline clinical_phase clinical_scope clinical_result product_name event_rating timestamp sid
0 138303 2007-01-03 2007-01-03 IMCL Clinical Trials ImClone Systems Commences Patient Treatment in... Phase I NaN NaN IMC-3G3 1 2007-01-04 3871
1 138180 2007-01-04 2007-01-04 DNA Clinical Trials Genentech Announces Positive Results From Rand... Phase II NaN Positive Pertuzumab 1 2007-01-05 24847
2 952759 2007-01-04 2007-01-04 VICL Clinical Trials Vical Initiates Pivotal Phase 3 Trial of Allov... Phase III NaN NaN Allovectin-7 1 2007-01-05 8763

In [76]:
phase_three = clinical_trials[clinical_trials.clinical_phase == "Phase III"][['timestamp', 'sid','product_name']].sort('timestamp')
phase_three


Out[76]:
timestamp sid product_name
0 2007-01-05 8763 Allovectin-7
1 2007-01-09 1416 FENTORA
2 2007-01-11 3871 ERBITUX
3 2007-01-25 8763 Allovectin-7
4 2007-02-09 24415 Xibrom
5 2007-02-23 24847 Avastin
6 2007-04-05 3871 ERBITUX (Cetuximab)
7 2007-04-11 3871 ERBITUX
8 2007-04-17 3871 ERBITUX (Cetuximab)
9 2007-04-26 23846 BEMA Fentanyl
10 2007-04-27 5847 Nuvion

In [77]:
gsk_sid = symbols('GSK').sid
gsk = clinical_trials[clinical_trials.sid == gsk_sid].sort('timestamp', ascending=False)
gsk_df = odo(gsk, pd.DataFrame)
# filter down to the Phase 4 trials
gsk_df = gsk_df[gsk_df.clinical_phase == "Phase III"]

In [78]:
gsk_df.head(1)


Out[78]:
event_id asof_date trade_date symbol event_type event_headline clinical_phase clinical_scope clinical_result product_name event_rating timestamp sid
0 1647384 2013-11-12 2013-11-12 GSK Clinical Trials GlaxoSmithKline Announces Phase III Stability ... Phase III NaN Negative Darapladib 1 2013-11-13 3242

In [79]:
gsk_df.drop(gsk_df.columns[[0, 2, 3, 7, 11, 12]], axis=1, inplace=True)

In [80]:
gsk_df = gsk_df[gsk_df.clinical_result != u'NaN']

In [81]:
gsk_df.head()


Out[81]:
asof_date event_type event_headline clinical_phase clinical_result product_name event_rating
0 2013-11-12 Clinical Trials GlaxoSmithKline Announces Phase III Stability ... Phase III Negative Darapladib 1
1 2013-09-05 Clinical Trials GlaxoSmithKline's MAGE-A3 Vaccine Fails to Mee... Phase III Negative MAGE-A3 1
2 2012-12-19 Clinical Trials GlaxoSmithKline, Amicus Therapeutics Announce ... Phase III Negative Migalastat HCl 1
5 2012-07-11 Clinical Trials Shionogi-ViiV Healthcare Reports Positive Init... Phase III Positive ING114467 1
6 2012-07-11 Clinical Trials GlaxoSmithKline Reports Positive Results in Ph... Phase III Positive Albiglutide 1

In [82]:
gsk_prices = get_pricing('GSK', fields='price', start_date='2008-01-01', end_date='2014-01-01')

In [83]:
gsk_prices.plot()


Out[83]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4374cbb950>