In [55]:
# Load SPY closing prices for the study window into a one-column DataFrame.
start, end = '2002-01-02', '2015-11-09'
df = get_pricing(
    'SPY',
    fields=['close_price'],
    start_date=start,
    end_date=end,
)
In [56]:
# Plot the SPY close_price column held in df.
df.plot()
Out[56]:
In [57]:
# Simple daily return realised on each date: (close_t - close_{t-1}) / close_{t-1}.
# Shifting forward by one row puts the only undefined value in the FIRST row,
# which is the row removed below (the previous backward shift left the NaN in
# the last row while a valid first row was being discarded).
previous_close = df.close_price.shift(1)
daily_returns = (df.close_price - previous_close) / previous_close
# Drop the first row (no prior close); copy so the column assignment below
# writes to an independent frame rather than a slice of the original.
df = df.iloc[1:].copy()
df['daily_returns'] = daily_returns
In [58]:
# Scale the daily mean by 252 and the daily standard deviation by sqrt(252)
# to express both on an annual basis.
spy_daily = df.daily_returns
mean_annual_return = 252 * spy_daily.mean()
annualized_std = (252 ** .5) * spy_daily.std()
In [59]:
# Excess of the annualised mean return over a fixed 0.04 hurdle
# (presumably an annual risk-free rate -- TODO confirm).
mean_excess_return = mean_annual_return - 0.04
annualized_variance = annualized_std ** 2
# Excess return per unit of volatility.
sharpe_ratio = mean_excess_return / annualized_std
# Excess return per unit of variance; printed further down as the Kelly optimal leverage.
opt_leverage = mean_excess_return / annualized_variance
In [60]:
# Show the Sharpe ratio computed above.
print(sharpe_ratio)
In [61]:
# Show the leverage figure computed above.
print(opt_leverage)
In [62]:
# Translate the leverage figure into a whole-number position size for a given capital.
capital = 100000
purchase = int(opt_leverage * capital)
kelly_message = (
    "If the kelly optimal leverage is {} and you have {} to invest, you should "
    "buy (and/or borrow to buy) {} worth of SPY, under the assumption you believe the "
    "expected values of your returns and standard deviations (which "
    "assumes they are normally distributed)"
)
print(kelly_message.format(opt_leverage, capital, purchase))
In [63]:
import pandas as pd
import matplotlib.pyplot as plt
In [64]:
# Load the WFM price series for the ten-year window and plot it.
start, end = '2003-09-22', '2013-09-17'
prices = get_pricing('WFM', start_date=start, end_date=end, fields='price')
prices.plot()
Out[64]:
In [65]:
# Summary statistics of the WFM price series.
prices.describe()
Out[65]:
In [66]:
# Plot the 30-observation rolling mean of the WFM price series.
pd.rolling_mean(prices, window=30).plot()
Out[66]:
In [67]:
import numpy as np
from statsmodels import regression
import statsmodels.api as sm
In [68]:
# Inspect the index of the WFM price series.
prices.index
Out[68]:
In [69]:
# Inspect the underlying array of values of the WFM price series.
prices.values
Out[69]:
In [70]:
def linreg(X, Y, xlabel='SPY', ylabel='WFM'):
    """Fit an OLS line of Y on X, draw the data with the fitted line, and return the fit summary.

    Parameters
    ----------
    X : array
        Explanatory observations. A constant column is added before fitting,
        and the second column of the resulting design matrix is what gets
        plotted (assumes a NumPy array input -- the design matrix is sliced
        with ``[:, 1]``).
    Y : array
        Dependent observations, same length as X.
    xlabel, ylabel : str, optional
        Axis labels for the plot. They default to 'SPY' and 'WFM', the labels
        this helper previously hard-coded, so existing two-argument calls
        produce the same figure.

    Returns
    -------
    The object returned by ``model.summary()`` for the fitted OLS model.
    """
    # fit
    design = sm.add_constant(X)
    model = regression.linear_model.OLS(Y, design).fit()
    # First two fitted parameters, used below as intercept and slope of the line.
    a = model.params[0]
    b = model.params[1]
    # Column 1 of the design matrix holds the regressor values (column 0 is the constant).
    x_obs = design[:, 1]

    # summarize and plot
    x_line = np.linspace(x_obs.min(), x_obs.max(), 100)
    y_line = x_line * b + a
    plt.scatter(x_obs, Y, alpha=0.3)  # raw data
    plt.plot(x_line, y_line, 'r', alpha=0.9)  # regression line, in red
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    return model.summary()
In [71]:
# Regress WFM daily returns on SPY daily returns over a common window.
start, end = '2002-01-02', '2015-11-09'
pricing_window = dict(fields='price', start_date=start, end_date=end)
asset = get_pricing('WFM', **pricing_window)
benchmark = get_pricing('SPY', **pricing_window)

# Prices -> percent changes (returns); the leading element has no
# predecessor and is NaN, so it is sliced off.
r_a = asset.pct_change().iloc[1:]
r_b = benchmark.pct_change().iloc[1:]

linreg(r_b.values, r_a.values)
Out[71]:
In [72]:
from odo import odo
import pandas as pd
from quantopian.interactive.data.eventvestor import clinical_trials_free as clinical_trials
In [73]:
# Blaze: display the datashape of the clinical_trials dataset expression.
clinical_trials.dshape
Out[73]:
In [74]:
# Count the records in the clinical_trials dataset.
clinical_trials.count()
Out[74]:
In [75]:
# Preview the first three records.
clinical_trials[:3]
Out[75]:
In [76]:
# Phase III records only, reduced to three columns and ordered by timestamp.
phase_three_rows = clinical_trials[clinical_trials.clinical_phase == "Phase III"]
phase_three_columns = ['timestamp', 'sid', 'product_name']
phase_three = phase_three_rows[phase_three_columns].sort('timestamp')
phase_three
Out[76]:
In [77]:
# Select the records whose sid matches GSK, newest first, and materialise
# the expression as a pandas DataFrame via odo.
gsk_sid = symbols('GSK').sid
gsk_rows = clinical_trials[clinical_trials.sid == gsk_sid]
gsk = gsk_rows.sort('timestamp', ascending=False)
gsk_df = odo(gsk, pd.DataFrame)

# Keep only the Phase III trials.
gsk_phase_three_mask = gsk_df.clinical_phase == "Phase III"
gsk_df = gsk_df[gsk_phase_three_mask]
In [78]:
# Show the first row of the filtered GSK frame.
gsk_df.head(1)
Out[78]:
In [79]:
# Remove the columns at positions 0, 2, 3, 7, 11 and 12.
# Reassign instead of dropping in place: gsk_df is itself the result of a
# boolean filter, and mutating such a frame in place can raise
# SettingWithCopyWarning.
# NOTE: the selection is positional, so running this cell a second time on the
# already-trimmed frame would remove different columns or fail on the index.
gsk_df = gsk_df.drop(gsk_df.columns[[0, 2, 3, 7, 11, 12]], axis=1)
In [80]:
# Keep rows whose clinical_result is not the literal string u'NaN'.
# NOTE(review): this matches the text 'NaN' only; a float NaN in this column
# would not be removed here -- confirm how missing results are encoded.
has_clinical_result = gsk_df.clinical_result != u'NaN'
gsk_df = gsk_df[has_clinical_result]
In [81]:
# Show the leading rows of the trimmed GSK frame.
gsk_df.head()
Out[81]:
In [82]:
# Load the GSK price series for 2008-01-01 through 2014-01-01.
gsk_prices = get_pricing(
    'GSK',
    fields='price',
    start_date='2008-01-01',
    end_date='2014-01-01',
)
In [83]:
# Plot the GSK price series.
gsk_prices.plot()
Out[83]: