In [55]:
    
# Load SPY daily close prices for the sample window.
# NOTE(review): get_pricing is not imported in the visible cells -- presumably
# a builtin of the research environment; confirm.
start, end = '2002-01-02', '2015-11-09'
df = get_pricing(
    'SPY',
    fields=['close_price'],
    start_date=start,
    end_date=end,
)
    
In [56]:
    
# Quick visual check of the SPY close-price frame held in df.
df.plot()
    
    Out[56]:
    
In [57]:
    
# Simple daily return realised on each date: (P_t - P_{t-1}) / P_{t-1}.
# pct_change() dates each return on the day it is realised and puts the single
# NaN on the first row (no prior close), which is the row dropped below.
# (The previous shift(-1) form produced forward returns whose NaN sat on the
# LAST row, so dropping the first row discarded a valid return and kept the NaN.)
daily_returns = df.close_price.pct_change()
# Drop the first row (its return is undefined). .copy() makes the slice an
# independent frame so the column assignment below does not raise
# SettingWithCopyWarning.
df = df.iloc[1:].copy()
df['daily_returns'] = daily_returns
    
In [58]:
    
# Scale the daily moments by 252 (presumably trading days per year -- confirm):
# the mean scales linearly, the standard deviation with the square root.
mean_annual_return = 252 * df['daily_returns'].mean()
annualized_std = (252 ** .5) * df['daily_returns'].std()
    
In [59]:
    
# Excess of the annualised mean return over a hard-coded 0.04
# (presumably the annual risk-free rate -- confirm).
mean_excess_return = mean_annual_return - .04
# Excess return per unit of annualised standard deviation.
sharpe_ratio = mean_excess_return / annualized_std
# Excess return divided by annualised variance; reported further down as the
# "kelly optimal leverage".
opt_leverage = mean_excess_return / (annualized_std**2)
    
In [60]:
    
# Show the Sharpe ratio; the parenthesised single-argument form prints the
# same text under the Python 2 print statement and the print() function.
print(sharpe_ratio)
    
    
In [61]:
    
# Show the leverage figure; the parenthesised single-argument form prints the
# same text under the Python 2 print statement and the print() function.
print(opt_leverage)
    
    
In [62]:
    
# Translate the leverage figure into a position size for a given bankroll.
capital = 100000
# int() truncates toward zero, so the position is reported in whole units.
purchase = int(capital * opt_leverage)
# Adjacent string literals are joined at compile time, so .format() applies to
# the whole message exactly as with the backslash-continued single literal.
print(
    "If the kelly optimal leverage is {} and you have {} to invest, you should "
    "buy (and/or borrow to buy) {} worth of SPY, under the assumption you believe the "
    "expected values of your returns and standard deviations (which "
    "assumes they are normally distributed)".format(opt_leverage, capital, purchase)
)
    
    
In [63]:
    
import pandas as pd
import matplotlib.pyplot as plt
    
In [64]:
    
# Load the WFM 'price' field for the window below and plot it.
start, end = '2003-09-22', '2013-09-17'
prices = get_pricing(
    'WFM',
    fields='price',
    start_date=start,
    end_date=end,
)
prices.plot()
    
    Out[64]:
    
In [65]:
    
# Summary statistics of the WFM price data held in prices.
prices.describe()
    
    Out[65]:
In [66]:
    
# Plot the 30-observation rolling mean of the WFM prices.
# pd.rolling_mean was deprecated in pandas 0.18 and removed in 0.23, so use the
# .rolling() accessor when the object has it and fall back only on older pandas.
# Both forms default min_periods to the window size and give the same values.
(
    prices.rolling(30).mean()
    if hasattr(prices, 'rolling')
    else pd.rolling_mean(prices, 30)
).plot()
    
    Out[66]:
    
In [67]:
    
import numpy as np
from statsmodels import regression
import statsmodels.api as sm
    
In [68]:
    
# Inspect the index of the WFM price data.
prices.index
    
    Out[68]:
In [69]:
    
# Inspect the underlying array of the WFM price data.
prices.values
    
    Out[69]:
In [70]:
    
def linreg(X, Y, xlabel='SPY', ylabel='WFM'):
    """Fit Y = a + b * X by ordinary least squares, plot the fit, return the summary.

    Parameters
    ----------
    X : array-like
        Explanatory values. Coerced to a NumPy array, so a pandas Series works
        as well as a plain ndarray.
    Y : array-like
        Response values, passed to OLS unchanged.
    xlabel, ylabel : str, optional
        Axis labels for the scatter plot. The defaults keep the labels this
        helper has always drawn ('SPY' on x, 'WFM' on y).

    Returns
    -------
    The statsmodels summary object of the fitted model.

    Side effects
    ------------
    Draws a scatter of the raw data and the fitted line on the current
    matplotlib axes.
    """
    # add_constant prepends a column of ones by default, so the fitted
    # parameters are [intercept, slope] and column 1 of the design is the data.
    design = sm.add_constant(np.asarray(X))
    model = regression.linear_model.OLS(Y, design).fit()
    a = model.params[0]
    b = model.params[1]
    x_obs = design[:, 1]

    # Evaluate the fitted line on an even grid spanning the observed x-range.
    x_grid = np.linspace(x_obs.min(), x_obs.max(), 100)
    y_fit = x_grid * b + a

    plt.scatter(x_obs, Y, alpha=0.3)  # raw observations
    plt.plot(x_grid, y_fit, 'r', alpha=0.9)  # regression line, in red
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    return model.summary()
    
In [71]:
    
# Regress WFM daily returns on SPY daily returns over the window below.
start, end = '2002-01-02', '2015-11-09'
asset = get_pricing(
    'WFM', fields='price', start_date=start, end_date=end
)
benchmark = get_pricing(
    'SPY', fields='price', start_date=start, end_date=end
)
# Prices -> simple returns via percent change. The leading entry has no
# previous price and is NaN, so skip position 0 of each series.
r_a = asset.pct_change().iloc[1:]
r_b = benchmark.pct_change().iloc[1:]
# Benchmark returns are the explanatory variable, asset returns the response.
linreg(r_b.values, r_a.values)
    
    Out[71]:
    
In [72]:
    
from odo import odo
import pandas as pd
from quantopian.interactive.data.eventvestor import clinical_trials_free as clinical_trials
    
In [73]:
    
# Per the original note, clinical_trials is a Blaze object; its dshape
# attribute is inspected here (presumably the dataset schema -- confirm).
clinical_trials.dshape
    
    Out[73]:
In [74]:
    
# Count the records in the clinical_trials dataset.
clinical_trials.count()
    
    Out[74]:
In [75]:
    
# Preview the first three records of the dataset.
clinical_trials[:3]
    
    Out[75]:
In [76]:
    
# Phase III records only, reduced to three columns and ordered by timestamp.
phase_three = (
    clinical_trials[clinical_trials.clinical_phase == "Phase III"]
    [['timestamp', 'sid', 'product_name']]
    .sort('timestamp')
)
phase_three
    
    Out[76]:
In [77]:
    
# Look up the security id for GSK.
gsk_sid = symbols('GSK').sid
# Select the GSK records, sorted by timestamp in descending order (newest first).
gsk = clinical_trials[clinical_trials.sid == gsk_sid].sort('timestamp', ascending=False)
# Convert the selection into a pandas DataFrame via odo.
gsk_df = odo(gsk, pd.DataFrame)
# filter down to the Phase III trials
gsk_df = gsk_df[gsk_df.clinical_phase == "Phase III"]
    
In [78]:
    
# Show the first row to see which columns are present.
gsk_df.head(1)
    
    Out[78]:
In [79]:
    
# Drop six columns selected by position. Rebinding the result avoids an
# in-place mutation of a frame that was produced by boolean filtering, which
# can raise SettingWithCopyWarning.
# NOTE(review): the positions refer to the column order when this cell runs,
# so re-running it drops a different set of columns. Dropping by column name
# would make it re-runnable, but the names are not visible here.
gsk_df = gsk_df.drop(gsk_df.columns[[0, 2, 3, 7, 11, 12]], axis=1)
    
In [80]:
    
# Keep rows whose clinical_result is not the literal string 'NaN'.
# NOTE(review): assumes missing results are encoded as that string; real NaN
# floats would pass through this filter -- confirm.
gsk_df = gsk_df.loc[gsk_df['clinical_result'] != u'NaN']
    
In [81]:
    
# Preview the filtered GSK trial records.
gsk_df.head()
    
    Out[81]:
In [82]:
    
# Load the GSK 'price' field from 2008-01-01 to 2014-01-01.
gsk_prices = get_pricing(
    'GSK',
    fields='price',
    start_date='2008-01-01',
    end_date='2014-01-01',
)
    
In [83]:
    
# Plot the GSK price data loaded into gsk_prices.
gsk_prices.plot()
    
    Out[83]: