In [11]:
import peakutils
import requests
import datetime
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from math import pi
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from bokeh.layouts import widgetbox, column
from bokeh.models import ColumnDataSource, Span, BoxAnnotation, \
Label, Arrow, OpenHead, NormalHead, \
VeeHead
from bokeh.models.widgets import DataTable, DateFormatter, \
TableColumn, NumberFormatter
from datetime import datetime, timedelta
from collections import OrderedDict
from IPython.core.display import display, HTML
# Inject CSS so the notebook container takes 70% of the page width.
display(HTML("<style>.container { width:70% !important; }</style>"))
# Render matplotlib figures inline in the cell output.
%matplotlib inline
#plt.style.use('ggplot')
# Apply seaborn's "darkgrid" style to the figures created below.
sns.set(style="darkgrid")
# Send bokeh `show()` output to the notebook.
output_notebook()
In [374]:
import os

# Every entry of the working directory.
d = os.listdir('.')
# Base names (text before the first '.') of the entries whose name contains
# 'VIX_temp'. The loop variable gets its own name so it no longer shadows `d`.
l = [entry.split('.')[0] for entry in d if 'VIX_temp' in entry]
In [377]:
# Read every CSV named in `l`, stack them, and move the index into a regular
# column.
w = read_multiple_df(l).reset_index()
In [376]:
# Give `w` short column names: date, Close, High, Low, Open, volume.
# NOTE(review): this needs `w` to have exactly six columns — confirm against
# the frame produced by the loading cell.
w.columns = ['date', 'C', 'H', 'L', 'O', 'vol']
In [672]:
# `ss` is not defined in the visible cells; it is presumably an earlier VIX
# frame with a 'date' column — TODO confirm.
ss.drop_duplicates(inplace=True)
ss['date'] = pd.to_datetime(ss['date'])
# Sort chronologically via the index, then put 'date' back as a column.
ss = ss.set_index('date').sort_index().reset_index()
In [673]:
# Stack `ss` and `w` with a fresh integer index, then index the result by date.
ww = pd.concat([ss, w], ignore_index=True).set_index('date')
In [685]:
# Write the combined frame to VIX.csv in the working directory, replacing any
# existing file of that name.
ww.to_csv('VIX.csv')
In [13]:
from scipy import stats
In [113]:
def read_data(file_name):
    """Read ``<file_name>.csv`` into a DataFrame.

    The first line of the file is skipped and no header row is parsed, so the
    remaining columns keep their integer positions as labels. The first column
    becomes the index.

    Parameters
    ----------
    file_name : str
        Path of the CSV file without the ``.csv`` extension.

    Returns
    -------
    pandas.DataFrame
    """
    csv_path = file_name + '.csv'
    return pd.read_csv(csv_path, header=None, skiprows=1, index_col=0)
def read_multiple_df(file_name_list):
    """Read several CSV files and stack them into one DataFrame.

    Each name is loaded with ``read_data`` and tagged with a ``symbol`` column
    holding that name, so rows stay attributable after concatenation.

    Parameters
    ----------
    file_name_list : iterable of str
        CSV base names (no ``.csv`` extension).

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in the order given.
    """
    tagged_frames = [read_data(name).assign(symbol=name)
                     for name in file_name_list]
    return pd.concat(tagged_frames)
def clean_finance_data(df):
    """Return a cleaned, chronologically sorted copy of a raw price frame.

    The input is left untouched (the previous version renamed and re-indexed
    the caller's frame in place, so running the cell twice on the same object
    failed).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by date-like values with exactly six columns, in the
        order close, high, low, open, volume, symbol.

    Returns
    -------
    pandas.DataFrame
        New frame with a ``date`` column (datetime) followed by ``close``,
        ``high``, ``low``, ``open``, ``volume`` and ``symbol``, sorted by
        date and carrying a fresh integer index.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly six columns (raised by pandas when
        the column names are assigned).
    """
    cleaned = df.copy()
    cleaned.columns = ['close', 'high', 'low', 'open', 'volume', 'symbol']
    cleaned.index = pd.to_datetime(cleaned.index)
    cleaned.index.name = 'date'
    return cleaned.sort_index().reset_index()
# Symbols to load; each needs a matching <symbol>.csv in the working directory.
file_name_list = ['VIX', 'UVXY', 'SPY']

# Load, tag and clean all symbols into one long-format frame.
df = clean_finance_data(read_multiple_df(file_name_list))

# Daily percentage change of the close, computed separately per symbol.
g = df.groupby('symbol')
df['close_pct_change'] = g['close'].pct_change() * 100
In [114]:
# One figure per symbol: closing price over time.
for name, group in g:
    fig, ax = plt.subplots(figsize=[12, 3])
    sns.lineplot(x='date', y='close', data=group, linewidth=2.5, ax=ax)
    fig.suptitle(name, fontsize=20)
    # NOTE(review): indentation was lost in the export; the font setup is
    # assumed to sit inside the loop.
    font = {'size': 16}
    plt.rc('font', **font)
In [87]:
# One figure per symbol: daily percentage change of the close over time.
g = df.groupby('symbol')
for name, group in g:
    fig, ax = plt.subplots(figsize=[12, 3])
    sns.lineplot(x='date', y='close_pct_change', data=group,
                 linewidth=2.5, ax=ax)
    fig.suptitle(name, fontsize=20)
    # NOTE(review): indentation was lost in the export; the font setup is
    # assumed to sit inside the loop.
    font = {'size': 16}
    plt.rc('font', **font)
In [118]:
# One figure per symbol: histogram of the daily percentage changes.
for name, group in g:
    fig, ax = plt.subplots(figsize=[12, 3])
    # Missing percentage changes are dropped before binning.
    group['close_pct_change'].dropna().hist(bins=100, ax=ax)
    fig.suptitle(name, fontsize=20)
    # NOTE(review): indentation was lost in the export; the font setup is
    # assumed to sit inside the loop.
    font = {'size': 16}
    plt.rc('font', **font)
In [89]:
# Inclusive date window used by the filtering cells below.
start_date = '2005-1-1'
end_date = '2010-10-1'
In [90]:
# Keep only the UVXY rows.
# NOTE(review): this rebinds `df`, which until now held all symbols; cells
# that expect the full frame must not run after this one.
df = g.get_group('UVXY')
In [91]:
# Restrict `df` to start_date <= date <= end_date (both ends inclusive).
df = df[(df['date'] >= start_date) & (df['date'] <= end_date)]
In [94]:
# VIX rows inside the inclusive 2005-01-01 .. 2010-10-01 window.
start_date = '2005-1-1'
end_date = '2010-10-1'
vix = g.get_group('VIX')
vix = vix[(vix['date'] >= start_date) & (vix['date'] <= end_date)]
In [95]:
# Join the UVXY frame (`df`) with the VIX frame on the dates both have.
# Overlapping column names get '_uvxy' / '_vix' suffixes.
a = df.merge(vix, how='inner', on=['date'], sort=True,
             suffixes=('_uvxy', '_vix'), copy=True)
In [96]:
# Regression joint plot of UVXY vs. VIX daily percentage changes.
# The result gets its own name: previously it was bound to `g`, which
# overwrote the `df.groupby('symbol')` object that a later cell still uses
# (`g.get_group('VIX')`). x/y are passed by keyword because recent seaborn
# releases no longer accept them positionally.
joint_grid = sns.jointplot(x="close_pct_change_uvxy",
                           y="close_pct_change_vix",
                           data=a, kind="reg", color="m", height=10)
In [97]:
# Probability plot of the VIX close, using scipy's default reference
# distribution (no `dist` is passed). Drawn through the pyplot interface onto
# the figure created here.
fig, ax = plt.subplots(figsize=[12, 5])
stats.probplot(vix['close'], plot=plt)
plt.show()
In [98]:
# Probability plot of the VIX daily percentage change, using scipy's default
# reference distribution (no `dist` is passed).
# `pct_change` can leave NaN in the series; those values are dropped before
# plotting, the same way the histogram cell does.
fig, ax = plt.subplots(figsize=[12, 5])
stats.probplot(vix['close_pct_change'].dropna(), plot=plt)
plt.show()
In [393]:
# VIX rows inside the inclusive 2017-01-01 .. 2018-01-01 window.
start_date = '2017-1-1'
end_date = '2018-1-1'
vix = g.get_group('VIX')
# Take an explicit copy: the next cell adds several columns to `vix`, and
# writing into a slice of the grouped frame triggers SettingWithCopyWarning.
vix = vix[(vix['date'] >= start_date) & (vix['date'] <= end_date)].copy()
In [394]:
# A VIX "spike" is a run of rows whose 14-row rolling standard deviation of
# the close is above 1.0.
vix['rolling_std_close'] = vix['close'].rolling(14).std()
# For rows inside a spike, store the time gap to the previous in-spike row.
# Rows outside a spike never get a value, so 'shifted_date' is null there;
# the start/end detection below relies on that.
# NOTE(review): the very first in-spike row also gets a null gap (nothing to
# subtract from), so the first spike is flagged one row late — confirm this
# is acceptable.
vix_mask = vix['rolling_std_close'] > 1
vix_masked = vix[vix_mask]
vix.loc[vix_mask, 'shifted_date'] = vix_masked['date'] - vix_masked['date'].shift()
# mark starting day of vix spike as true: this row has a gap value and the
# previous row does not
mask = (vix['shifted_date'].shift(1).isnull() & vix['shifted_date'].notnull())
vix_masked = vix[mask]  # not read again before being reassigned
vix.loc[mask, 'spike_start'] = True
# mark ending day of vix spike as true: this row has a gap value and the
# next row does not
mask = (vix['shifted_date'].shift(-1).isnull() & vix['shifted_date'].notnull())
vix_masked = vix[mask]  # not read again
vix.loc[mask, 'spike_end'] = True
# calculate duration from spike start to spike end: pool start and end dates,
# sort them, and store on each row the time until the next pooled date
a = vix[vix['spike_start'] == True]['date']
b = vix[vix['spike_end'] == True]['date']
c = pd.concat([a, b])
k = c.sort_values().diff().shift(-1)
vix.loc[k.index, 'spike_duration'] = k.values
# Make the two flags mutually exclusive on the flagged rows.
# NOTE(review): `&` binds tighter than `==`, so these two masks evaluate as
# `col == (True & notnull)`, not `(col == True) & notnull` — add explicit
# parentheses once the intended meaning is confirmed.
mask = (vix['spike_start'] == True & vix['shifted_date'].notnull())
vix.loc[mask, 'spike_end'] = False
mask = (vix['spike_end'] == True & vix['shifted_date'].notnull())
vix.loc[mask, 'spike_start'] = False
# time since the previous spike start, stored on spike-start rows only
# (the column name keeps the original "sink" spelling)
vix['days_sink_spike_start'] = vix[vix['spike_start'] == True]['date'].diff()
In [395]:
# for drawing vertical lines and spanning boxes for plot
spike_start_mask = vix['spike_start'] == True
spike_end_mask = vix['spike_end'] == True
ds = [pd.to_datetime(x) for x in vix.loc[spike_start_mask, 'date'].values]
de = [pd.to_datetime(x) for x in vix.loc[spike_end_mask, 'date'].values]
spike_duration_mask = vix['spike_start'] == True
durations = [x for x in vix.loc[spike_start_mask, 'spike_duration'].values]
# plot
fig, ax = plt.subplots(figsize=[17, 10])
sns.lineplot(x='date', y='rolling_std_close',
data=vix.reset_index(),
linewidth=2.5)
plt.axhline(y=1, color='r', linestyle='--')
sns.lineplot(x='date', y='close', data=vix,
linewidth=2.5)
for d in ds:
ax.axvline(x=d, color='g', linestyle=':')
for d in de:
ax.axvline(x=d, color='r', linestyle=':')
for i, duration in enumerate(durations):
plt.axvspan(ds[i], ds[i]+duration, color='b',
alpha=0.4)
fig.suptitle('vix rolling std close', fontsize=20)
font = {'size': 16}
plt.rc('font', **font)
In [396]:
# Histogram of spike durations in whole days, one value per spike start.
fig, ax = plt.subplots(figsize=[12, 7])
vix.loc[vix['spike_start'] == True, 'spike_duration'].dt.days.plot.hist(bins=50)
fig.suptitle('vix duration hist', fontsize=16)
font = {'size': 20}
plt.rc('font', **font)
plt.show()
In [397]:
# Median spike duration in whole days, taken over the spike-start rows.
median_vix_spike_duration = (
    vix.loc[vix['spike_start'] == True, 'spike_duration'].dt.days.median()
)
print('median vix spike duration', median_vix_spike_duration, 'days')
In [398]:
# Histogram of the number of days between consecutive spike starts.
fig, ax = plt.subplots(figsize=[12, 7])
vix.loc[vix['spike_start'] == True, 'days_sink_spike_start'].dt.days.plot.hist(bins=50)
fig.suptitle('days_sink_spike_start', fontsize=16)
font = {'size': 20}
plt.rc('font', **font)
plt.show()
In [399]:
# Median number of days between consecutive spike starts.
days_sink_spike_start = (
    vix.loc[vix['spike_start'] == True, 'days_sink_spike_start'].dt.days.median()
)
print('median time between vix spikes', days_sink_spike_start, 'days')
In [400]:
# Days between consecutive spike starts, used as the sample for the fit.
# The first spike has no predecessor, so its gap is missing; it is dropped
# here because scipy's `fit` cannot work with non-finite data.
counts_days_since_spike = (
    vix.loc[vix['spike_start'] == True, 'days_sink_spike_start']
    .dropna()
    .dt.days.values
)
# Evaluation grid for the fitted curves: 0..300 days.
x = np.linspace(0, 300, 1000)
# Weibull fit with the location pinned at 0 (`floc=0`).
weibull_params = stats.weibull_min.fit(counts_days_since_spike, floc=0)
shape = weibull_params[0]  # k
loc = weibull_params[1]    # location, fixed to 0 by floc
scale = weibull_params[2]  # lambda
In [401]:
# Fitted Weibull density on the grid `x`; location is left at its default
# of 0, matching the `floc=0` used in the fit.
weibull_pdf = stats.weibull_min.pdf(x, shape, scale=scale)
In [402]:
# Fitted Weibull cumulative distribution on the grid `x` (location 0).
weibull_cdf = stats.weibull_min.cdf(x, shape, loc=0, scale=scale)
In [403]:
# Histogram of days between spike starts (left axis) with the fitted Weibull
# CDF drawn dashed on a second y-axis that shares the same x-axis.
fig, ax = plt.subplots(figsize=[12, 7])
vix.loc[vix['spike_start'] == True, 'days_sink_spike_start'].dt.days.plot.hist(bins=100)
ax2 = ax.twinx()
ax2.plot(x, weibull_cdf, '--')
fig.suptitle('days_sink_spike_start', fontsize=16)
font = {'size': 20}
plt.rc('font', **font)
fig.tight_layout()
plt.show()
In [802]:
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
In [803]:
from arch import arch_model
In [ ]:
def _get_best_model(TS):
best_aic = np.inf
best_order = None
best_mdl = None
pq_rng = range(5) # [0,1,2,3,4]
d_rng = range(2) # [0,1]
for i in pq_rng:
for d in d_rng:
for j in pq_rng:
print('testing {} {} {}'.format(i, d, j))
try:
tmp_mdl = smt.ARIMA(TS, order=(i,d,j)).fit(
method='mle', trend='nc'
)
tmp_aic = tmp_mdl.aic
if tmp_aic < best_aic:
best_aic = tmp_aic
best_order = (i, d, j)
best_mdl = tmp_mdl
except: continue
print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))
return best_aic, best_order, best_mdl
In [ ]:
window_length = 252  # rolling-window size; 252 = trading days in one year
In [ ]:
# Search for the best ARIMA order on `vix`.
# NOTE(review): in the visible cells `vix` is a multi-column DataFrame, not a
# single series — confirm which column is meant to be modelled.
res_tup = _get_best_model(vix)
In [ ]:
# Number of rolling-window positions: the rows left after the first window.
foreLength = len(vix) - window_length
In [796]:
# Rolling ARIMA + GARCH forecast producing one trading signal per window.
# `lrets` and `signal` are not defined in the visible cells; they are
# presumably a return series and a pre-allocated Series — TODO confirm.
for d in range(foreLength):
    # Rolling window over the returns: rows d+1 .. d+window_length-1.
    # (Previously spelled `windowLength`, a name the notebook never defines;
    # the window size is `window_length`.)
    TS = lrets[(1 + d):(window_length + d)]
    # Find the best ARIMA fit for this window.
    res_tup = _get_best_model(TS)
    order = res_tup[1]
    model = res_tup[2]
    if model is None:
        # No ARIMA order could be fitted for this window; leave the signal
        # entry untouched instead of failing on `order[0]`.
        continue
    # Feed the ARIMA residuals to a GARCH model, reusing the ARIMA order
    # (p, d, q) as the arch_model (p, o, q) arguments.
    p_, o_, q_ = order
    am = arch_model(model.resid, p=p_, o=o_, q=q_, dist='StudentsT')
    res = am.fit(update_freq=5, disp='off')
    # One-step-ahead forecast from the fitted model.
    out = res.forecast(horizon=1, start=None, align='origin')
    # Trading signal = sign of the forecast mean: buy if positive, sell if
    # negative.
    signal.iloc[d] = np.sign(out.mean['h.1'].iloc[-1])
In [ ]:
# Fit an arch model whose (p, o, q) arguments are taken from the best ARIMA
# order found above.
p_ = res_tup[1][0]
o_ = res_tup[1][1]
q_ = res_tup[1][2]
# Student's t errors are used here on the expectation of a better fit.
# NOTE(review): `vix` is passed as a whole frame here as well — confirm the
# intended column.
am = arch_model(vix, p=p_, o=o_, q=q_, dist='StudentsT')
res = am.fit(update_freq=5)
yhat = res.forecast()
In [ ]:
In [379]:
import warnings
import sys
if not sys.warnoptions:
warnings.simplefilter("ignore")
from zipline.api import (order, record, symbol, set_benchmark,
order_target, set_commission, commission)
import zipline
import pytz
%load_ext zipline
In [380]:
# Backtest configuration.
data = OrderedDict()  # symbol -> price DataFrame, filled by read_data_backtest
capital = 3000        # passed to run_algorithm as capital_base
s = 'UVXY'            # symbol that is loaded and traded
In [381]:
def read_data_backtest(symbol='SPY'):
    """Load ``<symbol>.csv`` as a backtest price frame and register it.

    The frame is stored in the module-level ``data`` mapping under
    ``symbol`` and that mapping is returned.

    Processing steps:

    * the first file line is skipped; columns are named close, high, low,
      open, volume;
    * the index is parsed as UTC datetimes and named ``date``;
    * when a date occurs more than once only its first row is kept;
    * ``volume`` values such as ``'1.5M'`` or ``'250K'`` are expanded to
      plain floats (K = 10**3, M = 10**6, no suffix = as is).

    Parameters
    ----------
    symbol : str
        CSV base name (no ``.csv`` extension); also the key used in ``data``.

    Returns
    -------
    mapping
        The module-level ``data`` mapping, now containing ``symbol``.
    """
    df = pd.read_csv(symbol + '.csv', index_col=0, skiprows=1,
                     header=None)
    df.columns = ['close', 'high', 'low', 'open', 'volume']
    df.index = pd.to_datetime(df.index, utc=True)
    df.index.name = 'date'
    # Keep the first row for every date.
    df = df[~df.index.duplicated(keep='first')].copy()

    # Work on a string view so the suffix handling also accepts a volume
    # column that pandas already parsed as numbers (`.str` fails on those).
    raw_volume = df['volume'].astype(str)
    magnitude = raw_volume.replace(r'[KM]+$', '', regex=True).astype(float)
    multiplier = (raw_volume.str.extract(r'[\d\.]+([KM]+)', expand=False)
                  .map({'K': 10**3, 'M': 10**6})
                  .fillna(1))
    # Assign the column by name so it is replaced as a float column; writing
    # through `df.loc[:, 'volume']` keeps the old object dtype on newer pandas.
    df['volume'] = magnitude * multiplier

    data[symbol] = df
    return data
In [382]:
# Load the traded symbol and wrap the per-symbol frames in a Panel for zipline.
# NOTE(review): `pd.Panel` only exists in old pandas releases — confirm the
# pinned pandas version still provides it.
data = read_data_backtest(symbol=s)
panel = pd.Panel(data)
# The frames already carry the lowercase names zipline expects (close, high,
# low, open, volume), so the minor axis is left as is. The previous
# `panel.minor_axis = ["open", "high", "low", "close", "volume"]` only
# relabelled the axis in place, which swapped the open and close labels
# relative to the names assigned in read_data_backtest. The self-assignment
# of `major_axis` was a no-op and is gone too.
In [411]:
# Copy of `vix` with a fresh 0..n-1 integer index (old index discarded), used
# by the inspection cells below.
vvv = vix.reset_index(drop=True)
In [419]:
# Inspect the rows flagged as spike starts.
vvv[vvv['spike_start'] == True]
Out[419]:
In [417]:
# Inspect the row at position 2, written as 1+1 like the `context.i+1`
# lookup in handle_data.
vvv.iloc[1+1]
Out[417]:
In [390]:
def initialize(context):
    """Zipline ``initialize`` hook: set up per-run state on ``context``.

    Sets a per-share commission of 0.005, resolves the traded asset from the
    module-level symbol ``s`` and starts the bar counter ``context.i`` at 0.
    """
    set_commission(commission.PerShare(cost=0.005))
    context.asset = symbol(s)
    context.i = 0
def handle_data(context, data):
    """Zipline per-bar hook: target 10 shares when a VIX spike start is flagged.

    ``context.i`` is incremented on every call and the module-level ``vix``
    frame is read at position ``context.i + 1``. When that row has
    ``spike_start == True`` the position in ``context.asset`` is set to a
    target of 10 shares. The asset's current price is recorded on every bar
    under the symbol name ``s``.

    NOTE(review): the lookup assumes the rows of ``vix`` line up one-to-one
    with the backtest bars, and it reads the row *after* the counter — confirm
    this offset does not peek at a future bar.
    """
    # The original body began with a dangling `countdown =`, a syntax error
    # that kept the whole cell from running; the unfinished statement is gone.
    context.i += 1
    row_position = context.i + 1
    # Guard the positional lookup: near the end of the run the offset would
    # otherwise step past the last row of `vix` and raise IndexError.
    if row_position < len(vix) and vix.iloc[row_position]['spike_start'] == True:
        order_target(context.asset, 10)
    kwargs = {
        s: data.current(context.asset, 'price')
    }
    record(**kwargs)
# if context.i < 14:
# return
# # Compute averages
# # data.history() has to be called with the same params
# # from above and returns a pandas dataframe.
# short_mavg = data.history(context.asset, 'price',
# bar_count=9, frequency="1d").mean()
# long_mavg = data.history(context.asset, 'price',
# bar_count=50, frequency="1d").mean()
# # Trading logic
# if short_mavg > long_mavg:
# order_target(context.asset, 10)
# elif short_mavg < long_mavg:
# order_target(context.asset, 0)
# order(symbol(s), 1)
# record(SPY=data.current(symbol(s), 'price'))
# kwargs = {
# s: data.current(context.asset, 'price'),
# 'short_mavg': short_mavg,
# 'long_mavg':long_mavg
# }
# record(**kwargs)
def analyze(context, perf):
    """Zipline ``analyze`` hook: plot the portfolio value over the run.

    Parameters
    ----------
    context : object
        Zipline algorithm context (unused).
    perf : pandas.DataFrame
        Performance frame from the run; only ``portfolio_value`` is plotted.
    """
    fig = plt.figure(figsize=[20, 14])
    # Upper slot of a two-row layout; the lower slot belongs to the disabled
    # price / moving-average panel kept below.
    ax1 = fig.add_subplot(211)
    perf.portfolio_value.plot(style=['k'], ax=ax1)
    ax1.set_ylabel('portfolio value in $')
    ax1.set_xlabel('time in years')
    # ax2 = fig.add_subplot(212, sharex=ax1)
    # perf[s].plot(style=['k'], ax=ax2)
    # style = ['--', '--']
    # perf[['short_mavg', 'long_mavg']].plot(style=style, ax=ax2)
    # perf_trans = perf.iloc[
    #     [t != [] for t in perf.transactions]
    # ]
    # buys = perf_trans.iloc[
    #     [t[0]['amount'] > 0 for t in perf_trans.transactions]
    # ]
    # sells = perf_trans.iloc[
    #     [t[0]['amount'] < 0 for t in perf_trans.transactions]
    # ]
    # ax2.plot(buys.index, perf.short_mavg.ix[buys.index],
    #          '^', markersize=16, color='b')
    # ax2.plot(sells.index, perf.short_mavg.ix[sells.index],
    #          'v', markersize=16, color='r')
    # ax2.set_ylabel('price in $')
    # ax2.set_xlabel('time in years')
    plt.rc('font', size=20)
    plt.rc('lines', lw=2)
    plt.legend(loc=0)
    plt.show()
In [391]:
# Run the backtest from 2017-01-01 to 2018-01-01 (UTC) on the price panel
# built above, starting with `capital` and using the three hooks defined in
# the previous cell. The returned performance frame is kept in `perf`.
perf = zipline.run_algorithm(start=datetime(2017, 1, 1, 0, 0, 0, 0, pytz.utc),
end=datetime(2018, 1, 1, 0, 0, 0, 0, pytz.utc),
initialize=initialize,
capital_base=capital,
handle_data=handle_data,
analyze=analyze,
data=panel)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# df["Date"] = pd.to_datetime(df['Date'])
# inc = df.Close > df.Open
# dec = df.Open > df.Close
# w = 1 * 60 * 60 * 1000
# TOOLS = "pan, wheel_zoom, box_zoom, reset,save"
# p = figure(x_axis_type="datetime",
# tools=TOOLS,
# plot_height=700,
# plot_width=1500)
# p.xaxis.major_label_orientation = pi/4
# p.grid.grid_line_alpha=0.3
# p.segment(df.Date, df.High, df.Date, df.Low, color="black")
# p.vbar(df.Date[inc], w, df.Open[inc], df.Close[inc],
# fill_color="#D5E1DD", line_color="black")
# p.vbar(df.Date[dec], w, df.Open[dec], df.Close[dec],
# fill_color="#F2583E", line_color="black")
# p.line(df['Date'], df['12_ema'], legend='12_ema',
# color='orange', line_width=3)
# p.line(df['Date'], df['9_ema'], legend='9_ema',
# color='lime', line_width=3)
# for index, row in df.iterrows():
# if row['buy_crossing']:
# p.add_layout(
# Arrow(end=NormalHead(fill_color="lime", line_width=0, size=20),
# x_start=row['Date'],
# y_start=row['Open']+1,
# x_end=row['Date'],
# y_end=row['Open']))
# if row['sell_crossing']:
# p.add_layout(
# Arrow(end=NormalHead(fill_color="red", line_width=0, size=20),
# x_start=row['Date'],
# y_start=row['Open']+1,
# x_end=row['Date'],
# y_end=row['Open']))
# show(p)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
import pymc3 as pm

# Bayesian linear regression: close_return ~ Normal(alpha + beta * signal, std).
# NOTE(review): the 'signal' and 'close_return' columns are not created in the
# visible cells — confirm `df` has them before running.
with pm.Model() as model:
    # Priors: flat on the noise scale, wide normals on slope and intercept.
    std = pm.Uniform('std', 0, 100)
    beta = pm.Normal('beta', mu=0, sd=100)
    alpha = pm.Normal('alpha', mu=0, sd=100)

    # Linear predictor, tracked so it shows up in the trace.
    mean = pm.Deterministic('mean', alpha + beta*df['signal'])

    # Likelihood of the observed returns.
    obs = pm.Normal('obs', mu=mean, sd=std, observed=df['close_return'])

    # 100000 Metropolis draws.
    trace = pm.sample(100000, step=pm.Metropolis())

# Discard the first 20000 draws as burn-in.
burned_trace = trace[20000:]
In [ ]:
# Trace plots and posterior plots for std, beta and alpha, drawn from the
# trace with the first 20000 draws removed.
pm.plots.traceplot(trace=burned_trace, varnames=['std', 'beta', 'alpha'])
pm.plot_posterior(trace=burned_trace, varnames=['std', 'beta', 'alpha'], kde_plot=True)
In [ ]:
# Scatter plot with a fitted regression line: open_return against signal,
# on a 20x20 inch figure.
fig, ax = plt.subplots(figsize=(20, 20))
sns.regplot(x="signal", y="open_return", data=df, ax=ax)
In [ ]:
# trade signal using space between moving averages
# use bayes estimator regression
# thresholding based on poisson regression
In [ ]:
In [ ]:
# Request the GDAX product list and display the decoded JSON response.
r = requests.get('https://api.gdax.com/products')
r.json()
In [ ]:
# Candle granularities in seconds, keyed by a readable label.
gran = {
    '1 minute': 60,
    '5 minutes': 300,
    '15 minutes': 900,
    # Key was misspelled '30 minuets', so a lookup with the correct label
    # raised KeyError.
    '30 minutes': 1800
}
# Query parameters for the candles request: 15-minute candles.
params = {'granularity': gran['15 minutes']}
In [ ]:
# Fetch LTC-USD candles with the query parameters chosen above and load the
# JSON response into a DataFrame.
# `gdax_base_endpoint` is not defined in the visible cells — it is presumably
# the API root URL; TODO confirm.
r = requests.get(
    '/'.join([gdax_base_endpoint, 'products', 'LTC-USD', 'candles']),
    params=params)
df = pd.DataFrame(r.json())
In [ ]:
# Name the candle columns and convert the epoch-seconds 'time' column to
# datetimes.
# NOTE(review): `c` is not defined in the visible cells; it must be a list of
# column names that includes 'time' — confirm.
df.columns = c
df['time'] = pd.to_datetime(df['time'], unit='s')
In [ ]:
# Inputs for the helper functions defined in the cells below.
# NOTE(review): min_date_string is later than max_date_string — confirm the
# two values are not swapped.
min_date_string = '2017-08-17'
max_date_string = '2017-08-05'
shares = 100
# Bokeh layout items, shown as one column at the end of this cell.
column_list = []
TOOLS = "pan, wheel_zoom, reset"
# generate the figure
p = figure(x_axis_type="datetime",
tools=TOOLS,
plot_width=1000,
title="tech analysis")
p.background_fill_color= "#dddddd"
# x tick labels rotated by pi/4 radians
p.xaxis.major_label_orientation = np.pi / 4
p.grid.grid_line_alpha=0.3
p.grid.grid_line_color="white"
# High-low wick for every candle.
# NOTE(review): this uses columns `t`, `H`, `L` whereas the cell above refers
# to a 'time' column — confirm the column names in `c`.
p.segment(df.t, df.H, df.t, df.L, color="black")
# Candle bodies; add_candlestick_lines is defined in a later cell, so that
# cell has to run first.
add_candlestick_lines(p)
column_list.insert(0, p)
show(column(column_list))
In [ ]:
def add_stop_loss_line(p, min_date_string, max_date_string, shares):
    """Draw a stop-loss line on ``p`` and queue the matching loss table.

    ``make_loss_table`` supplies the stop-loss price and a table; the table
    is appended to the module-level ``column_list`` and the price is drawn
    as a dashed purple horizontal line.

    Parameters
    ----------
    p : bokeh figure
        Figure that receives the line.
    min_date_string, max_date_string : str
        Date bounds forwarded to ``make_loss_table``.
    shares : number
        Share count forwarded to ``make_loss_table``.
    """
    slp, loss_table = make_loss_table(min_date_string, max_date_string, shares)
    column_list.append(loss_table)
    stop_loss_marker = Span(location=slp,
                            dimension='width',
                            line_color='purple',
                            line_dash='dashed',
                            line_width=2)
    p.add_layout(stop_loss_marker)
def add_resistance_lines(p, minimum, maximum):
    """Draw dashed horizontal support and resistance lines on ``p``.

    Parameters
    ----------
    p : bokeh figure
        Figure that receives the lines.
    minimum : number
        Level of the support line, drawn in red.
    maximum : number
        Level of the resistance line, drawn in ``#00FF00``.
    """
    # Same order as before: the maximum line is added first, then the minimum.
    levels = ((maximum, '#00FF00'), (minimum, 'red'))
    for level, colour in levels:
        p.add_layout(
            Span(location=level,
                 dimension='width',
                 line_color=colour,
                 line_dash='dashed',
                 line_width=2)
        )
def add_candlestick_lines(p):
    """Draw candlestick bodies on ``p`` from the module-level ``df``.

    Rows whose close (``C``) is above the open (``O``) are filled with
    ``#00FF00``; rows whose open is above the close are filled with
    ``#F2583E``. Rows with equal open and close get no body.

    Parameters
    ----------
    p : bokeh figure
        Figure that receives the bars.
    """
    bar_width = 12 * 60 * 60 * 1000  # half day in ms
    rising = df.C > df.O
    falling = df.O > df.C
    # Rising candles are drawn first, then falling ones.
    for selector, fill in ((rising, "#00FF00"), (falling, "#F2583E")):
        p.vbar(df.t[selector],
               bar_width,
               df.O[selector],
               df.C[selector],
               fill_color=fill,
               line_color="black")
In [ ]:
def make_profit_table(shares, min_date_string, max_date_string):
    """Build a one-row bokeh ``DataTable`` summarising the projected profit.

    The range maximum is used as the exit price and the latest price as the
    entry price; the profit is the per-share margin times ``shares``.

    Parameters
    ----------
    shares : number
        Number of shares in the position.
    min_date_string, max_date_string : str
        Date bounds forwarded to ``get_min_max_range`` and
        ``get_profit_margin``.

    Returns
    -------
    bokeh DataTable
        950x50 table using the columns from ``get_profit_columns``.
    """
    minimum, maximum = get_min_max_range(min_date_string, max_date_string)
    margin = get_profit_margin(min_date_string, max_date_string)
    margin_percent = get_profit_margin(min_date_string, max_date_string,
                                       percent=True)
    price_latest = get_latest()
    # One-element lists: the table has a single row.
    row = {
        'shares': [shares],
        'profit %': [margin_percent],
        'profit margin': [margin],
        'entry price per share': [price_latest],
        'exit price per share': [maximum],
        'entry price': [price_latest * shares],
        'exit price': [maximum * shares],
        'profit': [margin * shares]
    }
    return DataTable(source=ColumnDataSource(row),
                     columns=get_profit_columns(),
                     width=950,
                     height=50)
def get_profit_columns():
    """Return the ``TableColumn`` definitions for the profit table.

    Every column uses its field name as its title. Money columns share one
    ``$0,0.00`` formatter, the percentage column uses ``0.0%``, and the
    share count has no formatter.

    Returns
    -------
    list of bokeh TableColumn
        Columns in display order.
    """
    money = NumberFormatter(format='$0,0.00')
    percent = NumberFormatter(format='0.0%')
    # (field/title, extra TableColumn keyword arguments), in display order.
    column_specs = [
        ("shares", {'width': 100}),
        ("profit %", {'width': 100, 'formatter': percent}),
        ("profit margin", {'width': 200, 'formatter': money}),
        ("entry price per share", {'width': 300, 'formatter': money}),
        ("exit price per share", {'formatter': money}),
        ("entry price", {'formatter': money}),
        ("exit price", {'formatter': money}),
        ("profit", {'formatter': money}),
    ]
    return [TableColumn(field=label, title=label, **options)
            for label, options in column_specs]