In [29]:
import pandas as pd
import numpy as np
import json
import seaborn as sns
import requests
%matplotlib inline
In [30]:
# Fetch daily BTC/USD history (histoday endpoint, CCCAGG exchange aggregate,
# limit=2000) from CryptoCompare and decode the JSON body.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting it surface
# as a confusing JSON/KeyError in a later cell.
response = requests.get(
    'https://min-api.cryptocompare.com/data/histoday?fsym=BTC&tsym=USD&limit=2000&aggregate=1&e=CCCAGG',
    timeout=30,
)
response.raise_for_status()
data = response.json()
In [31]:
# Flatten the records stored under data['Data'] into a DataFrame.
# pd.io.json.json_normalize has been deprecated since pandas 1.0 in favour of
# the top-level pd.json_normalize and is absent from recent releases, so use
# the top-level function when it exists and fall back only on older pandas.
_json_normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize
df = _json_normalize(data['Data'])

# 'time' is parsed as seconds since the Unix epoch.
df['time'] = pd.to_datetime(df['time'], unit='s')

# Preview only the first rows instead of dumping the whole frame.
df.head()
Out[31]:
In [32]:
# Persist the raw daily bars as CSV, then display each column as a
# {row label: value} dictionary.
csv_path = '../bitcoin-daily-bars/latest-btcusd-cccagg.csv'
df.to_csv(csv_path)
df.apply(pd.Series.to_dict)
Out[32]:
In [33]:
In [37]:
import json
def to_json(df, path):
    """Write the rows of ``df`` to ``path`` as a JSON array of objects.

    Each row becomes one ``{column: value}`` object. The ``'time'`` value is
    converted with ``str()`` because pandas timestamps are not JSON
    serialisable; frames without a ``'time'`` column are written unchanged
    instead of raising ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows are exported.
    path : str
        Destination file; it is overwritten if it already exists.
    """
    records = []
    # Iterate lazily; wrapping iterrows() in list() only built a throwaway copy.
    for _, row in df.iterrows():
        record = row.to_dict()
        if 'time' in record:
            record['time'] = str(record['time'])
        records.append(record)
    with open(path, 'w') as f:
        json.dump(records, f)
# Export the daily bars as a JSON array next to the CSV copy.
json_path = '../bitcoin-daily-bars/latest-btcusd-cccagg.json'
to_json(df, json_path)
In [67]:
# Preview the last rows of the `btc` frame.
# NOTE(review): in the visible notebook `btc` is only assigned in a later cell,
# so a fresh top-to-bottom run would raise NameError here — confirm and move
# this preview below the cell that builds `btc`.
btc.tail()
Out[67]:
In [ ]:
In [71]:
# Show the first bars dated after 2017-04-14.
# Compare as datetimes rather than as strings: the text produced by
# astype('str') may or may not carry a ' 00:00:00' suffix, which changes
# whether the 2017-04-14 bar itself passes a lexical '>' test.
df[df['time'] > pd.Timestamp('2017-04-14')].head()
Out[71]:
In [72]:
# Re-label the raw bar columns into a tidy OHLCV frame sharing df's index.
# Pairs are (new column, source column in df), in output order.
renamed_columns = [
    ('Open', 'open'),
    ('Close', 'close'),
    ('High', 'high'),
    ('Low', 'low'),
    ('Volume', 'volumefrom'),
    ('$Turnover', 'volumeto'),
]

btc = pd.DataFrame(index=df.index)
for target, source in renamed_columns:
    btc[target] = df[source]

# Intraday range and the bar timestamp.
btc['Volatility'] = df['high'] - df['low']
btc['Date'] = df['time']

# Random spot check of ten rows.
btc.sample(10)
Out[72]:
In [73]:
# Make sure 'Date' is datetime-typed, then promote it to the index.
btc = btc.assign(Date=pd.to_datetime(btc['Date'])).set_index('Date')
In [74]:
# Build the indicator frame `ind`, one row per row of `btc`.
# Suffix convention used below:
#   _T0  values taken from the current row only
#   _T1  values pulled from one row further down via shift(-1)
#   _T2  values pulled from two rows further down via shift(-2)
#   _m3  difference against a 3-row rolling mean
ind = pd.DataFrame(index=btc.index)

# --- current row ---
ind['pd_T0'] = btc.Close - btc.Open           # close minus open
ind['rpd_T0'] = ind['pd_T0'] / btc.Open       # same, relative to the open
ind['v_T0'] = btc.Volume
ind['hl_T0'] = btc.High - btc.Low             # high-low range
ind['s_T0'] = btc['$Turnover']

# --- one row ahead ---
btc_T1 = btc.shift(-1)
ind['pd_T1'] = btc_T1.Close - btc.Open
ind['rpd_T1'] = ind['pd_T1'] / btc.Open
ind['dv_T1'] = btc_T1.Volume - btc.Volume     # volume change to the next row
ind['v_T1'] = btc_T1.Volume
ind['hl_T1'] = btc_T1.High - btc.Low
ind['s_T1'] = btc_T1['$Turnover']

# --- two rows ahead ---
btc_T2 = btc.shift(-2)
ind['pd_T2'] = btc_T2.Close - btc.Open
# Fixed: this used to be written to 'rpd_T1', overwriting the one-row-ahead
# value computed above and leaving no 'rpd_T2' column.
ind['rpd_T2'] = ind['pd_T2'] / btc.Open
ind['v_T2'] = btc_T2.Volume
ind['s_T2'] = btc_T2['$Turnover']

# --- deviation from the 3-row rolling mean ---
btc_M3 = btc.rolling(window=3).mean()
ind['dh_m3'] = btc.High - btc_M3.High
ind['dl_m3'] = btc.Low - btc_M3.Low
# NOTE(review): compares Open against the rolling mean of Close — confirm
# that mixing the two columns is intentional.
ind['do_m3'] = btc.Open - btc_M3.Close
# NOTE(review): (High - Low) / 2 is half the range, not the midpoint
# (High + Low) / 2 — confirm which one was intended.
ind['dp_m3'] = (btc.High-btc.Low)/2 - (btc_M3.High - btc_M3.Low)/2

# shift() and rolling() leave NaN at the edges of the frame; drop those rows.
ind.dropna(inplace=True)
In [75]:
# market fluctuations
# Histogram of the next-row volume change as a percentage of current volume,
# for rows from 2016-06-01 onwards.
# .ix was deprecated in pandas 0.20 and removed in 1.0; .loc performs the same
# label-based slice on the Date index.
i = ind.loc['2016-06-01':]
(i.dv_T1 / i.v_T0 * 100).hist(bins=np.linspace(-200, 1000, 100))
Out[75]:
In [76]:
# Flag rows where the next-row volume change exceeds 100% of the current
# volume, then list only the flagged rows.
volume_change_pct = i['dv_T1'].div(i['v_T0']).mul(100)
volume_jump = volume_change_pct.gt(100)
volume_jump.loc[volume_jump]
Out[76]:
In [77]:
# Apply log and cube-root transforms.
def col_names(x):
    """Return the index labels at which the boolean Series ``x`` is True."""
    return x[x].index


# Strictly positive columns get log1p; columns that reach zero or below get
# a cube root, which is defined for negative values as well.
column_minimums = ind.min()
log_columns = col_names(column_minimums > 0)
cbrt_columns = col_names(column_minimums <= 0)

for name in log_columns:
    ind['log_' + name] = np.log1p(ind[name])

for name in cbrt_columns:
    ind['cbrt_' + name] = np.cbrt(ind[name])
In [78]:
# Drop correlated columns.
# For every pair of columns with |correlation| > 0.8, the column that appears
# earlier in `ind` is removed and the later one is kept.
corr = ind.corr()

# Build the mask as a fresh NumPy array instead of writing into
# `related.values` in place: that relied on .values being a writable view of
# the frame, which does not hold under pandas copy-on-write.
related = np.abs(corr.values) > 0.8
# Keep only entries strictly below the diagonal so that each pair is counted
# once and a column is never compared with itself.
related = np.tril(related, k=-1)

# A column is redundant when any later column is highly correlated with it.
redundant = list(corr.columns[related.any(axis=0)])
for c in redundant:
    del ind[c]
In [79]:
# One stacked panel per remaining indicator column.
panel_size = (12, 30)
ind.plot(figsize=panel_size, subplots=True)
Out[79]:
In [81]:
# Heatmap of the pairwise correlations between the remaining indicators.
indicator_corr = ind.corr()
sns.heatmap(indicator_corr)
Out[81]:
In [82]:
# Save a copy of the indicators with every column name prefixed by 'BTC_'.
export = ind.add_prefix('BTC_')
export.to_csv('../notebooks/btc-ind.csv')
In [ ]:
In [83]:
# List only the rows that were flagged as volume jumps.
volume_jump.loc[volume_jump]
Out[83]:
In [ ]:
In [ ]: