In [29]:
import pandas as pd
import numpy as np
import json
import seaborn as sns
import requests
%matplotlib inline

In [30]:
data = requests.get('https://min-api.cryptocompare.com/data/histoday?fsym=BTC&tsym=USD&limit=2000&aggregate=1&e=CCCAGG').json()

In [31]:
df = pd.io.json.json_normalize(data['Data'])
df['time'] = pd.to_datetime(df['time'], unit='s')
df


Out[31]:
close high low open time volumefrom volumeto
0 4.88 5.29 4.65 5.22 2012-01-03 125170.25 6.191705e+05
1 5.57 5.70 4.75 4.88 2012-01-04 131170.86 6.887179e+05
2 6.95 7.22 5.57 5.57 2012-01-05 182328.19 1.130623e+06
3 6.70 7.21 6.13 6.95 2012-01-06 218077.48 1.481328e+06
4 6.81 7.02 6.38 6.70 2012-01-07 48387.58 3.250540e+05
5 7.11 7.20 6.73 6.81 2012-01-08 57193.53 4.031514e+05
6 6.33 7.19 5.81 7.11 2012-01-09 148406.85 9.605923e+05
7 6.36 6.89 6.00 6.33 2012-01-10 110210.25 7.050565e+05
8 6.90 7.14 6.41 6.36 2012-01-11 99442.07 6.776428e+05
9 6.80 7.00 6.34 6.90 2012-01-12 140116.52 9.441659e+05
10 6.41 6.80 6.40 6.80 2012-01-13 60228.77 3.983122e+05
11 6.75 6.95 6.22 6.41 2012-01-14 141896.32 9.351925e+05
12 7.00 7.08 6.74 6.75 2012-01-15 62233.33 4.297009e+05
13 6.68 7.19 6.51 7.00 2012-01-16 122666.57 8.374136e+05
14 5.60 6.95 4.64 6.68 2012-01-17 213409.80 1.288539e+06
15 5.92 6.95 5.10 5.60 2012-01-18 226680.97 1.354753e+06
16 6.36 6.36 5.87 5.92 2012-01-19 114820.45 7.097053e+05
17 6.49 6.58 6.26 6.36 2012-01-20 66786.48 4.298036e+05
18 6.18 6.55 6.10 6.49 2012-01-21 64761.69 4.071164e+05
19 6.31 6.45 6.10 6.18 2012-01-22 44890.65 2.822338e+05
20 6.36 6.39 6.21 6.31 2012-01-23 17693.05 1.116306e+05
21 6.29 6.39 6.22 6.36 2012-01-24 35663.08 2.246656e+05
22 5.75 6.32 5.50 6.29 2012-01-25 133115.25 7.820537e+05
23 5.34 5.92 5.30 5.75 2012-01-26 123954.11 6.926247e+05
24 5.29 5.51 5.05 5.34 2012-01-27 115567.83 6.115812e+05
25 5.63 5.76 5.26 5.29 2012-01-28 89708.02 4.998815e+05
26 5.38 5.70 5.33 5.63 2012-01-29 46430.45 2.544572e+05
27 5.49 5.60 5.37 5.38 2012-01-30 42303.14 2.331797e+05
28 5.48 5.65 5.43 5.49 2012-01-31 48296.66 2.670993e+05
29 6.08 6.20 5.47 5.48 2012-02-01 115523.96 6.726326e+05
... ... ... ... ... ... ... ...
1971 2052.43 2322.37 1868.56 2244.89 2017-05-27 222952.91 4.460340e+08
1972 2189.02 2300.52 2051.07 2052.43 2017-05-28 125004.39 2.667661e+08
1973 2278.21 2337.35 2117.34 2189.02 2017-05-29 92235.69 2.041016e+08
1974 2192.55 2329.26 2147.10 2278.21 2017-05-30 120964.11 2.700691e+08
1975 2303.34 2330.62 2168.37 2192.55 2017-05-31 117749.10 2.612230e+08
1976 2412.65 2460.84 2303.34 2303.34 2017-06-01 122696.44 2.900704e+08
1977 2492.60 2493.97 2385.59 2412.65 2017-06-02 83028.45 1.997654e+08
1978 2545.43 2582.77 2449.95 2492.60 2017-06-03 70956.49 1.771686e+08
1979 2524.06 2559.79 2478.18 2545.43 2017-06-04 59478.96 1.490608e+08
1980 2704.96 2705.38 2523.92 2524.06 2017-06-05 86686.38 2.256119e+08
1981 2870.50 2931.24 2699.68 2704.96 2017-06-06 192228.25 5.387725e+08
1982 2691.51 2880.95 2626.56 2870.50 2017-06-07 122676.04 3.403299e+08
1983 2798.78 2808.41 2623.61 2691.51 2017-06-08 95420.44 2.608986e+08
1984 2811.43 2852.07 2786.07 2798.78 2017-06-09 67044.96 1.892371e+08
1985 2900.25 2914.19 2807.36 2811.43 2017-06-10 86516.58 2.468697e+08
1986 2973.45 2977.86 2867.25 2900.25 2017-06-11 83026.07 2.415167e+08
1987 2656.77 2985.06 2483.63 2973.45 2017-06-12 217963.32 5.851589e+08
1988 2712.99 2784.77 2647.44 2656.77 2017-06-13 116738.08 3.144763e+08
1989 2467.27 2803.72 2348.99 2712.99 2017-06-14 174189.24 4.482943e+08
1990 2442.46 2521.60 2125.56 2467.27 2017-06-15 262787.33 6.062644e+08
1991 2508.58 2536.42 2335.28 2442.46 2017-06-16 117206.43 2.866308e+08
1992 2655.10 2690.71 2454.81 2508.58 2017-06-17 95948.29 2.466986e+08
1993 2539.56 2676.04 2488.59 2655.10 2017-06-18 92004.22 2.352393e+08
1994 2616.82 2617.75 2518.78 2539.56 2017-06-19 81829.22 2.103728e+08
1995 2754.38 2800.48 2613.98 2616.82 2017-06-20 111303.55 3.000340e+08
1996 2677.62 2804.41 2624.35 2754.38 2017-06-21 116033.61 3.132347e+08
1997 2722.84 2757.32 2623.17 2677.62 2017-06-22 78297.60 2.103188e+08
1998 2710.37 2759.66 2699.48 2722.84 2017-06-23 56938.10 1.553034e+08
1999 2590.05 2741.56 2540.35 2710.37 2017-06-24 86964.18 2.280073e+08
2000 2642.38 2632.32 2533.58 2590.05 2017-06-25 25978.89 6.624443e+07

2001 rows × 7 columns


In [32]:
df.to_csv('../bitcoin-daily-bars/latest-btcusd-cccagg.csv')

df.apply(lambda x: x.to_dict())


Out[32]:
close         {0: 4.88, 1: 5.57, 2: 6.95, 3: 6.7, 4: 6.81, 5...
high          {0: 5.29, 1: 5.7, 2: 7.22, 3: 7.21, 4: 7.02, 5...
low           {0: 4.65, 1: 4.75, 2: 5.57, 3: 6.13, 4: 6.38, ...
open          {0: 5.22, 1: 4.88, 2: 5.57, 3: 6.95, 4: 6.7, 5...
time          {0: 2012-01-03 00:00:00, 1: 2012-01-04 00:00:0...
volumefrom    {0: 125170.25, 1: 131170.86, 2: 182328.19, 3: ...
volumeto      {0: 619170.54, 1: 688717.86, 2: 1130623.29, 3:...
dtype: object

In [33]:


In [37]:
import json
def to_json(df, path):
    a = []
    for i,d in list(df.iterrows()):
        d = d.to_dict()
        d['time'] = str(d['time'])
        a.append(d) 
    with open(path, 'w') as f:
        json.dump(a, f)
        
to_json(df, '../bitcoin-daily-bars/latest-btcusd-cccagg.json')

In [67]:
btc.tail()


Out[67]:
Open High Low Close Volume $Turnover Volatility
Date
2017-04-14 1172.56 8331.06 962.21 1187.99 14119.50 6.560816e+07 7368.85
2017-04-15 1169.53 5369.46 998.06 1175.72 3740.43 1.190863e+07 4371.40
2017-04-16 1170.21 5309.96 1001.76 1162.31 4923.99 1.553942e+07 4308.20
2017-04-17 1575.00 14504.40 991.21 1176.54 7416.33 5.746028e+07 13513.19
2017-04-18 1176.59 6974.72 974.69 1229.00 4793.14 1.905132e+07 6000.03

In [ ]:


In [71]:
df[df['time'].astype('str')> '2017-04-14'].head()


Out[71]:
close high low open time volumefrom volumeto
1930 1176.99 1194.85 841.53 1177.33 2017-04-15 28384.58 33877537.21
1931 1176.80 1187.97 1166.74 1176.99 2017-04-16 18930.29 22538918.55
1932 1194.01 1202.97 1172.81 1176.80 2017-04-17 46483.54 56095607.39
1933 1206.09 1213.88 1191.14 1194.01 2017-04-18 52747.52 64900219.25
1934 1215.20 1218.21 1194.08 1206.09 2017-04-19 43347.15 53034357.10

In [72]:
btc = pd.DataFrame(index=df.index)
btc['Open'] = df.open
btc['Close'] = df.close
btc['High']=  df.high
btc['Low'] = df.low
btc['Volume'] = df.volumefrom
btc['$Turnover'] = df.volumeto
btc['Volatility'] = df.high - df.low
btc['Date'] = df.time
btc.sample(10)


Out[72]:
Open Close High Low Volume $Turnover Volatility Date
1309 281.36 281.58 285.00 278.94 45894.10 13006246.28 6.06 2015-08-03
1251 225.29 224.74 225.76 223.53 22467.76 5074897.56 2.23 2015-06-06
439 46.95 47.00 47.36 46.27 16314.97 764861.89 1.09 2013-03-16
1422 322.12 318.35 323.26 315.54 50660.16 16282941.39 7.72 2015-11-24
106 4.93 4.98 5.02 4.93 30645.71 151983.96 0.09 2012-04-17
122 5.07 5.13 5.18 5.02 35421.07 180427.18 0.16 2012-05-03
121 5.00 5.07 5.18 4.97 95963.31 487937.82 0.21 2012-05-02
1541 411.11 416.21 417.94 410.78 49075.11 20425092.47 7.16 2016-03-22
1699 579.37 570.35 580.41 567.25 18686.65 10747219.53 13.16 2016-08-27
372 13.59 13.74 13.88 13.50 42798.37 587223.86 0.38 2013-01-08

In [73]:
btc['Date'] = pd.to_datetime(btc['Date'])
btc = btc.set_index('Date')

In [74]:
ind = pd.DataFrame(index=btc.index)
ind['pd_T0'] = btc.Close - btc.Open
ind['rpd_T0'] = ind['pd_T0'] / btc.Open
ind['v_T0'] = btc.Volume
ind['hl_T0'] = btc.High - btc.Low
ind['s_T0'] = btc['$Turnover']

btc_T1 = btc.shift(-1)
ind['pd_T1'] = btc_T1.Close - btc.Open
ind['rpd_T1'] = ind['pd_T1'] / btc.Open
ind['dv_T1'] = btc_T1.Volume - btc.Volume
ind['v_T1'] = btc_T1.Volume
ind['hl_T1'] = btc_T1.High - btc.Low
ind['s_T1'] = btc_T1['$Turnover']

btc_T2 = btc.shift(-2)
ind['pd_T2'] = btc_T2.Close - btc.Open
ind['rpd_T1'] = ind['pd_T2'] / btc.Open
ind['v_T2'] = btc_T2.Volume
ind['s_T2'] = btc_T2['$Turnover']

btc_M3 = btc.rolling(window=3).mean()
ind['dh_m3'] = btc.High - btc_M3.High
ind['dl_m3'] = btc.Low - btc_M3.Low
ind['do_m3'] = btc.Open - btc_M3.Close
ind['dp_m3'] = (btc.High-btc.Low)/2 - (btc_M3.High - btc_M3.Low)/2


ind.dropna(inplace=True)

In [75]:
# market fluctuations
i = ind.ix['2016-06-01':]
(i.dv_T1 / i.v_T0 * 100).hist(bins=np.linspace(-200, 1000, 100))


Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x11e0f25d0>

In [76]:
volume_jump = (i.dv_T1 / i.v_T0 * 100) > 100
volume_jump[volume_jump]


Out[76]:
Date
2016-06-02    True
2016-06-11    True
2016-06-15    True
2016-06-20    True
2016-07-06    True
2016-07-16    True
2016-07-21    True
2016-07-30    True
2016-08-21    True
2016-09-10    True
2016-09-25    True
2016-10-10    True
2016-10-16    True
2016-11-02    True
2016-12-04    True
2017-01-03    True
2017-01-10    True
2017-01-16    True
2017-01-30    True
2017-02-08    True
2017-02-12    True
2017-03-06    True
2017-03-09    True
2017-03-15    True
2017-03-23    True
2017-04-16    True
2017-04-30    True
2017-05-21    True
2017-06-05    True
2017-06-11    True
dtype: bool

In [77]:
#apply log and cube root transform
col_names = lambda x: x[x].index
log_columns = col_names(ind.min() > 0)
cbrt_columns = col_names(ind.min() <= 0)

for c in log_columns:
    ind['log_'+c] = ind[c].apply(np.log1p)
for c in cbrt_columns:
    ind['cbrt_'+c] = ind[c].apply(np.cbrt)

In [78]:
# drop corelated
corr = ind.corr()
related = np.abs(corr) > 0.8
np.fill_diagonal(related.values, False)
related *= np.tri(*related.shape)
for c in col_names(related.max() > 0):
    del ind[c]

In [79]:
ind.plot(subplots=True, figsize=(12, 30))


Out[79]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x11e0f2a50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11eb6e1d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11ebee1d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11ec50250>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11ecd2690>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11ee44e10>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11eedd290>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11ef620d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11efc0d10>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f152090>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11d96af90>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11da18490>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11d8756d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11d506490>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11d4f8c90>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11d87a810>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11a129750>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f18cc50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f299550>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f31c390>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f37cd10>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f400c50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11f458b50>], dtype=object)

In [81]:
sns.heatmap(ind.corr())


Out[81]:
<matplotlib.axes._subplots.AxesSubplot at 0x1226210d0>

In [82]:
export = ind.copy()
export.columns = ['BTC_'+c for c in export.columns]
export.to_csv('../notebooks/btc-ind.csv')

In [ ]:


In [83]:
volume_jump[volume_jump]


Out[83]:
Date
2016-06-02    True
2016-06-11    True
2016-06-15    True
2016-06-20    True
2016-07-06    True
2016-07-16    True
2016-07-21    True
2016-07-30    True
2016-08-21    True
2016-09-10    True
2016-09-25    True
2016-10-10    True
2016-10-16    True
2016-11-02    True
2016-12-04    True
2017-01-03    True
2017-01-10    True
2017-01-16    True
2017-01-30    True
2017-02-08    True
2017-02-12    True
2017-03-06    True
2017-03-09    True
2017-03-15    True
2017-03-23    True
2017-04-16    True
2017-04-30    True
2017-05-21    True
2017-06-05    True
2017-06-11    True
dtype: bool

In [ ]:


In [ ]: