In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime

In [2]:
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)



In [3]:
def get_quandl_data(quandl_id):
    '''Download and cache Quandl dataseries'''
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    try:
        f = open(cache_path, 'rb')
        df = pickle.load(f)   
        print('Loaded {} from cache'.format(quandl_id))
    except (OSError, IOError) as e:
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df

In [4]:
# Pull Kraken BTC price exchange data
btc_usd_price_kraken = get_quandl_data('BCHARTS/KRAKENUSD')


Loaded BCHARTS/KRAKENUSD from cache

In [5]:
btc_usd_price_kraken.head()


Out[5]:
Open High Low Close Volume (BTC) Volume (Currency) Weighted Price
Date
2014-01-07 874.67040 892.06753 810.00000 810.00000 15.622378 13151.472844 841.835522
2014-01-08 810.00000 899.84281 788.00000 824.98287 19.182756 16097.329584 839.156269
2014-01-09 825.56345 870.00000 807.42084 841.86934 8.158335 6784.249982 831.572913
2014-01-10 839.99000 857.34056 817.00000 857.33056 8.024510 6780.220188 844.938794
2014-01-11 858.20000 918.05471 857.16554 899.84105 18.748285 16698.566929 890.671709

In [6]:
# Chart the BTC pricing data
btc_trace = go.Scatter(x=btc_usd_price_kraken.index, y=btc_usd_price_kraken['Weighted Price'])
py.iplot([btc_trace])



In [7]:
# Pull pricing data for 3 more BTC exchanges
exchanges = ['COINBASE','BITSTAMP','ITBIT']

exchange_data = {}

exchange_data['KRAKEN'] = btc_usd_price_kraken

for exchange in exchanges:
    exchange_code = 'BCHARTS/{}USD'.format(exchange)
    btc_exchange_df = get_quandl_data(exchange_code)
    exchange_data[exchange] = btc_exchange_df


Loaded BCHARTS/COINBASEUSD from cache
Loaded BCHARTS/BITSTAMPUSD from cache
Loaded BCHARTS/ITBITUSD from cache

In [8]:
def merge_dfs_on_column(dataframes, labels, col):
    '''Merge a single column of each dataframe into a new combined dataframe'''
    series_dict = {}
    for index in range(len(dataframes)):
        series_dict[labels[index]] = dataframes[index][col]
        
    return pd.DataFrame(series_dict)

In [9]:
# Merge the BTC price dataseries' into a single dataframe
btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')

In [10]:
btc_usd_datasets.tail()


Out[10]:
BITSTAMP COINBASE ITBIT KRAKEN
Date
2018-02-12 8662.804892 8624.481010 8615.053923 8652.141065
2018-02-13 8588.471072 8626.772411 8602.873826 8616.905161
2018-02-14 9092.779649 9101.687262 9101.950611 9117.299676
2018-02-15 9831.309592 9840.512945 9804.438627 9858.177068
2018-02-16 10019.430910 10033.118257 10048.017204 10031.161471

In [11]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot of the entire dataframe'''
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))
    
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= not seperate_y_axis,
            type=scale
        )
    )
    
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale )
    
    visibility = 'visible'
    if initial_hide:
        visibility = 'legendonly'
        
    # Form Trace For Each Series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index, 
            y=series, 
            name=label_arr[index],
            visible=visibility
        )
        
        # Add seperate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config    
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [12]:
# Plot all of the BTC exchange prices
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')



In [13]:
# Remove "0" values
btc_usd_datasets.replace(0, np.nan, inplace=True)

In [14]:
# Plot the revised dataframe
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')



In [15]:
# Calculate the average BTC price as a new column
btc_usd_datasets['avg_btc_price_usd'] = btc_usd_datasets.mean(axis=1)

In [16]:
# Plot the average BTC price
btc_trace = go.Scatter(x=btc_usd_datasets.index, y=btc_usd_datasets['avg_btc_price_usd'])
py.iplot([btc_trace])



In [17]:
def get_json_data(json_url, cache_path):
    '''Download and cache JSON data, return as a dataframe.'''
    try:        
        f = open(cache_path, 'rb')
        df = pickle.load(f)   
        print('Loaded {} from cache'.format(json_url))
    except (OSError, IOError) as e:
        print('Downloading {}'.format(json_url))
        df = pd.read_json(json_url)
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(json_url, cache_path))
    return df

In [18]:
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'
start_date = datetime.strptime('2015-01-01', '%Y-%m-%d') # get data from the start of 2015
end_date = datetime.now() # up until today
pediod = 86400 # pull daily data (86,400 seconds per day)

def get_crypto_data(poloniex_pair):
    '''Retrieve cryptocurrency data from poloniex'''
    json_url = base_polo_url.format(poloniex_pair, start_date.timestamp(), end_date.timestamp(), pediod)
    data_df = get_json_data(json_url, poloniex_pair)
    data_df = data_df.set_index('date')
    return data_df

In [19]:
altcoins = ['ETH','LTC','XRP','ETC','STR','DASH','SC','XMR','XEM']

altcoin_data = {}
for altcoin in altcoins:
    coinpair = 'BTC_{}'.format(altcoin)
    crypto_price_df = get_crypto_data(coinpair)
    altcoin_data[altcoin] = crypto_price_df


Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DASH&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_SC&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1420088400.0&end=1518873722.957863&period=86400 from cache
Loaded https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XEM&start=1420088400.0&end=1518873722.957863&period=86400 from cache

In [20]:
altcoin_data['ETH'].tail()


Out[20]:
close high low open quoteVolume volume weightedAverage
date
2018-02-13 0.098621 0.098776 0.096888 0.097432 15066.324080 1475.767890 0.097951
2018-02-14 0.097405 0.098770 0.096000 0.098621 22458.349009 2189.831365 0.097506
2018-02-15 0.092200 0.097410 0.090854 0.097410 31422.308293 2960.448599 0.094215
2018-02-16 0.092002 0.094500 0.090000 0.092201 30151.716216 2788.010442 0.092466
2018-02-17 0.089345 0.093121 0.088901 0.092002 11276.643249 1020.273473 0.090477

In [21]:
# Calculate USD Price as a new column in each altcoin dataframe
for altcoin in altcoin_data.keys():
    altcoin_data[altcoin]['price_usd'] =  altcoin_data[altcoin]['weightedAverage'] * btc_usd_datasets['avg_btc_price_usd']

In [22]:
# Merge USD price of each altcoin into single dataframe 
combined_df = merge_dfs_on_column(list(altcoin_data.values()), list(altcoin_data.keys()), 'price_usd')

In [23]:
# Add BTC price to the dataframe
combined_df['BTC'] = btc_usd_datasets['avg_btc_price_usd']

In [24]:
# Chart all of the altocoin prices
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')



In [25]:
# Calculate the pearson correlation coefficients for cryptocurrencies in 2016
combined_df_2016 = combined_df[combined_df.index.year == 2016]
combined_df_2016.pct_change().corr(method='pearson')


Out[25]:
DASH ETC ETH LTC SC STR XEM XMR XRP BTC
DASH 1.000000 0.003992 0.122695 -0.012194 0.026602 0.058083 0.014571 0.121537 0.088657 -0.014040
ETC 0.003992 1.000000 -0.181991 -0.131079 -0.008066 -0.102654 -0.080938 -0.105898 -0.054095 -0.170538
ETH 0.122695 -0.181991 1.000000 -0.064652 0.169642 0.035093 0.043205 0.087216 0.085630 -0.006502
LTC -0.012194 -0.131079 -0.064652 1.000000 0.012253 0.113523 0.160667 0.129475 0.053712 0.750174
SC 0.026602 -0.008066 0.169642 0.012253 1.000000 0.143252 0.106153 0.047910 0.021098 0.035116
STR 0.058083 -0.102654 0.035093 0.113523 0.143252 1.000000 0.225132 0.027998 0.320116 0.079075
XEM 0.014571 -0.080938 0.043205 0.160667 0.106153 0.225132 1.000000 0.016438 0.101326 0.227674
XMR 0.121537 -0.105898 0.087216 0.129475 0.047910 0.027998 0.016438 1.000000 0.027649 0.127520
XRP 0.088657 -0.054095 0.085630 0.053712 0.021098 0.320116 0.101326 0.027649 1.000000 0.044161
BTC -0.014040 -0.170538 -0.006502 0.750174 0.035116 0.079075 0.227674 0.127520 0.044161 1.000000

In [26]:
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a correlation heatmap for the entire dataframe'''
    heatmap = go.Heatmap(
        z=df.corr(method='pearson').as_matrix(),
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )
    
    layout = go.Layout(title=title)
    
    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0
        
    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

In [27]:
correlation_heatmap(combined_df_2016.pct_change(), "Cryptocurrency Correlations in 2016")



In [28]:
combined_df_2017 = combined_df[combined_df.index.year == 2017]
combined_df_2017.pct_change().corr(method='pearson')


Out[28]:
DASH ETC ETH LTC SC STR XEM XMR XRP BTC
DASH 1.000000 0.387555 0.506911 0.340153 0.291424 0.183038 0.325968 0.498418 0.091146 0.307095
ETC 0.387555 1.000000 0.601437 0.482062 0.298406 0.210387 0.321852 0.447398 0.114780 0.416562
ETH 0.506911 0.601437 1.000000 0.437609 0.373078 0.259399 0.399200 0.554632 0.212350 0.410771
LTC 0.340153 0.482062 0.437609 1.000000 0.339144 0.307589 0.379088 0.437204 0.323905 0.420645
SC 0.291424 0.298406 0.373078 0.339144 1.000000 0.402966 0.331350 0.378644 0.243872 0.325318
STR 0.183038 0.210387 0.259399 0.307589 0.402966 1.000000 0.339502 0.327488 0.509828 0.230957
XEM 0.325968 0.321852 0.399200 0.379088 0.331350 0.339502 1.000000 0.336076 0.268168 0.329431
XMR 0.498418 0.447398 0.554632 0.437204 0.378644 0.327488 0.336076 1.000000 0.226636 0.409183
XRP 0.091146 0.114780 0.212350 0.323905 0.243872 0.509828 0.268168 0.226636 1.000000 0.131469
BTC 0.307095 0.416562 0.410771 0.420645 0.325318 0.230957 0.329431 0.409183 0.131469 1.000000

In [29]:
correlation_heatmap(combined_df_2017.pct_change(), "Cryptocurrency Correlations in 2017")