In [2]:
import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime

import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)


def get_quandl_data(quandl_id):
    '''Download and cache Quandl dataseries'''
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    try:
        f = open(cache_path, 'rb')
        df = pickle.load(f)
        print('Loaded {} from cache'.format(quandl_id))
    except (OSError, IOError) as e:
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    return df



In [3]:
btc_usd_price_kraken = get_quandl_data('BCHARTS/KRAKENUSD')


Loaded BCHARTS/KRAKENUSD from cache

In [4]:
btc_usd_price_kraken.head()


Out[4]:
Open High Low Close Volume (BTC) Volume (Currency) Weighted Price
Date
2014-01-07 874.67040 892.06753 810.00000 810.00000 15.622378 13151.472844 841.835522
2014-01-08 810.00000 899.84281 788.00000 824.98287 19.182756 16097.329584 839.156269
2014-01-09 825.56345 870.00000 807.42084 841.86934 8.158335 6784.249982 831.572913
2014-01-10 839.99000 857.34056 817.00000 857.33056 8.024510 6780.220188 844.938794
2014-01-11 858.20000 918.05471 857.16554 899.84105 18.748285 16698.566929 890.671709

In [5]:
btc_trace = go.Scatter(x=btc_usd_price_kraken.index, y=btc_usd_price_kraken['Weighted Price'])
py.iplot([btc_trace])



In [6]:
exchanges = ['COINBASE','BITSTAMP','ITBIT']

exchange_data = {}

exchange_data['KRAKEN'] = btc_usd_price_kraken

for exchange in exchanges:
    exchange_code = 'BCHARTS/{}USD'.format(exchange)
    btc_exchange_df = get_quandl_data(exchange_code)
    exchange_data[exchange] = btc_exchange_df


Downloading BCHARTS/COINBASEUSD from Quandl
Cached BCHARTS/COINBASEUSD at BCHARTS-COINBASEUSD.pkl
Downloading BCHARTS/BITSTAMPUSD from Quandl
Cached BCHARTS/BITSTAMPUSD at BCHARTS-BITSTAMPUSD.pkl
Downloading BCHARTS/ITBITUSD from Quandl
Cached BCHARTS/ITBITUSD at BCHARTS-ITBITUSD.pkl

In [7]:
def merge_dfs_on_column(dataframes, labels, col):
    '''Merge a single column of each dataframe into a new combined dataframe'''
    series_dict = {}
    for index in range(len(dataframes)):
        series_dict[labels[index]] = dataframes[index][col]

    return pd.DataFrame(series_dict)

# Now we will merge all of the dataframes together on their "Weighted
# Price" column (merge the BTC price dataseries' into a single dataframe)

btc_usd_datasets = merge_dfs_on_column(list(exchange_data.values()), list(exchange_data.keys()), 'Weighted Price')

btc_usd_datasets.tail()


Out[7]:
BITSTAMP COINBASE ITBIT KRAKEN
Date
2017-08-18 4180.171091 4167.053043 4174.715155 4195.697579
2017-08-19 4030.604133 4096.284462 4052.981179 4121.371679
2017-08-20 4054.143713 4105.412784 4099.880702 4114.258059
2017-08-21 4007.736878 4021.619899 4006.994410 4044.508620
2017-08-22 3891.280600 3899.035619 3889.850873 3915.886659

In [8]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot of the entire dataframe'''
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))

    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= not seperate_y_axis,
            type=scale
        )
    )

    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale )

    visibility = 'visible'
    if initial_hide:
        visibility = 'legendonly'

    # Form Trace For Each Series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=label_arr[index],
            visible=visibility
        )

        # Add seperate axis for the series
        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(index + 1)
            layout['yaxis{}'.format(index + 1)] = y_axis_config
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

# We can now easily generate a graph for the Bitcoin pricing data.
# # Plot all of the BTC exchange prices
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')



In [9]:
# Remove "0" values
btc_usd_datasets.replace(0, np.nan, inplace=True)

# When we re-chart the dataframe, we'll see a much cleaner looking
# chart without the down-spikes:
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')

# We can now calculate a new column, containing the average daily
# Bitcoin price across all of the exchanges.
btc_usd_datasets['avg_btc_price_usd'] = btc_usd_datasets.mean(axis=1)

# This new column is our Bitcoin pricing index! Let's chart that
# column to make sure it looks ok.
btc_trace = go.Scatter(x=btc_usd_datasets.index, y=btc_usd_datasets['avg_btc_price_usd'])
py.iplot([btc_trace])



In [10]:
# Step 3 - Retrieve Altcoin Pricing Data
# --------------------------------------
# Step 3.1 - Define Poloniex API Helper Functions

# For retrieving data on cryptocurrencies we'll be using the Poloniex
# API. To assist in the altcoin data retrieval, we'll define two
# helper functions to download and cache JSON data from this
# API. First, we'll define get_json_data, which will download and
# cache JSON data from a provided URL.

def get_json_data(json_url, cache_path):
    '''Download and cache JSON data, return as a dataframe.'''
    try:
        f = open(cache_path, 'rb')
        df = pickle.load(f)
        print('Loaded {} from cache'.format(json_url))
    except (OSError, IOError) as e:
        print('Downloading {}'.format(json_url))
        df = pd.read_json(json_url)
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(json_url, cache_path))
    return df

# Next, we'll define a function that will generate Poloniex API HTTP
# requests, and will subsequently call our new get_json_data function
# to save the resulting data.

base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'
start_date = datetime.strptime('2015-01-01', '%Y-%m-%d') # get data from the start of 2015
end_date = datetime.now() # up until today
pediod = 86400 # pull daily data (86,400 seconds per day)

def get_crypto_data(poloniex_pair):
    '''Retrieve cryptocurrency data from poloniex'''
    json_url = base_polo_url.format(poloniex_pair, start_date.timestamp(), end_date.timestamp(), pediod)
    data_df = get_json_data(json_url, poloniex_pair)
    data_df = data_df.set_index('date')
    return data_df

In [11]:
# Step 3.2 - Download Trading Data From Poloniex
# ----------------------------------------------
# We'll download exchange data for nine of the top cryptocurrencies -
# Ethereum, Litecoin, Ripple, Ethereum Classic, Stellar, Dashcoin,
# Siacoin, Monero, and NEM.

altcoins = ['ETH','LTC','XRP','ETC','STR','DASH','SC','XMR','XEM']

altcoin_data = {}
for altcoin in altcoins:
    coinpair = 'BTC_{}'.format(altcoin)
    crypto_price_df = get_crypto_data(coinpair)
    altcoin_data[altcoin] = crypto_price_df

# We can preview the last few rows of the Ethereum price table to make
# sure it looks ok.
altcoin_data['ETH'].tail()


Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_ETH
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_LTC
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_XRP
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_ETC
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_STR
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DASH&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DASH&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_DASH
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_SC&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_SC&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_SC
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_XMR
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XEM&start=1420066800.0&end=1503525122.588533&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XEM&start=1420066800.0&end=1503525122.588533&period=86400 at BTC_XEM
Out[11]:
close high low open quoteVolume volume weightedAverage
date
2017-08-19 0.070587 0.072988 0.070000 0.071321 179797.304636 12841.666823 0.071423
2017-08-20 0.073525 0.073710 0.070400 0.070690 100756.634696 7213.589872 0.071594
2017-08-21 0.080500 0.087044 0.071717 0.073500 491598.852480 39587.121362 0.080527
2017-08-22 0.076602 0.086280 0.075364 0.080500 287084.551159 23005.807952 0.080136
2017-08-23 0.077116 0.079350 0.075000 0.076576 128038.755698 9851.830599 0.076944

In [12]:
# Step 3.3 - Convert Prices to USD
# --------------------------------
# Now we can combine this BTC-altcoin exchange rate data with our
# Bitcoin pricing index to directly calculate the historical USD
# values for each altcoin.

for altcoin in altcoin_data.keys():
    altcoin_data[altcoin]['price_usd'] =  altcoin_data[altcoin]['weightedAverage'] * btc_usd_datasets['avg_btc_price_usd']

# Here, we've created a new column in each altcoin dataframe with the
# USD prices for that coin. Next, we can re-use our
# merge_dfs_on_column function from earlier to create a combined
# dataframe of the USD price for each cryptocurrency.

# Merge USD price of each altcoin into single dataframe
combined_df = merge_dfs_on_column(list(altcoin_data.values()), list(altcoin_data.keys()), 'price_usd')

# Easy. Now let's also add the Bitcoin prices as a final column to the
# combined dataframe ==> Add BTC price to the dataframe
combined_df['BTC'] = btc_usd_datasets['avg_btc_price_usd']

# Now we should have a single dataframe containing daily USD prices
# for the ten cryptocurrencies that we're examining.

# Let's reuse our df_scatter function from earlier to chart all of the
# cryptocurrency prices against each other.

# Chart all of the altocoin prices
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')



In [13]:
# Step 3.4 - Perform Correlation Analysis
# ---------------------------------------

# You might notice is that the cryptocurrency exchange rates, despite
# their wildly different values and volatility, look slightly
# correlated. Especially since the spike in April 2017, even many of
# the smaller fluctuations appear to be occurring in sync across the
# entire market.

# A visually-derived hunch is not much better than a guess until we
# have the stats to back it up.

# We can test our correlation hypothesis using the Pandas corr()
# method, which computes a Pearson correlation coefficient for each
# column in the dataframe against each other column.

# Computing correlations directly on a non-stationary time series
# (such as raw pricing data) can give biased correlation values. We
# will work around this by first applying the pct_change() method,
# which will convert each cell in the dataframe from an absolute price
# value to a daily return percentage.

# First we'll calculate correlations for 2016.

# Calculate the pearson correlation coefficients for cryptocurrencies in 2016
combined_df_2016 = combined_df[combined_df.index.year == 2016]
combined_df_2016.pct_change().corr(method='pearson')

# These correlation coefficients are all over the place. Coefficients
# close to 1 or -1 mean that the series' are strongly correlated or
# inversely correlated respectively, and coefficients close to zero
# mean that the values are not correlated, and fluctuate independently
# of each other.

# To help visualize these results, we'll create one more helper
# visualization function.

def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot a correlation heatmap for the entire dataframe'''
    heatmap = go.Heatmap(
        z=df.corr(method='pearson').as_matrix(),
        x=df.columns,
        y=df.columns,
        colorbar=dict(title='Pearson Coefficient'),
    )

    layout = go.Layout(title=title)

    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0

    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

correlation_heatmap(combined_df_2016.pct_change(), "Cryptocurrency Correlations in 2016")



In [14]:
combined_df_2017 = combined_df[combined_df.index.year == 2017]
combined_df_2017.pct_change().corr(method='pearson')

# These are somewhat more significant correlation coefficients. Strong
# enough to use as the sole basis for an investment? Certainly not.

# It is notable, however, that almost all of the cryptocurrencies have
# become more correlated with each other across the board.

correlation_heatmap(combined_df_2017.pct_change(), "Cryptocurrency Correlations in 2017")

# Why is this happening? Good question. I'm really not sure. The most
# immediate explanation that comes to mind is that hedge funds have
# recently begun publicly trading in crypto-currency
# markets[1][2]. These funds have vastly more capital to play with
# than the average trader, so if a fund is hedging their bets across
# multiple cryptocurrencies, and using similar trading strategies for
# each based on independent variables (say, the stock market), it
# could make sense that this trend of increasing correlations would
# emerge.



In [ ]: