Analyzing Cryptocurrency Markets Using Python

https://blog.patricktriest.com/analyzing-cryptocurrencies-python/


In [18]:
import os
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
import quandl

In [19]:
# Plotly modules used for charting: offline rendering (py), graph objects (go)
# and figure-factory helpers (ff)
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
# Initialise plotly's offline notebook mode so py.iplot can render figures inline
py.init_notebook_mode(connected=True)



In [20]:
def get_quandl_data(quandl_id):
    """Download and cache a Quandl dataseries.

    The series is cached as a pickle file in the current working directory.
    The file name is ``quandl_id`` with ``.pkl`` appended and every ``/``
    replaced by ``-``. When the cache file can be opened it is loaded and
    returned; otherwise the series is fetched with ``quandl.get`` and written
    to the cache.

    Parameters
    ----------
    quandl_id : str
        Quandl dataset code, e.g. ``'BCHARTS/KRAKENUSD'``.

    Returns
    -------
    The object loaded from the cache, or the object returned by
    ``quandl.get(quandl_id, returns="pandas")``.
    """
    cache_path = '{}.pkl'.format(quandl_id).replace('/', '-')

    try:
        # The context manager guarantees the cache file handle is closed,
        # even if unpickling fails.
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # cache files this notebook created itself.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
    except (OSError, IOError):
        # No readable cache file: fetch from Quandl and cache the result.
        print('Downloading {} from Quandl'.format(quandl_id))

        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print('Cached {} at {}'.format(quandl_id, cache_path))
    else:
        print('Loaded {} from cache'.format(quandl_id))

    return df

In [21]:
# Pull Kraken BTC price exchange data
# (get_quandl_data reuses the local pickle cache when one exists)
btc_usd_price_kraken = get_quandl_data('BCHARTS/KRAKENUSD')


Loaded BCHARTS/KRAKENUSD from cache

In [22]:
# Preview the first rows of the Kraken data
btc_usd_price_kraken.head()


Out[22]:
Open High Low Close Volume (BTC) Volume (Currency) Weighted Price
Date
2014-01-07 874.67040 892.06753 810.00000 810.00000 15.622378 13151.472844 841.835522
2014-01-08 810.00000 899.84281 788.00000 824.98287 19.182756 16097.329584 839.156269
2014-01-09 825.56345 870.00000 807.42084 841.86934 8.158335 6784.249982 831.572913
2014-01-10 839.99000 857.34056 817.00000 857.33056 8.024510 6780.220188 844.938794
2014-01-11 858.20000 918.05471 857.16554 899.84105 18.748285 16698.566929 890.671709

In [23]:
# Plot Kraken's weighted BTC price against the date index as a single trace
btc_trace = go.Scatter(
    x=btc_usd_price_kraken.index,
    y=btc_usd_price_kraken['Weighted Price'],
)
py.iplot([btc_trace])



In [24]:
# Gather BTC/USD pricing data for three more exchanges alongside Kraken
exchanges = ['COINBASE', 'BITSTAMP', 'ITBIT']

# Seed the mapping with the Kraken frame that is already loaded
exchange_data = {'KRAKEN': btc_usd_price_kraken}

# Each exchange's Quandl code follows the pattern BCHARTS/<EXCHANGE>USD
for exchange in exchanges:
    exchange_data[exchange] = get_quandl_data('BCHARTS/{}USD'.format(exchange))


Loaded BCHARTS/COINBASEUSD from cache
Loaded BCHARTS/BITSTAMPUSD from cache
Loaded BCHARTS/ITBITUSD from cache

In [25]:
def merge_dfs_on_column(dataframes, labels, col):
    """Collect column `col` from every dataframe into one combined dataframe.

    The i-th dataframe's column is stored under ``labels[i]`` in the result.
    """
    return pd.DataFrame({
        labels[position]: frame[col]
        for position, frame in enumerate(dataframes)
    })

In [26]:
# Combine the 'Weighted Price' series of every exchange into a single dataframe
btc_usd_datasets = merge_dfs_on_column(
    list(exchange_data.values()),
    list(exchange_data),
    'Weighted Price',
)

In [27]:
# Preview the most recent rows of the merged exchange prices
btc_usd_datasets.tail()


Out[27]:
BITSTAMP COINBASE ITBIT KRAKEN
Date
2017-08-20 4054.143713 4105.412784 4099.880702 4114.258059
2017-08-21 4007.736878 4021.619899 4006.994410 4044.508620
2017-08-22 3894.115013 3903.716282 3890.774067 3919.595235
2017-08-23 4146.846811 4151.982614 4144.662062 4178.565261
2017-08-24 4183.429411 4205.407606 4218.692423 4225.176290

In [28]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot with one trace per column of the dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Every column becomes one trace; the index supplies the x values and
        the x axis is configured as a date axis.
    title : str
        Figure title.
    seperate_y_axis : bool, optional
        When True, every trace is assigned its own y axis ('y1', 'y2', ...)
        that overlays 'y' with tick labels hidden. The misspelled parameter
        name is kept so existing keyword callers keep working.
    y_axis_label : str, optional
        Title of the primary y axis.
    scale : str, optional
        Value passed as the y axis `type` (e.g. 'linear' or 'log').
    initial_hide : bool, optional
        When True, traces start as 'legendonly' instead of visible.

    Returns
    -------
    None. The figure is rendered with ``py.iplot``.
    '''
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels=not seperate_y_axis,
            type=scale,
        ),
    )

    # Plotly's `visible` attribute accepts True, False or 'legendonly';
    # the string 'visible' is not one of its allowed values.
    visibility = 'legendonly' if initial_hide else True

    # Form a trace for each column
    trace_arr = []
    for index, label in enumerate(df.columns):
        series = df[label]
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=label,
            visible=visibility,
        )

        # Add a separate axis for the series (axis numbering starts at 1)
        if seperate_y_axis:
            axis_number = index + 1
            trace['yaxis'] = 'y{}'.format(axis_number)
            # A fresh dict per axis so no config object is shared between axes
            layout['yaxis{}'.format(axis_number)] = dict(
                overlaying='y',
                showticklabels=False,
                type=scale,
            )
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [29]:
# Plot all of the BTC exchange prices (one trace per exchange column)
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')



In [30]:
# Treat "0" prices as missing data by swapping them for NaN
btc_usd_datasets = btc_usd_datasets.replace(0, np.nan)

In [31]:
# Plot the revised dataframe (zero values have been replaced with NaN)
df_scatter(btc_usd_datasets, 'Bitcoin Price (USD) By Exchange')



In [32]:
# Add the row-wise mean across the dataframe's columns as the average BTC price
btc_usd_datasets = btc_usd_datasets.assign(
    avg_btc_price_usd=btc_usd_datasets.mean(axis=1)
)

In [33]:
# Chart the averaged BTC price against the date index
btc_trace = go.Scatter(
    x=btc_usd_datasets.index,
    y=btc_usd_datasets['avg_btc_price_usd'],
)
py.iplot([btc_trace])



In [34]:
def get_json_data(json_url, cache_path):
    """Download and cache JSON data, return as a dataframe.

    When ``cache_path`` can be opened, its pickled contents are loaded and
    returned. Otherwise ``json_url`` is read with ``pandas.read_json`` and
    the resulting dataframe is pickled to ``cache_path``.

    Parameters
    ----------
    json_url : str
        Location passed to ``pandas.read_json``.
    cache_path : str
        File used to store or load the pickled dataframe.

    Returns
    -------
    The cached object, or the dataframe produced by ``pandas.read_json``.
    """
    try:
        # The context manager guarantees the cache file handle is closed,
        # even if unpickling fails.
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # cache files this notebook created itself.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
    except (OSError, IOError):
        # No readable cache file: download and cache the result.
        print('Downloading {}'.format(json_url))

        df = pd.read_json(json_url)
        df.to_pickle(cache_path)

        print('Cached {} at {}'.format(json_url, cache_path))
    else:
        print('Loaded {} from cache'.format(json_url))

    return df

In [35]:
# Poloniex chart-data endpoint; placeholders are: currency pair, start, end, period
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'
start_date = datetime.strptime('2015-01-01', '%Y-%m-%d') # get data from the start of 2015
end_date = datetime.now() # up until today
period = 86400 # pull daily data (86,400 seconds per day)
pediod = period # original misspelled name, kept as an alias for backward compatibility

def get_crypto_data(poloniex_pair):
    """Retrieve cryptocurrency data from poloniex.

    Builds the chart-data URL for ``poloniex_pair`` from the module-level
    ``start_date``, ``end_date`` and ``period``, loads it through
    ``get_json_data`` and returns the dataframe indexed by its 'date' column.

    NOTE: the cache file is named after the pair only, so a cached file is
    reused whatever date range the URL asks for. Delete the cache file to
    fetch fresh data.

    Parameters
    ----------
    poloniex_pair : str
        Currency pair code, e.g. ``'BTC_ETH'``; also used as the cache file name.

    Returns
    -------
    The dataframe returned by ``get_json_data`` with 'date' set as the index.
    """
    json_url = base_polo_url.format(poloniex_pair, start_date.timestamp(), end_date.timestamp(), period)
    data_df = get_json_data(json_url, poloniex_pair)
    data_df = data_df.set_index('date')

    return data_df

In [36]:
# Altcoins to load; each is fetched as its BTC_<COIN> pair
altcoins = ['ETH', 'LTC', 'XRP', 'ETC', 'STR', 'DASH', 'SC', 'XMR', 'XEM']

# Map each altcoin symbol to its dataframe
altcoin_data = {}
for altcoin in altcoins:
    altcoin_data[altcoin] = get_crypto_data('BTC_{}'.format(altcoin))


Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_ETH
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_LTC&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_LTC
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XRP&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_XRP
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETC&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_ETC
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_STR&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_STR
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DASH&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_DASH&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_DASH
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_SC&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_SC&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_SC
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_XMR
Downloading https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XEM&start=1420088400.0&end=1503665647.290797&period=86400
Cached https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XEM&start=1420088400.0&end=1503665647.290797&period=86400 at BTC_XEM

In [37]:
# Preview the most recent rows of the ETH data
altcoin_data['ETH'].tail()


Out[37]:
close high low open quoteVolume volume weightedAverage
date
2017-08-21 0.080500 0.087044 0.071717 0.073500 491598.852480 39587.121362 0.080527
2017-08-22 0.076602 0.086280 0.075364 0.080500 287084.551159 23005.807952 0.080136
2017-08-23 0.076590 0.079350 0.075000 0.076576 138264.051896 10637.593564 0.076937
2017-08-24 0.075400 0.079053 0.074154 0.076580 130920.193385 10010.117896 0.076460
2017-08-25 0.076500 0.077642 0.074355 0.075400 58454.009991 4447.422060 0.076084

In [38]:
# Add a 'price_usd' column to every altcoin dataframe: its BTC-denominated
# weighted average multiplied by the average BTC price in USD
for altcoin_df in altcoin_data.values():
    altcoin_df['price_usd'] = altcoin_df['weightedAverage'] * btc_usd_datasets['avg_btc_price_usd']

In [39]:
# Combine the 'price_usd' series of every altcoin into a single dataframe
combined_df = merge_dfs_on_column(
    list(altcoin_data.values()),
    list(altcoin_data),
    'price_usd',
)

In [40]:
# Append the average BTC price (USD) as a 'BTC' column next to the altcoins
combined_df = combined_df.assign(BTC=btc_usd_datasets['avg_btc_price_usd'])

In [41]:
# Chart all of the altcoin prices (plus BTC) on a shared, log-scaled y axis
df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')



In [42]:
# Pearson correlation coefficients between the coins' row-to-row percentage
# changes, restricted to rows dated 2016
combined_df_2016 = combined_df.loc[combined_df.index.year == 2016]
combined_df_2016.pct_change().corr(method='pearson')


Out[42]:
DASH ETC ETH LTC SC STR XEM XMR XRP BTC
DASH 1.000000 0.003992 0.122695 -0.012194 0.026602 0.058083 0.014571 0.121537 0.088657 -0.014040
ETC 0.003992 1.000000 -0.181991 -0.131079 -0.008066 -0.102654 -0.080938 -0.105898 -0.054095 -0.170538
ETH 0.122695 -0.181991 1.000000 -0.064652 0.169642 0.035093 0.043205 0.087216 0.085630 -0.006502
LTC -0.012194 -0.131079 -0.064652 1.000000 0.012253 0.113523 0.160667 0.129475 0.053712 0.750174
SC 0.026602 -0.008066 0.169642 0.012253 1.000000 0.143252 0.106153 0.047910 0.021098 0.035116
STR 0.058083 -0.102654 0.035093 0.113523 0.143252 1.000000 0.225132 0.027998 0.320116 0.079075
XEM 0.014571 -0.080938 0.043205 0.160667 0.106153 0.225132 1.000000 0.016438 0.101326 0.227674
XMR 0.121537 -0.105898 0.087216 0.129475 0.047910 0.027998 0.016438 1.000000 0.027649 0.127520
XRP 0.088657 -0.054095 0.085630 0.053712 0.021098 0.320116 0.101326 0.027649 1.000000 0.044161
BTC -0.014040 -0.170538 -0.006502 0.750174 0.035116 0.079075 0.227674 0.127520 0.044161 1.000000

In [43]:
def correlation_heatmap(df, title, absolute_bounds=True):
    """Plot a heatmap of the Pearson correlations between the dataframe's columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose column-to-column Pearson correlations are plotted.
    title : str
        Figure title.
    absolute_bounds : bool, optional
        When True (the default), the colour scale is pinned to [-1.0, 1.0].

    Returns
    -------
    None. The figure is rendered with ``py.iplot``.
    """
    correlations = df.corr(method='pearson')

    heatmap = go.Heatmap(
        # `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0
        z=correlations.values,
        # Take the labels from the correlation matrix so they always match z
        x=correlations.columns,
        y=correlations.index,
        colorbar=dict(title='Pearson Coefficient'),
    )

    layout = go.Layout(title=title)

    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0

    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

In [44]:
# Heatmap of the correlations between the coins' percentage changes in 2016
correlation_heatmap(combined_df_2016.pct_change(), "Cryptocurrency Correlations in 2016")



In [45]:
# Pearson correlation coefficients between the coins' row-to-row percentage
# changes, restricted to rows dated 2017
combined_df_2017 = combined_df.loc[combined_df.index.year == 2017]
combined_df_2017.pct_change().corr(method='pearson')


Out[45]:
DASH ETC ETH LTC SC STR XEM XMR XRP BTC
DASH 1.000000 0.381211 0.471583 0.258826 0.197098 0.159066 0.305409 0.416137 0.060957 0.359518
ETC 0.381211 1.000000 0.600522 0.426445 0.255327 0.152687 0.299537 0.426804 0.064743 0.472299
ETH 0.471583 0.600522 1.000000 0.285640 0.319296 0.227204 0.336460 0.566343 0.117618 0.415991
LTC 0.258826 0.426445 0.285640 1.000000 0.297366 0.339592 0.248405 0.398592 0.327426 0.359448
SC 0.197098 0.255327 0.319296 0.297366 1.000000 0.416651 0.295787 0.314624 0.238577 0.379214
STR 0.159066 0.152687 0.227204 0.339592 0.416651 1.000000 0.390966 0.322694 0.623963 0.182477
XEM 0.305409 0.299537 0.336460 0.248405 0.295787 0.390966 1.000000 0.315310 0.250597 0.368069
XMR 0.416137 0.426804 0.566343 0.398592 0.314624 0.322694 0.315310 1.000000 0.238206 0.428781
XRP 0.060957 0.064743 0.117618 0.327426 0.238577 0.623963 0.250597 0.238206 1.000000 0.169443
BTC 0.359518 0.472299 0.415991 0.359448 0.379214 0.182477 0.368069 0.428781 0.169443 1.000000

In [46]:
# Heatmap of the correlations between the coins' percentage changes in 2017
correlation_heatmap(combined_df_2017.pct_change(), "Cryptocurrency Correlations in 2017")



In [ ]: