In [1]:
import numpy as np # Linear Alg
import pandas as pd # CSV file I/O & data processing
# Visualization
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import warnings
from matplotlib import style
from matplotlib.finance import candlestick_ohlc
warnings.filterwarnings("ignore")
# style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = (12.0, 8.0)
from subprocess import check_output
We are using the Cryptocurrency Historical Prices dataset from Kaggle.
In [2]:
# Directory holding the dataset CSVs; the loading cell builds its paths from it.
input_dir = '../input'

# Show the directory contents so the available files are visible in the output.
# The listing is produced by the external `ls` command.
print('File List: \n')
listing = check_output(["ls", input_dir])
print(listing.decode("utf8"))
In [3]:
# Load one price history per coin into `currencies`, keyed by coin name.
# Every file follows the '<coin>_price.csv' naming pattern; the 'Date' column
# is parsed as datetimes and the first CSV column is used as the index.
#
# Deliberately not loaded here (their read_csv calls were commented out):
#   bitcoin_cash_price.csv, ethereum_dataset.csv,
#   bitcoin_dataset.csv, ethereum_classic_price.csv
coin_names = [
    'bitcoin', 'bitconnect', 'dash', 'ethereum', 'iota',
    'litecoin', 'monero', 'nem', 'neo', 'numeraire',
    'omisego', 'qtum', 'ripple', 'stratis', 'waves',
]

currencies = {}
for coin_name in coin_names:
    csv_path = '{}/{}_price.csv'.format(input_dir, coin_name)
    currencies[coin_name] = pd.read_csv(csv_path, parse_dates=['Date'], index_col=0)

print(len(currencies))
In [4]:
# Preview the first rows of the bitcoin frame to check how the CSV was parsed.
currencies['bitcoin'].head()
Out[4]:
In [ ]:
# Preview the auxiliary datasets. Their read_csv calls in the loading cell are
# commented out, so each key is looked up defensively: indexing `currencies`
# directly would raise KeyError and stop a Restart & Run All.
extra_datasets = ('bitcoin_cash', 'ethereum_data', 'bitcoin_data', 'ethereum_classic')
for extra_name in extra_datasets:
    if extra_name in currencies:
        print('{} \n'.format(currencies[extra_name].head()))
    else:
        print('{}: not loaded, skipping preview \n'.format(extra_name))
In [5]:
# Report the date span covered by each currency's price history.
# max()/min() of the index are used instead of the first/last row so the
# report stays correct whatever order the rows appear in the CSV.
for c, frame in currencies.items():
    print('====================={}============================'.format(c))
    print('Date of newest data: {}'.format(frame.index.max()))
    print('Date of oldest data: {}\n'.format(frame.index.min()))
In [6]:
# User select currency of interest for visualization
# coin_type is the key used to look up a frame in the `currencies` dict.
coin_type = 'bitcoin'
# coin_feat lists the price columns drawn by the line plot in the next cell.
coin_feat = ['Open', 'Close']
# Last expression of the cell: display the first rows of the selected frame.
currencies[coin_type].head()
Out[6]:
In [7]:
# Line plot of the selected features (coin_feat) for the selected coin.

# Basic validation of the user selection; KeyError is kept as the exception
# type so a bad selection fails the same way a raw lookup would, but with a
# message that says what is wrong.
if coin_type not in currencies:
    raise KeyError('Unknown currency {!r}; choose one of {}'.format(
        coin_type, sorted(currencies)))
missing_feats = [feat for feat in coin_feat
                 if feat not in currencies[coin_type].columns]
if missing_feats:
    raise KeyError('Unknown feature(s) {} for {!r}'.format(missing_feats, coin_type))

# Draw one labelled line per feature. Passing the whole frame to a single
# plt.plot call creates the lines without labels, leaving the legend empty.
for feat in coin_feat:
    plt.plot(currencies[coin_type][feat], label=feat)
plt.legend(bbox_to_anchor=(1.01, 1))
plt.xlabel('Time(Yr-M)')
plt.ylabel('Value(USD)')
plt.title('{} Price - {}'.format(coin_feat, coin_type))
plt.show()
In [8]:
# Candlestick Graph Visualization
# Aggregate the daily prices of the selected coin into 10-day candles.
# Each candle takes the first Open, highest High, lowest Low and last Close of
# its window. The index is sorted first so that 'first' and 'last' follow
# chronological order rather than the row order of the CSV.
candle_cols = ['Open', 'High', 'Low', 'Close']
daily_prices = currencies[coin_type][candle_cols].sort_index()
ohlc = (
    daily_prices
    .resample('10D')
    .agg({'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last'})
    [candle_cols]   # enforce the (open, high, low, close) column order
    .dropna()       # drop windows that contain no observations
    .reset_index()
)
# candlestick_ohlc expects rows of (date number, open, high, low, close).
ohlc['Date'] = ohlc['Date'].map(mdates.date2num)

fig, ax = plt.subplots()
candlestick_ohlc(ax, ohlc.values, width=2, colorup='g')
ax.xaxis_date()
plt.title('Candlestick Chart - {}'.format(coin_type))
plt.xlabel('Time(Yr-M)')
plt.ylabel('Value(USD)')
# No legend call: nothing on this chart is given a label, so there would be
# no entries to show.
plt.show()
In [9]:
# Overlay the four price columns of the selected coin on a single chart,
# one labelled line per column.
ohlc = ['Open', 'High', 'Low', 'Close']
selected_frame = currencies[coin_type]
for column in ohlc:
    plt.plot(selected_frame[column], label=column)

plt.legend(bbox_to_anchor=(1.01, 1))
plt.xlabel('Time(Yr-M)')
plt.ylabel('Value(USD)')
plt.show()
In [10]:
# Spearman rank correlation between the closing prices of all currencies.
files_to_use = [
    'bitcoin_price.csv',
    'bitconnect_price.csv',
    'dash_price.csv',
    'ethereum_price.csv',
    'iota_price.csv',
    'litecoin_price.csv',
    'monero_price.csv',
    'nem_price.csv',
    'neo_price.csv',
    'numeraire_price.csv',
    'omisego_price.csv',
    'qtum_price.csv',
    'ripple_price.csv',
    'stratis_price.csv',
    'waves_price.csv']

# Build one wide frame: a 'Date' column plus one closing-price column per
# currency. Files are read from input_dir, the same location the loading cell
# uses, instead of a second hard-coded path.
cols_to_use = []
df = None
for file_name in files_to_use:
    # 'bitcoin_price.csv' -> 'bitcoin'
    currency_name = file_name.split("_")[0]
    close_df = pd.read_csv("{}/{}".format(input_dir, file_name),
                           usecols=["Date", "Close"], parse_dates=["Date"])
    close_df.columns = ["Date", currency_name]
    # pd.merge defaults to an inner join, so only dates present in every
    # file read so far are kept.
    df = close_df if df is None else pd.merge(df, close_df, on="Date")
    cols_to_use.append(currency_name)

# Correlate the price columns only (the 'Date' column is left out).
temp_df = df[cols_to_use]
corrmat = temp_df.corr(method='spearman')

fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(corrmat, vmax=1., square=True, ax=ax)
plt.title("Cryptocurrency correlation map", fontsize=15)
plt.show()
In [11]:
# Closing price of every loaded currency on one shared axis.
# Values are raw (unnormalized), so all series share the same USD scale.
for coin_name, coin_frame in currencies.items():
    plt.plot(coin_frame['Close'], label=coin_name)

plt.legend(bbox_to_anchor=(1.01, 1))
plt.xlabel('Time(Yr-M)')
plt.ylabel('Value(USD)')
plt.show()
In [12]:
# Closing prices again, limited to the first 365 rows of each currency.
# NOTE(review): presumably the most recent year, since an earlier cell treats
# index[0] as the newest date; confirm the row order of the CSVs.
for coin_name in currencies:
    close_head = currencies[coin_name]['Close'].iloc[:365]
    plt.plot(close_head, label=coin_name)

plt.legend(bbox_to_anchor=(1.01, 1))
plt.xlabel('Time(Yr-M)')
plt.ylabel('Value(USD)')
plt.show()
In [ ]:
In [ ]: