Data source: https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory/data
This notebook uses the `bitcoin_cash_price.csv` file from that dataset.
In [81]:
%%bash
# Long-format listing of the working directory with human-readable sizes.
ls -l -h
In [190]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
# Apply the 'ggplot' style sheet to the figures created from here on.
style.use('ggplot')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [122]:
# Read the price history; the 'Date' column is parsed into datetimes.
df = pd.read_csv(
    filepath_or_buffer='bitcoin_cash_price.csv',
    parse_dates=['Date'],
)
# Summarise the columns, their dtypes and non-null counts.
df.info()
In [123]:
# Preview the first three rows of the frame.
df.iloc[:3]
Out[123]:
In [124]:
# 'Volume' holds text with thousands separators, so strip the commas and
# convert the column to float64.
# Going through astype(str) and the vectorised .str accessor keeps this cell
# safe to re-run: the previous per-value x.replace(...) raised as soon as the
# column had already been converted to floats.
# 'Market Cap' has '-' values and is intentionally left untouched here.
df['Volume'] = (
    df['Volume']
    .astype(str)
    .str.replace(',', '')
    .astype('float64')
)
df.dtypes
Out[124]:
In [86]:
# df['Volume'] = pd.to_numeric(df['Volume'], errors='coerce')
# df['Market Cap'] = df['Market Cap'].apply(\
# lambda x: pd.to_numeric(x.replace(',','')))
# # , errors='coerce')
# df.head(3)
In [87]:
# Plot the opening and closing prices over time, with the daily low-high
# range shaded behind them.
plt.figure(figsize=(20,10))
# Use the dates as the index so they form the x axis.
df.index = df['Date']
# Every artist gets a label: plt.legend() only lists labelled artists and
# otherwise produces an empty legend plus a warning.
plt.plot(df.index, df['Open'], c='r', label='Open')
plt.plot(df.index, df['Close'], c='b', label='Close')
plt.fill_between(df.index, df['Low'], df['High'], facecolor='blue', alpha=.2, label='Low-High range')
plt.legend()
plt.show()
In [188]:
# Show the style sheet names that matplotlib reports as available.
print(plt.style.available)
In [202]:
# https://www.tradeciety.com/how-to-use-moving-averages/
# Draw the closing price, three exponential moving averages and Bollinger
# bands once per style sheet so the themes can be compared.
styles = ['seaborn-darkgrid', 'dark_background', 'seaborn-whitegrid']
min_per = 1
# (EWM span, line colour) for each moving average.
ewm_specs = [(10, '#4f7ccb'), (21, '#fce651'), (50, '#f54aff')]

# Loop-invariant work is done once: the dates become the x axis and the band
# is computed a single time instead of once per style.
# NOTE(review): rolling()/ewm() follow row order - confirm the CSV rows are
# sorted oldest-first, otherwise the averages run backwards in time.
df.index = df['Date']
close = df['Close']
# Bollinger bands are centred on the moving average, not on the raw close:
# SMA(15) +/- 2 rolling standard deviations.
rolling_mean = close.rolling(15, min_periods=min_per).mean()
rolling_std = close.rolling(15, min_periods=min_per).std()

# The loop variable is deliberately not called `style`: that name is the
# imported matplotlib.style module and must not be overwritten.
for style_name in styles: # plt.style.available
    with plt.style.context(style_name):
        fig = plt.figure(figsize=(20,10))
        plt.title(style_name)
        # ewm() takes either span or alpha, not both.
        for span, colour in ewm_specs:
            plt.plot(df.index, close.ewm(span=span, min_periods=min_per).mean(), '--', lw=2, c=colour, label='EWM(%d)' % span)
        plt.plot(df.index, close, c='b', label='Close')
        plt.fill_between(df.index, rolling_mean - 2*rolling_std, rolling_mean + 2*rolling_std, facecolor='#4f7ccb', alpha=.2, label='Bollinger bands')
        plt.legend()
        plt.show()
In [88]:
import matplotlib.ticker as mticker
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
import datetime as dt
In [89]:
# https://stackoverflow.com/questions/42437349/candlestick-plot-from-a-pandas-dataframe
def candlestick(df):
    """Draw a candlestick chart of the price history in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'Open', 'High', 'Low' and 'Close'.
        The frame is not modified.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Work on a copy limited to the five columns candlestick_ohlc consumes.
    # Assigning to the caller's frame would turn its 'Date' column into
    # floats for every later cell, and extra columns (e.g. text-valued
    # 'Market Cap') would force an object-dtype array.
    df_ohlc = df[['Date', 'Open', 'High', 'Low', 'Close']].copy()
    # Convert the dates to matplotlib's float date numbers.
    df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)

    fig = plt.figure(figsize=(20,10))
    ax1 = plt.subplot2grid((6,1), (0,0), rowspan=6, colspan=1)
    # Have the x axis render the float date numbers as dates.
    ax1.xaxis_date()
    plt.xlabel('Date')
    candlestick_ohlc(ax1, df_ohlc.values, width=1, colorup='g', colordown='k', alpha=0.75)
    plt.ylabel('Price')
    # No plt.legend(): nothing here carries a label, so it only warned.
    plt.show()

candlestick(df)
In [99]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import numpy as np
In [125]:
# Build the design matrix X from the feature columns and the target column
# vector Y from 'Close', then show their types and first three rows.
features = ['Open', 'High', 'Low', 'Volume']
X = np.array(df[features]).reshape(-1, len(features))
Y = np.array(df['Close'])[:, np.newaxis]
print(type(X), type(Y), sep='\n')
print(X[:3], Y[:3], sep='\n')
In [126]:
# Hold out 33% of the rows for testing; random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42)
# Report each split's shape, type and first five rows. The arrays are paired
# with their names directly instead of being looked up with eval().
splits = [
    ('x_train', x_train),
    ('x_test', x_test),
    ('y_train', y_train),
    ('y_test', y_test),
]
for item, arr in splits:
    print('%s: %s' % (item, arr.shape))
    print('%s: %s:'% (item, type(arr)))
    print('%s: %s:'% (item, arr[:5]))
In [127]:
# Linear regression estimator created with scikit-learn's default settings.
model = linear_model.LinearRegression() # normalize=True
In [129]:
# https://stackoverflow.com/questions/16453644/regression-with-date-variable-using-scikit-learn
# Disabled alternative: model.fit(np.array(df.index).reshape(-1,1), df['Close'])
# Fit on the training split, predict both splits and report the RMSE of each.
model.fit(x_train, y_train)
y_pred_train, y_pred_test = (model.predict(part) for part in (x_train, x_test))
train_rmse, test_rmse = (
    np.sqrt(mean_squared_error(actual, predicted))
    for actual, predicted in ((y_train, y_pred_train), (y_test, y_pred_test))
)
print(train_rmse, test_rmse, sep='\n')
In [149]:
# Compare the length of each index slice with the length of the matching
# prediction array (train first, then test).
print(
    len(df.index[:len(x_train)]),
    len(y_pred_train),
    len(df.index[len(x_train):]),
    len(y_pred_test),
    sep='\n',
)
In [151]:
# Plot the model's predictions at the dates they actually belong to, on top
# of the opening price and the shaded daily low-high range.
plt.figure(figsize=(20,10))
df.index = df['Date']

# train_test_split shuffles the rows, so the predictions are NOT in date
# order and must not be drawn against the first/last slices of the index.
# Splitting the row positions with the same test_size and random_state as the
# data split reproduces the same partition, which gives the date of every
# prediction. Keep these two arguments in sync with the split cell.
row_positions = np.arange(len(df.index))
train_rows, test_rows = train_test_split(
    row_positions, test_size=0.33, random_state=42)
assert len(train_rows) == len(y_pred_train), 'split parameters differ from the data split'
assert len(test_rows) == len(y_pred_test), 'split parameters differ from the data split'

plt.plot(df.index, df['Open'], c='r', label='Open')
# Markers rather than lines: consecutive predictions are not adjacent in time.
plt.plot(df.index[train_rows], np.ravel(y_pred_train), '.', c='green', label='Predicted close (train)')
plt.plot(df.index[test_rows], np.ravel(y_pred_test), '.', c='orange', label='Predicted close (test)')
plt.fill_between(df.index, df['Low'], df['High'], facecolor='blue', alpha=.2, label='Low-High range')
plt.legend()
plt.show()
In [ ]:
In [ ]:
In [ ]: