Data Preparation


In [56]:
import numpy as np
import pandas as pd
import os

btc = pd.read_csv('/Users/oj/Desktop/PythonProjectCrypto/CryptoData/bitcoin_price.csv')
eth = pd.read_csv('/Users/oj/Desktop/PythonProjectCrypto/CryptoData/ethereum_price.csv')
ripple = pd.read_csv('/Users/oj/Desktop/PythonProjectCrypto/CryptoData/ripple_price.csv')
monero = pd.read_csv('/Users/oj/Desktop/PythonProjectCrypto/CryptoData/monero_price.csv')
dash = pd.read_csv('/Users/oj/Desktop/PythonProjectCrypto/CryptoData/dash_price.csv')
litecoin = pd.read_csv('/Users/oj/Desktop/PythonProjectCrypto/CryptoData/litecoin_price.csv')

In [57]:
def req_data(coindb):
    coindb = coindb.loc[:,['Date','Close']] #Only working with Closing values for this study
    coindb['Date'] = pd.to_datetime(coindb['Date'])
    
    return coindb
btc_req_data = pd.DataFrame(req_data(btc))
eth_req_data = pd.DataFrame(req_data(eth))
ripple_req_data = pd.DataFrame(req_data(ripple))
monero_req_data = pd.DataFrame(req_data(monero))
dash_req_data = pd.DataFrame(req_data(dash))
litecoin_req_data = pd.DataFrame(req_data(litecoin))

btc_req_data
#eth_req_data
#ripple_req_data
#monero_req_data
#dash_req_data
#litecoin_req_data


Out[57]:
Date Close
0 2017-09-17 3582.88
1 2017-09-16 3625.04
2 2017-09-15 3637.52
3 2017-09-14 3154.95
4 2017-09-13 3882.59
5 2017-09-12 4130.81
6 2017-09-11 4161.27
7 2017-09-10 4122.94
8 2017-09-09 4226.06
9 2017-09-08 4228.75
10 2017-09-07 4599.88
11 2017-09-06 4597.12
12 2017-09-05 4376.53
13 2017-09-04 4236.31
14 2017-09-03 4582.96
15 2017-09-02 4578.77
16 2017-09-01 4892.01
17 2017-08-31 4703.39
18 2017-08-30 4565.30
19 2017-08-29 4579.02
20 2017-08-28 4382.66
21 2017-08-27 4382.88
22 2017-08-26 4352.40
23 2017-08-25 4371.60
24 2017-08-24 4334.68
25 2017-08-23 4151.52
26 2017-08-22 4100.52
27 2017-08-21 4001.74
28 2017-08-20 4087.66
29 2017-08-19 4193.70
... ... ...
1574 2013-05-27 129.75
1575 2013-05-26 133.48
1576 2013-05-25 131.98
1577 2013-05-24 133.20
1578 2013-05-23 126.70
1579 2013-05-22 123.89
1580 2013-05-21 122.88
1581 2013-05-20 122.00
1582 2013-05-19 121.99
1583 2013-05-18 123.50
1584 2013-05-17 123.02
1585 2013-05-16 118.76
1586 2013-05-15 114.22
1587 2013-05-14 111.50
1588 2013-05-13 117.98
1589 2013-05-12 115.00
1590 2013-05-11 115.24
1591 2013-05-10 117.20
1592 2013-05-09 112.67
1593 2013-05-08 113.57
1594 2013-05-07 111.50
1595 2013-05-06 112.30
1596 2013-05-05 115.91
1597 2013-05-04 112.50
1598 2013-05-03 97.75
1599 2013-05-02 105.21
1600 2013-05-01 116.99
1601 2013-04-30 139.00
1602 2013-04-29 144.54
1603 2013-04-28 134.21

1604 rows × 2 columns

Exploration/Plotting


In [58]:
# Historical prices 
import matplotlib.pyplot as plt

def plot_data(coindb, name):
    plt.figure(figsize=(16,6))
    x = coindb['Date'] 
    y = coindb['Close']
    plt.xlabel('Timeline')
    plt.ylabel(name)                    
    plt.plot(x,y,'--')
    _ = plt.xticks(rotation=45)
    plt.show()

plot_data(btc_req_data, 'Bitcoin Closing Values')
plot_data(eth_req_data, 'Ethereum Closing Values')
plot_data(ripple_req_data, 'Ripple Closing Values')
plot_data(monero_req_data, 'Monero Closing Values')
plot_data(dash_req_data, 'Dash Closing Values')
plot_data(litecoin_req_data, 'Litecoin Closing Values')


OBSERVATIONS:

1. For the given time frame, it is observed that there has been a sudden surge in
the closing prices of the cryptocurrencies

In [59]:
# import seaborn as sns
New_dataSet = pd.DataFrame()
New_dataSet['Bitcoin Closing Price'] = btc_req_data['Close']
New_dataSet['Ethereum Closing Price'] = eth_req_data['Close']
New_dataSet['Ripple Closing Price'] = ripple_req_data['Close']
New_dataSet['Monero Closing Price'] = monero_req_data['Close']
New_dataSet['Dash Closing Price'] = dash_req_data['Close']
New_dataSet['Litecoin Closing Price'] = litecoin_req_data['Close']

New_dataSet.fillna(0, inplace=True)

New_dataSet


Out[59]:
Bitcoin Closing Price Ethereum Closing Price Ripple Closing Price Monero Closing Price Dash Closing Price Litecoin Closing Price
0 3582.88 251.75 0.178393 93.74 313.84 48.49
1 3625.04 246.52 0.179082 95.29 298.86 48.26
2 3637.52 250.46 0.181358 99.85 284.36 48.21
3 3154.95 213.91 0.164167 83.04 236.24 41.58
4 3882.59 277.11 0.200778 111.33 301.29 61.73
5 4130.81 291.46 0.209303 111.89 325.97 64.23
6 4161.27 294.53 0.214441 112.75 318.40 66.04
7 4122.94 288.75 0.213031 112.33 320.11 61.61
8 4226.06 294.40 0.209995 116.27 323.14 66.01
9 4228.75 296.50 0.211518 118.04 333.21 67.79
10 4599.88 329.43 0.223640 120.84 343.91 78.48
11 4597.12 334.34 0.226186 121.63 347.17 80.11
12 4376.53 312.99 0.215189 118.82 327.23 71.29
13 4236.31 295.17 0.204968 106.17 316.13 65.21
14 4582.96 347.48 0.228811 126.01 356.39 76.84
15 4578.77 348.98 0.226669 124.80 350.17 79.02
16 4892.01 387.74 0.248479 141.20 393.35 86.04
17 4703.39 383.04 0.255630 140.41 378.32 71.06
18 4565.30 378.49 0.228006 132.38 368.34 64.17
19 4579.02 370.67 0.218564 133.03 359.55 63.17
20 4382.66 347.75 0.224508 145.40 356.78 62.36
21 4382.88 347.89 0.202995 130.68 362.89 61.16
22 4352.40 333.88 0.212577 138.05 399.85 51.75
23 4371.60 331.92 0.217828 108.26 315.89 51.18
24 4334.68 325.61 0.218958 86.29 315.37 50.19
25 4151.52 317.52 0.246827 90.33 292.57 53.25
26 4100.52 314.79 0.239286 91.16 291.50 46.73
27 4001.74 321.59 0.191918 77.82 279.58 47.94
28 4087.66 301.43 0.158735 54.71 294.20 46.23
29 4193.70 297.47 0.155057 55.96 288.70 45.48
... ... ... ... ... ... ...
1574 129.75 0.00 0.000000 0.00 0.00 3.10
1575 133.48 0.00 0.000000 0.00 0.00 3.25
1576 131.98 0.00 0.000000 0.00 0.00 3.12
1577 133.20 0.00 0.000000 0.00 0.00 3.18
1578 126.70 0.00 0.000000 0.00 0.00 3.18
1579 123.89 0.00 0.000000 0.00 0.00 3.12
1580 122.88 0.00 0.000000 0.00 0.00 3.09
1581 122.00 0.00 0.000000 0.00 0.00 3.18
1582 121.99 0.00 0.000000 0.00 0.00 3.30
1583 123.50 0.00 0.000000 0.00 0.00 3.29
1584 123.02 0.00 0.000000 0.00 0.00 3.19
1585 118.76 0.00 0.000000 0.00 0.00 2.92
1586 114.22 0.00 0.000000 0.00 0.00 2.94
1587 111.50 0.00 0.000000 0.00 0.00 2.82
1588 117.98 0.00 0.000000 0.00 0.00 3.28
1589 115.00 0.00 0.000000 0.00 0.00 3.27
1590 115.24 0.00 0.000000 0.00 0.00 3.35
1591 117.20 0.00 0.000000 0.00 0.00 3.44
1592 112.67 0.00 0.000000 0.00 0.00 3.42
1593 113.57 0.00 0.000000 0.00 0.00 3.41
1594 111.50 0.00 0.000000 0.00 0.00 3.33
1595 112.30 0.00 0.000000 0.00 0.00 3.37
1596 115.91 0.00 0.000000 0.00 0.00 3.59
1597 112.50 0.00 0.000000 0.00 0.00 3.48
1598 97.75 0.00 0.000000 0.00 0.00 3.04
1599 105.21 0.00 0.000000 0.00 0.00 3.37
1600 116.99 0.00 0.000000 0.00 0.00 3.80
1601 139.00 0.00 0.000000 0.00 0.00 4.30
1602 144.54 0.00 0.000000 0.00 0.00 4.38
1603 134.21 0.00 0.000000 0.00 0.00 4.35

1604 rows × 6 columns


In [60]:
New_dataSet = New_dataSet.iloc[::-1]
mx = New_dataSet.plot(figsize=(20,16))
mx.set_xlabel("Time")
mx.set_ylabel("Closing Prices")

plt.show()


# NORMALIZED PLOT

In [61]:
Norm_New_dataSet = pd.DataFrame()
Norm_New_dataSet
Norm_New_dataSet['Norm Bitcoin Closing Price'] = New_dataSet['Bitcoin Closing Price']/New_dataSet['Bitcoin Closing Price'].mean()
Norm_New_dataSet['Norm Ethereum Closing Price'] = New_dataSet['Ethereum Closing Price']/New_dataSet['Ethereum Closing Price'].mean()
Norm_New_dataSet['Norm Ripple Closing Price'] = New_dataSet['Ripple Closing Price']/New_dataSet['Ripple Closing Price'].mean()
Norm_New_dataSet['Norm Monero Closing Price'] = New_dataSet['Monero Closing Price']/New_dataSet['Monero Closing Price'].mean()
Norm_New_dataSet['Norm Dash Closing Price'] = New_dataSet['Dash Closing Price']/New_dataSet['Dash Closing Price'].mean()
Norm_New_dataSet['Norm Litecoin Closing Price'] = New_dataSet['Litecoin Closing Price']/New_dataSet['Litecoin Closing Price'].mean()

Norm_New_dataSet = Norm_New_dataSet[::-1]

Norm_New_dataSet


Out[61]:
Norm Bitcoin Closing Price Norm Ethereum Closing Price Norm Ripple Closing Price Norm Monero Closing Price Norm Dash Closing Price Norm Litecoin Closing Price
0 5.224473 9.698012 6.481849 12.830468 13.298869 5.446033
1 5.285950 9.496540 6.506884 13.042621 12.664096 5.420201
2 5.304148 9.648319 6.589582 13.666761 12.049663 5.414586
3 4.600476 8.240325 5.964953 11.365927 10.010594 4.669954
4 5.661504 10.674940 7.295201 15.238062 12.767066 6.933051
5 6.023452 11.227737 7.604954 15.314711 13.812873 7.213832
6 6.067869 11.346000 7.791641 15.432422 13.492097 7.417118
7 6.011977 11.123341 7.740409 15.374935 13.564558 6.919573
8 6.162344 11.340992 7.630097 15.914215 13.692953 7.413748
9 6.166266 11.421890 7.685435 16.156480 14.119666 7.613665
10 6.707440 12.690432 8.125884 16.539724 14.573075 8.814285
11 6.703415 12.879577 8.218392 16.647854 14.711217 8.997355
12 6.381756 12.057124 7.818820 16.263241 13.866266 8.006758
13 6.177290 11.370655 7.447443 14.531798 13.395907 7.323898
14 6.682767 13.385761 8.313770 17.247357 15.101911 8.630093
15 6.676657 13.443545 8.235942 17.081741 14.838341 8.874934
16 7.133417 14.936673 9.028401 19.326457 16.668079 9.663368
17 6.858375 14.755617 9.288230 19.218327 16.031188 7.980926
18 6.657016 14.580341 8.284521 18.119237 15.608288 7.207093
19 6.677022 14.279095 7.941449 18.208205 15.235815 7.094781
20 6.390694 13.396162 8.157422 19.901323 15.118437 7.003808
21 6.391015 13.401555 7.375755 17.886553 15.377347 6.869033
22 6.346570 12.861857 7.723914 18.895307 16.943515 5.812172
23 6.374567 12.786353 7.914707 14.817863 13.385737 5.748154
24 6.320731 12.543276 7.955765 11.810764 13.363702 5.636965
25 6.053651 12.231630 8.968376 12.363731 12.397559 5.980641
26 5.979284 12.126464 8.694376 12.477336 12.352218 5.248363
27 5.835246 12.388416 6.973276 10.651451 11.847112 5.384261
28 5.960532 11.611805 5.767583 7.488318 12.466630 5.192207
29 6.115157 11.459256 5.633944 7.659409 12.233569 5.107973
... ... ... ... ... ... ...
1574 0.189198 0.000000 0.000000 0.000000 0.000000 0.348169
1575 0.194637 0.000000 0.000000 0.000000 0.000000 0.365016
1576 0.192450 0.000000 0.000000 0.000000 0.000000 0.350415
1577 0.194229 0.000000 0.000000 0.000000 0.000000 0.357154
1578 0.184751 0.000000 0.000000 0.000000 0.000000 0.357154
1579 0.180654 0.000000 0.000000 0.000000 0.000000 0.350415
1580 0.179181 0.000000 0.000000 0.000000 0.000000 0.347046
1581 0.177898 0.000000 0.000000 0.000000 0.000000 0.357154
1582 0.177883 0.000000 0.000000 0.000000 0.000000 0.370631
1583 0.180085 0.000000 0.000000 0.000000 0.000000 0.369508
1584 0.179385 0.000000 0.000000 0.000000 0.000000 0.358277
1585 0.173173 0.000000 0.000000 0.000000 0.000000 0.327953
1586 0.166553 0.000000 0.000000 0.000000 0.000000 0.330199
1587 0.162587 0.000000 0.000000 0.000000 0.000000 0.316721
1588 0.172036 0.000000 0.000000 0.000000 0.000000 0.368385
1589 0.167690 0.000000 0.000000 0.000000 0.000000 0.367262
1590 0.168040 0.000000 0.000000 0.000000 0.000000 0.376247
1591 0.170898 0.000000 0.000000 0.000000 0.000000 0.386355
1592 0.164293 0.000000 0.000000 0.000000 0.000000 0.384109
1593 0.165605 0.000000 0.000000 0.000000 0.000000 0.382986
1594 0.162587 0.000000 0.000000 0.000000 0.000000 0.374001
1595 0.163753 0.000000 0.000000 0.000000 0.000000 0.378493
1596 0.169017 0.000000 0.000000 0.000000 0.000000 0.403202
1597 0.164045 0.000000 0.000000 0.000000 0.000000 0.390848
1598 0.142537 0.000000 0.000000 0.000000 0.000000 0.341430
1599 0.153415 0.000000 0.000000 0.000000 0.000000 0.378493
1600 0.170592 0.000000 0.000000 0.000000 0.000000 0.426788
1601 0.202687 0.000000 0.000000 0.000000 0.000000 0.482944
1602 0.210765 0.000000 0.000000 0.000000 0.000000 0.491929
1603 0.195702 0.000000 0.000000 0.000000 0.000000 0.488559

1604 rows × 6 columns


In [62]:
ax = Norm_New_dataSet[::-1].plot(figsize=(20,16))
ax.set_xlabel("Time")
ax.set_ylabel("Normalized Closing values")
plt.show()


Stability Check


In [63]:
import statistics
lst=[]
coindbdict = {'Bitcoin': btc_req_data, 'Ethereum': eth_req_data, 'Ripple': ripple_req_data, 'Monero':monero_req_data,'Dash':dash_req_data,'Litecoin':litecoin_req_data}

def Var_Coeff(coindb, name):
        mean = coindb['Close'].mean()
        #print(mean)
        std_dev = statistics.stdev(coindb['Close'])
        coeff_var = round((std_dev/mean),5)
        return coeff_var, name

for key in coindbdict:
    lst.append(Var_Coeff(coindbdict[key], key))
lst.sort()

# print(lst)
print("Most Stable coin / Least volatile crypto-currency, for the given dataset is:",min(lst)[1].upper(),"with a variance coeff of",min(lst)[0])
print("Least Stable coin / Most volatile crypto-currency, for the given dataset is:",max(lst)[1].upper(),"with a variance coeff of",max(lst)[0])


Most Stable coin / Least volatile crypto-currency, for the given dataset is: BITCOIN with a variance coeff of 1.14076
Least Stable coin / Most volatile crypto-currency, for the given dataset is: DASH with a variance coeff of 2.24313

Correlation


In [64]:
New_dataSet_Corr = pd.DataFrame()
New_dataSet_Corr['Bitcoin Closing Price'] = btc_req_data['Close']
New_dataSet_Corr['Ethereum Closing Price'] = eth_req_data['Close']
New_dataSet_Corr['Ripple Closing Price'] = ripple_req_data['Close']
New_dataSet_Corr['Monero Closing Price'] = monero_req_data['Close']
New_dataSet_Corr['Dash Closing Price'] = dash_req_data['Close']
New_dataSet_Corr['Litecoin Closing Price'] = litecoin_req_data['Close']
#New_dataSet
New_dataSet_Corr.fillna(0, inplace=True)

x = New_dataSet_Corr.pct_change().corr(method='pearson')
x


Out[64]:
Bitcoin Closing Price Ethereum Closing Price Ripple Closing Price Monero Closing Price Dash Closing Price Litecoin Closing Price
Bitcoin Closing Price 1.000000 0.213053 0.321753 0.374451 0.373705 0.666596
Ethereum Closing Price 0.213053 1.000000 0.051813 0.191895 0.160224 0.176788
Ripple Closing Price 0.321753 0.051813 1.000000 0.017415 0.076917 0.281097
Monero Closing Price 0.374451 0.191895 0.017415 1.000000 0.310150 0.254751
Dash Closing Price 0.373705 0.160224 0.076917 0.310150 1.000000 0.259650
Litecoin Closing Price 0.666596 0.176788 0.281097 0.254751 0.259650 1.000000

In [65]:
plt.subplots(figsize=(10,10))
plt.xticks(range(6), x.columns.values, rotation='vertical')
plt.yticks(range(6), x.columns.values)
plt.xlabel('CryptoCurrency Correlation')
heat = plt.imshow(x, cmap="hot")#, figsize=(20,16))
plt.colorbar(heat)

plt.show()


Regression models & Prediction


In [77]:
import numpy as np 
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd


def TestData(coindb):
    X = coindb['Date'].reset_index(drop=True)
    Y = coindb['Close'].reset_index(drop=True)
    #print(X)
    #print(Y)
    m=(len(coindb)//4)*3
    n=len(coindb)//4
    X=X.values.reshape(len(X),1)
    Y=Y.values.reshape(len(Y),1)
 
    # Split the data into training/testing sets
    X_train = X[:m]
    X_test = X[m:len(X)]
 
    # Split the targets into training/testing sets
    Y_train = Y[:m]
    Y_test = Y[m:len(X)]

    return X_test,X_train,Y_test,Y_train

In [105]:
def linear_reg(df):
    df = df[::-1]
    X_test,X_train,Y_test,Y_train=TestData(df)
    plt.scatter(X_test, Y_test, color='black',s=3)
    plt.title('Test Data')
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.xticks(())
    plt.yticks(())
 
   
    regr = linear_model.LinearRegression()
    # Train the model using the training sets
    regr.fit(X_train, Y_train)
    
    plt.plot(X_test, regr.predict(Y_test), color='red',linewidth=3)

    plt.show()
    
linear_reg(btc_req_data)