notebook.community

Edit and run



In [1]:

    
import pandas as pd
import matplotlib.pyplot as plt



In [2]:

    
train_set_path = r'C:\Users\dmpas\thesis\data\network\bitcoin\estimates\train_set.csv'
test_set_path = r'C:\Users\dmpas\thesis\data\network\bitcoin\estimates\test_set.csv'



In [3]:

    
def ma(price):
    if price < -100:
        return 5
    if price < -50:
        return 4
    if price < 0:
        return 3
    if price < 50:
        return 2
    return 1



In [4]:

    
train_df = pd.read_csv(train_set_path, index_col=0)
train_df.columns = ['Author', 'bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr', 'price']
train_df['price_category'] = train_df.price.map(ma)

print(len(train_df))



In [5]:

    
train_df.head()









    Out[5]:







  
    
      
      Author
      bc
      cc
      chibs
      dc
      ec
      hm
      is
      lr
      price
      price_category
    
  
  
    
      0
      177412
      0.634528
      1.239146
      -18.257680
      1.438233
      0.372907
      -1325.966306
      -22076.378149
      -27003.578634
      6.9
      2
    
    
      1
      53511
      0.262976
      1.225784
      -101.154828
      1.062802
      0.305459
      -498.691400
      -7557.808217
      -7461.446531
      6.9
      2
    
    
      2
      156757
      0.091405
      1.200112
      -3.394149
      0.757074
      0.209725
      -237.573484
      -3396.268925
      -4125.309848
      6.9
      2
    
    
      3
      100398
      0.098763
      1.190790
      -26.896536
      0.670117
      0.167796
      -252.269969
      -3612.483019
      -3672.512010
      6.9
      2
    
    
      4
      166687
      0.065896
      1.192045
      -8.781838
      0.503796
      0.157702
      -117.802576
      -1858.142623
      -1775.790037
      6.9
      2



In [6]:

    
test_df = pd.read_csv(test_set_path, index_col=0)
test_df.columns = ['Author', 'bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr', 'price']
test_df['price_category'] = train_df.price.map(ma)

print(len(test_df))



In [7]:

    
test_df.head()









    Out[7]:







  
    
      
      Author
      bc
      cc
      chibs
      dc
      ec
      hm
      is
      lr
      price
      price_category
    
  
  
    
      0
      99508
      0.022297
      1.169596
      -30.870187
      0.502415
      0.150627
      -222.525649
      -2995.403068
      -2606.443962
      6.9
      2
    
    
      1
      177268
      0.016294
      1.159342
      -9.926871
      0.429262
      0.128385
      -73.944996
      -1053.014603
      -954.976211
      6.9
      2
    
    
      2
      50584
      0.017190
      1.161668
      16.082362
      0.324362
      0.094380
      -84.019078
      -1102.406890
      -1055.546625
      6.9
      2
    
    
      3
      153645
      0.008935
      1.117261
      -12.029111
      0.314010
      0.091849
      -86.938191
      -981.997684
      -975.776543
      6.9
      2
    
    
      4
      250893
      0.004391
      1.115628
      -20.254224
      0.287785
      0.085615
      -34.020792
      -442.435106
      -418.764624
      6.9
      2



In [8]:

    
from sklearn import preprocessing



In [9]:

    
train_set_X = preprocessing.scale(train_df.loc[:, ['bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr']])
train_set_Y = train_df.loc[:, ['price_category']].values.ravel()



In [10]:

    
test_set_X = preprocessing.scale(test_df.loc[:, ['bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr']])
test_set_Y = test_df.loc[:, ['price_category']].values.ravel()



In [11]:

    
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC



In [12]:

    
n_estimators = 8



In [13]:

    
svc1 = OneVsRestClassifier(BaggingClassifier(SVC(kernel='linear'), max_samples=1.0/50, n_estimators=n_estimators))
svc1.fit(train_set_X, train_set_Y)

y_pred_1 = svc1.predict(test_set_X)
print(confusion_matrix(test_set_Y, y_pred_1))









    



[[ 7885 18471   371  4269   144]
 [ 4817 12209   291  2016   118]
 [ 2485  7860    77  1930    34]
 [    0     0     0     0     0]
 [ 6564 14908   305  3663   104]]



In [ ]:



In [14]:

    
svc2 = OneVsRestClassifier(BaggingClassifier(SVC(kernel='poly', degree=4), max_samples=1.0/50, n_estimators=n_estimators))
svc2.fit(train_set_X, train_set_Y)
y_pred_2 = svc2.predict(test_set_X)

print(confusion_matrix(test_set_Y, y_pred_2))









    



[[18453  3784  2822  2297  3784]
 [11236  2629  1793  1355  2438]
 [ 7944  1293   879  1028  1242]
 [    0     0     0     0     0]
 [16216  2626  1907  1833  2962]]



In [ ]:



In [15]:

    
svc4 = OneVsRestClassifier(BaggingClassifier(SVC(kernel='rbf'), max_samples=1.0/50, n_estimators=n_estimators))
svc4.fit(train_set_X, train_set_Y)
y_pred_4 = svc4.predict(test_set_X)

print(confusion_matrix(test_set_Y, y_pred_4))









    



[[14610  5656  2322  2365  6187]
 [ 9854  3647  1438  1193  3319]
 [ 5580  1684   965  1338  2819]
 [    0     0     0     0     0]
 [12206  3547  1881  2302  5608]]



In [ ]:



In [16]:

    
svc5 = OneVsRestClassifier(BaggingClassifier(SVC(kernel='sigmoid'), max_samples=1.0/50, n_estimators=n_estimators))
svc5.fit(train_set_X, train_set_Y)
y_pred_5 = svc5.predict(test_set_X)

print(confusion_matrix(test_set_Y, y_pred_5))









    



[[26216  1988     0  2827   109]
 [16901  1239     0  1263    48]
 [10551   766     0  1018    51]
 [    0     0     0     0     0]
 [21397  1648     0  2406    93]]



In [ ]:



In [17]:

    
rfc = RandomForestClassifier(min_samples_leaf=20)
rfc.fit(train_set_X, train_set_Y)

y_pred_6 = rfc.predict(test_set_X)
print(confusion_matrix(test_set_Y, y_pred_6))









    



[[13250  7652  5042  2703  2493]
 [ 9090  5503  1906  2203   749]
 [ 5503  1850  1091  2625  1317]
 [    0     0     0     0     0]
 [15069  4226  1480  2500  2269]]



In [ ]:



In [ ]:

	Author	bc	cc	chibs	dc	ec	hm	is	lr	price	price_category
0	177412	0.634528	1.239146	-18.257680	1.438233	0.372907	-1325.966306	-22076.378149	-27003.578634	6.9	2
1	53511	0.262976	1.225784	-101.154828	1.062802	0.305459	-498.691400	-7557.808217	-7461.446531	6.9	2
2	156757	0.091405	1.200112	-3.394149	0.757074	0.209725	-237.573484	-3396.268925	-4125.309848	6.9	2
3	100398	0.098763	1.190790	-26.896536	0.670117	0.167796	-252.269969	-3612.483019	-3672.512010	6.9	2
4	166687	0.065896	1.192045	-8.781838	0.503796	0.157702	-117.802576	-1858.142623	-1775.790037	6.9	2

	Author	bc	cc	chibs	dc	ec	hm	is	lr	price	price_category
0	99508	0.022297	1.169596	-30.870187	0.502415	0.150627	-222.525649	-2995.403068	-2606.443962	6.9	2
1	177268	0.016294	1.159342	-9.926871	0.429262	0.128385	-73.944996	-1053.014603	-954.976211	6.9	2
2	50584	0.017190	1.161668	16.082362	0.324362	0.094380	-84.019078	-1102.406890	-1055.546625	6.9	2
3	153645	0.008935	1.117261	-12.029111	0.314010	0.091849	-86.938191	-981.997684	-975.776543	6.9	2
4	250893	0.004391	1.115628	-20.254224	0.287785	0.085615	-34.020792	-442.435106	-418.764624	6.9	2