In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
In [2]:
# Directory holding the train/test CSVs. The default is the original
# machine-specific location; set BITCOIN_ESTIMATES_DIR in the environment to
# run the notebook elsewhere without editing this cell.
ESTIMATES_DIR = os.environ.get(
    'BITCOIN_ESTIMATES_DIR',
    r'C:\Users\dmpas\thesis\data\network\bitcoin\estimates',
)
train_set_path = os.path.join(ESTIMATES_DIR, 'train_set.csv')
test_set_path = os.path.join(ESTIMATES_DIR, 'test_set.csv')
In [3]:
def ma(price, thresholds=(-100, -50, 0, 50)):
    """Map a price value to an ordinal category.

    The thresholds are checked in order and the first one that ``price`` is
    strictly below decides the category. With the default thresholds:

        price < -100         -> 5
        -100 <= price < -50  -> 4
        -50  <= price < 0    -> 3
        0    <= price < 50   -> 2
        price >= 50          -> 1

    Args:
        price: A number comparable with ``<``.
        thresholds: Ascending sequence of upper bounds. ``len(thresholds) + 1``
            categories are produced.

    Returns:
        int: ``len(thresholds) + 1`` for the lowest band down to ``1`` for
        values at or above the last threshold.

    Note:
        A NaN price fails every ``<`` comparison and therefore falls through
        to category 1.
    """
    top_category = len(thresholds) + 1
    for rank, upper_bound in enumerate(thresholds):
        if price < upper_bound:
            return top_category - rank
    return 1
In [4]:
# Read the training CSV, taking its first column as the row index.
train_df = pd.read_csv(train_set_path, index_col=0)

# Replace the file's header with short column names.
train_df.columns = [
    'Author',
    'bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr',
    'price',
]

# Derive the class label from the price column via ma().
train_df['price_category'] = train_df['price'].map(ma)

# Report the number of training rows.
print(train_df.shape[0])
In [5]:
# Preview the first rows of the training frame (including price_category).
train_df.head()
Out[5]:
In [6]:
# Read the test CSV, taking its first column as the row index.
test_df = pd.read_csv(test_set_path, index_col=0)

# Replace the file's header with the same short column names used for train_df.
test_df.columns = ['Author', 'bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr', 'price']

# The test labels must be derived from the test set's OWN prices. Mapping
# train_df.price here would align training prices to test rows by index label,
# giving wrong labels (or NaN where an index label is absent from train_df).
test_df['price_category'] = test_df['price'].map(ma)

# Report the number of test rows.
print(len(test_df))
In [7]:
# Preview the first rows of the test frame (including price_category).
test_df.head()
Out[7]:
In [8]:
from sklearn import preprocessing
In [9]:
# Feature matrix: the eight feature columns, standardised column-wise.
train_set_X = preprocessing.scale(
    train_df[['bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr']]
)

# Target vector: the price category as a flat 1-D array.
train_set_Y = train_df['price_category'].values
In [10]:
# Standardise the test features with the mean/std learned from the TRAINING
# features, so both sets pass through the same transformation the classifiers
# are fitted on. Scaling the test set with its own statistics would put it on
# a different scale from the training data.
# StandardScaler applies the same zero-mean / unit-variance standardisation as
# preprocessing.scale, so train_set_X stays consistent with this scaler.
feature_scaler = preprocessing.StandardScaler().fit(
    train_df.loc[:, ['bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr']]
)
test_set_X = feature_scaler.transform(
    test_df.loc[:, ['bc', 'cc', 'chibs', 'dc', 'ec', 'hm', 'is', 'lr']]
)

# Target vector: the price category flattened to a 1-D array.
test_set_Y = test_df.loc[:, ['price_category']].values.ravel()
In [11]:
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
In [12]:
# Number of base SVCs in each BaggingClassifier built in the SVC cells below.
n_estimators = 8
In [13]:
# One-vs-rest wrapper around a bagged ensemble of linear-kernel SVCs.
# max_samples=1.0/50 trains every base SVC on a random 1/50 of the training
# rows; random_state pins that random draw so the confusion matrix is
# reproducible across kernel restarts.
svc1 = OneVsRestClassifier(
    BaggingClassifier(
        SVC(kernel='linear'),
        max_samples=1.0 / 50,
        n_estimators=n_estimators,
        random_state=0,
    )
)
svc1.fit(train_set_X, train_set_Y)
y_pred_1 = svc1.predict(test_set_X)

# Rows are true categories, columns are predicted categories.
print(confusion_matrix(test_set_Y, y_pred_1))
In [ ]:
In [14]:
# One-vs-rest wrapper around a bagged ensemble of degree-4 polynomial-kernel
# SVCs. max_samples=1.0/50 trains every base SVC on a random 1/50 of the
# training rows; random_state pins that random draw so the confusion matrix is
# reproducible across kernel restarts.
svc2 = OneVsRestClassifier(
    BaggingClassifier(
        SVC(kernel='poly', degree=4),
        max_samples=1.0 / 50,
        n_estimators=n_estimators,
        random_state=0,
    )
)
svc2.fit(train_set_X, train_set_Y)
y_pred_2 = svc2.predict(test_set_X)

# Rows are true categories, columns are predicted categories.
print(confusion_matrix(test_set_Y, y_pred_2))
In [ ]:
In [15]:
# One-vs-rest wrapper around a bagged ensemble of RBF-kernel SVCs.
# max_samples=1.0/50 trains every base SVC on a random 1/50 of the training
# rows; random_state pins that random draw so the confusion matrix is
# reproducible across kernel restarts.
svc4 = OneVsRestClassifier(
    BaggingClassifier(
        SVC(kernel='rbf'),
        max_samples=1.0 / 50,
        n_estimators=n_estimators,
        random_state=0,
    )
)
svc4.fit(train_set_X, train_set_Y)
y_pred_4 = svc4.predict(test_set_X)

# Rows are true categories, columns are predicted categories.
print(confusion_matrix(test_set_Y, y_pred_4))
In [ ]:
In [16]:
# One-vs-rest wrapper around a bagged ensemble of sigmoid-kernel SVCs.
# max_samples=1.0/50 trains every base SVC on a random 1/50 of the training
# rows; random_state pins that random draw so the confusion matrix is
# reproducible across kernel restarts.
svc5 = OneVsRestClassifier(
    BaggingClassifier(
        SVC(kernel='sigmoid'),
        max_samples=1.0 / 50,
        n_estimators=n_estimators,
        random_state=0,
    )
)
svc5.fit(train_set_X, train_set_Y)
y_pred_5 = svc5.predict(test_set_X)

# Rows are true categories, columns are predicted categories.
print(confusion_matrix(test_set_Y, y_pred_5))
In [ ]:
In [17]:
# Random forest baseline; every leaf must hold at least 20 training samples.
# random_state fixes the forest's internal randomness so the confusion matrix
# is reproducible across kernel restarts.
rfc = RandomForestClassifier(min_samples_leaf=20, random_state=0)
rfc.fit(train_set_X, train_set_Y)
y_pred_6 = rfc.predict(test_set_X)

# Rows are true categories, columns are predicted categories.
print(confusion_matrix(test_set_Y, y_pred_6))
In [ ]:
In [ ]: