In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.utils import shuffle
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
In [33]:
# Load 'SBERP_prepared.csv' and discard every row that has a missing value.
raw_table = pd.read_csv('SBERP_prepared.csv')
data = raw_table.dropna()
In [34]:
# Preview the first five rows of the loaded table.
data.head(n=5)
Out[34]:
In [35]:
# Label: True when CLOSE9 is strictly greater than OPEN9.
data['TARGET'] = data['CLOSE9'] > data['OPEN9']

# Columns removed before modelling. CLOSE9 defines TARGET, so it must not
# stay in the frame as a feature. OPEN9 is intentionally kept.
# The original hand-written list named "VOL9" twice; building the list
# programmatically drops each column exactly once.
columns_to_drop = (
    ["DATE", "CLOSE9", "LOW9", "HIGH9"]
    + ["TIME{}".format(n) for n in range(1, 10)]
    + ["VOL{}".format(n) for n in range(1, 10)]
)
data = data.drop(columns_to_drop, axis=1)
In [36]:
# Rolling-mean features over consecutive rows.
#
# The previous version of this cell spelled out 352 assignments: for each of
# LOW/OPEN/CLOSE/HIGH it computed the averages from columns 1 through 8 in
# turn, but always stored them under the same name (e.g. 'LOW_MA3'). Every
# value derived from columns 1..7 was therefore overwritten, and only the
# averages of the *8 columns survived. This loop produces exactly those
# surviving columns, in the same column order, without the discarded work.
#
# NOTE(review): if separate averages per source column (LOW1..LOW8, ...) were
# intended, each needs its own column name; that would change the feature set
# used by every later cell, so it is not done here.
MA_WINDOWS = (3, 6, 7, 12, 16, 24, 36, 48, 72, 120, 240)
MA_FIELDS = ("LOW", "OPEN", "CLOSE", "HIGH")

for field in MA_FIELDS:
    source_column = data[field + "8"]
    for window in MA_WINDOWS:
        # rolling(window) yields NaN until `window` rows are available.
        data["{}_MA{}".format(field, window)] = source_column.rolling(window).mean()
In [37]:
# Show the first ten rows, including the freshly added moving-average columns.
data.iloc[:10]
Out[37]:
In [38]:
# Remove the warm-up rows whose rolling means are still NaN.
# The widest window is 240 rows, so the first complete row is at position 239.
# The previous hard-coded slice `data[241:]` threw away two valid rows and,
# because it is positional, removed another 241 rows every time the cell was
# re-run. dropna() removes exactly the incomplete rows and is safe to re-run.
data = data.dropna()
In [53]:
# Shuffle the rows with a fixed seed so that the train/test split below, and
# every metric derived from it, is reproducible after Restart & Run All.
# NOTE(review): the rolling-mean features of neighbouring rows share most of
# their inputs, so shuffling before the split can place near-duplicate rows on
# both sides of it. Confirm whether a chronological split is more appropriate.
data = shuffle(data, random_state=42)
In [54]:
# The first 1000 rows form the training set; everything after is the test set.
TRAIN_ROWS = 1000
train_data = data.iloc[:TRAIN_ROWS]
test_data = data.iloc[TRAIN_ROWS:]
In [55]:
# Separate the TARGET label from the feature columns for both subsets.
label_column = "TARGET"

train_target = train_data[label_column]
train_features = train_data.drop(label_column, axis=1)

test_target = test_data[label_column]
test_features = test_data.drop(label_column, axis=1)
In [56]:
# Learn the standardisation parameters from the training features only, then
# apply the same transformation to both subsets.
scaler = StandardScaler().fit(train_features)
train_features_scaled = scaler.transform(train_features)
test_features_scaled = scaler.transform(test_features)
In [67]:
# Repeated logistic-regression evaluation on the fixed train/test subsets.
N_ROUNDS = 999  # the previous loop, range(1, 1000), also ran 999 times

acs = []     # accuracy per round
pss = []     # precision per round
rss = []     # recall per round
f1s = []     # F1 per round
ras = []     # ROC AUC per round
result = []  # per-round trading result; currently never filled (see below)

for round_no in range(N_ROUNDS):
    # Features and labels must be permuted together. The previous code
    # shuffled only the feature matrix (and rebound the notebook-level
    # variable), so rows no longer matched train_target / test_target and the
    # model was fitted and scored against unrelated labels.
    # shuffle() applies one common permutation to all arrays passed to it.
    round_train_x, round_train_y = shuffle(train_features_scaled, train_target)
    lr = LogisticRegression()
    lr.fit(round_train_x, round_train_y)

    round_test_x, round_test_y = shuffle(test_features_scaled, test_target)
    predictions = lr.predict(round_test_x)

    acs.append(accuracy_score(round_test_y, predictions))
    pss.append(precision_score(round_test_y, predictions))
    rss.append(recall_score(round_test_y, predictions))
    f1s.append(f1_score(round_test_y, predictions))
    # ROC AUC is defined on scores, not hard labels: use the predicted
    # probability of the positive class (column 1 corresponds to True).
    positive_scores = lr.predict_proba(round_test_x)[:, 1]
    ras.append(roc_auc_score(round_test_y, positive_scores))
    # A profit calculation used to follow here, but it read
    # test_data["CLOSE9"], a column that is dropped earlier in the notebook,
    # so `result` is left empty.

# NOTE(review): every round uses the same training and test rows in a
# different order, so the averaged metrics are expected to be close to those
# of a single fit. Re-split the data per round if a variance estimate is wanted.

# np.mean([]) emits a RuntimeWarning; report NaN explicitly when no trading
# results were collected.
mean_result = np.mean(result) if result else float("nan")

print("Доля правильных ответов:\t{}\nТочность\t\t\t{}\nПолнота\t\t\t{}\nF1 score\t\t{}\nROC\t\t\t{}\nResult:\t\t\t{}"
      .format(np.mean(acs), np.mean(pss), np.mean(rss), np.mean(f1s), np.mean(ras), mean_result))
In [68]:
# Compare several scikit-learn classifiers on a 60/40 split of the training rows.
h = .02  # step size in the mesh; not referenced anywhere in this cell
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]
i = 1  # not referenced anywhere in this cell

# Hold out 40% of the training rows for validation (fixed seed).
X, y = train_features, train_target
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=.4, random_state=42)

# Fit the scaler on the fitting split only. The previous code standardised
# all rows before splitting, which let validation-row statistics leak into
# the preprocessing of the rows the models are fitted on.
split_scaler = StandardScaler().fit(X_train)
X_train = split_scaler.transform(X_train)
X_test = split_scaler.transform(X_test)

# Fit each classifier and report its validation metrics.
# NOTE(review): ROC AUC is computed from hard class predictions here, which
# reduces it to a single operating point; consider scoring with
# predict_proba / decision_function once all estimators are confirmed to
# return finite scores on this feature set.
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    print("======{}====\nДоля правильных ответов:\t{}\nТочность\t\t\t{}\nПолнота\t\t\t{}\nF1 score\t\t{}\nROC\t\t\t{}\nResult:\t\t\t{}"
          .format(
              name,
              accuracy_score(y_test, predictions),
              precision_score(y_test, predictions),
              recall_score(y_test, predictions),
              f1_score(y_test, predictions),
              roc_auc_score(y_test, predictions),
              0))