logistic-regression-sberbank-stock-price-prediction-02



In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.utils import shuffle

from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [33]:
data = pd.read_csv('SBERP_prepared.csv').dropna()

In [34]:
data.head()


Out[34]:
DATE TIME1 OPEN1 HIGH1 LOW1 CLOSE1 VOL1 TIME2 OPEN2 HIGH2 ... HIGH8 LOW8 CLOSE8 VOL8 TIME9 OPEN9 HIGH9 LOW9 CLOSE9 VOL9
0 20120301 110000 75.38 75.45 74.91 74.95 2334800 120000 74.95 75.21 ... 75.30 75.12 75.16 754600 190000.0 75.14 75.61 75.12 75.58 2269200.0
1 20120302 110000 76.01 76.35 75.79 76.13 3527600 120000 76.14 76.30 ... 76.72 76.03 76.72 4305900 190000.0 76.72 77.48 76.68 77.09 6021500.0
2 20120305 110000 77.09 78.59 77.09 78.39 5464300 120000 78.38 78.40 ... 78.11 77.72 78.10 1385100 190000.0 78.10 78.11 77.53 77.59 2659900.0
3 20120306 110000 77.15 77.20 76.71 77.03 2307800 120000 77.03 77.11 ... 75.83 75.03 75.07 1457600 190000.0 75.07 75.11 73.80 73.81 5285200.0
4 20120307 110000 74.00 74.20 73.23 73.86 3703100 120000 73.86 74.37 ... 74.56 73.90 74.40 2827700 190000.0 74.40 74.78 74.21 74.53 5004100.0

5 rows × 55 columns


In [35]:
data['TARGET'] = data['CLOSE9'] > data['OPEN9']
data = data.drop(["DATE", "CLOSE9", "LOW9", "HIGH9", "VOL9",
                 "TIME1", "TIME2", "TIME3", "TIME4", "TIME5", "TIME6", "TIME7","TIME8","TIME9",
                 "VOL1", "VOL2", "VOL3", "VOL4", "VOL5", "VOL6", "VOL7","VOL8","VOL9"
                 ], axis=1)

In [36]:
data['LOW_MA3'] = data['LOW1'].rolling(3).mean()
data['LOW_MA6'] = data['LOW1'].rolling(6).mean()
data['LOW_MA7'] = data['LOW1'].rolling(7).mean()
data['LOW_MA12'] = data['LOW1'].rolling(12).mean()
data['LOW_MA16'] = data['LOW1'].rolling(16).mean()
data['LOW_MA24'] = data['LOW1'].rolling(24).mean()
data['LOW_MA36'] = data['LOW1'].rolling(36).mean()
data['LOW_MA48'] = data['LOW1'].rolling(48).mean()
data['LOW_MA72'] = data['LOW1'].rolling(72).mean()
data['LOW_MA120'] = data['LOW1'].rolling(120).mean()
data['LOW_MA240'] = data['LOW1'].rolling(240).mean()
data['LOW_MA3'] = data['LOW2'].rolling(3).mean()
data['LOW_MA6'] = data['LOW2'].rolling(6).mean()
data['LOW_MA7'] = data['LOW2'].rolling(7).mean()
data['LOW_MA12'] = data['LOW2'].rolling(12).mean()
data['LOW_MA16'] = data['LOW2'].rolling(16).mean()
data['LOW_MA24'] = data['LOW2'].rolling(24).mean()
data['LOW_MA36'] = data['LOW2'].rolling(36).mean()
data['LOW_MA48'] = data['LOW2'].rolling(48).mean()
data['LOW_MA72'] = data['LOW2'].rolling(72).mean()
data['LOW_MA120'] = data['LOW2'].rolling(120).mean()
data['LOW_MA240'] = data['LOW2'].rolling(240).mean()
data['LOW_MA3'] = data['LOW3'].rolling(3).mean()
data['LOW_MA6'] = data['LOW3'].rolling(6).mean()
data['LOW_MA7'] = data['LOW3'].rolling(7).mean()
data['LOW_MA12'] = data['LOW3'].rolling(12).mean()
data['LOW_MA16'] = data['LOW3'].rolling(16).mean()
data['LOW_MA24'] = data['LOW3'].rolling(24).mean()
data['LOW_MA36'] = data['LOW3'].rolling(36).mean()
data['LOW_MA48'] = data['LOW3'].rolling(48).mean()
data['LOW_MA72'] = data['LOW3'].rolling(72).mean()
data['LOW_MA120'] = data['LOW3'].rolling(120).mean()
data['LOW_MA240'] = data['LOW3'].rolling(240).mean()
data['LOW_MA3'] = data['LOW4'].rolling(3).mean()
data['LOW_MA6'] = data['LOW4'].rolling(6).mean()
data['LOW_MA7'] = data['LOW4'].rolling(7).mean()
data['LOW_MA12'] = data['LOW4'].rolling(12).mean()
data['LOW_MA16'] = data['LOW4'].rolling(16).mean()
data['LOW_MA24'] = data['LOW4'].rolling(24).mean()
data['LOW_MA36'] = data['LOW4'].rolling(36).mean()
data['LOW_MA48'] = data['LOW4'].rolling(48).mean()
data['LOW_MA72'] = data['LOW4'].rolling(72).mean()
data['LOW_MA120'] = data['LOW4'].rolling(120).mean()
data['LOW_MA240'] = data['LOW4'].rolling(240).mean()
data['LOW_MA3'] = data['LOW5'].rolling(3).mean()
data['LOW_MA6'] = data['LOW5'].rolling(6).mean()
data['LOW_MA7'] = data['LOW5'].rolling(7).mean()
data['LOW_MA12'] = data['LOW5'].rolling(12).mean()
data['LOW_MA16'] = data['LOW5'].rolling(16).mean()
data['LOW_MA24'] = data['LOW5'].rolling(24).mean()
data['LOW_MA36'] = data['LOW5'].rolling(36).mean()
data['LOW_MA48'] = data['LOW5'].rolling(48).mean()
data['LOW_MA72'] = data['LOW5'].rolling(72).mean()
data['LOW_MA120'] = data['LOW5'].rolling(120).mean()
data['LOW_MA240'] = data['LOW5'].rolling(240).mean()
data['LOW_MA3'] = data['LOW6'].rolling(3).mean()
data['LOW_MA6'] = data['LOW6'].rolling(6).mean()
data['LOW_MA7'] = data['LOW6'].rolling(7).mean()
data['LOW_MA12'] = data['LOW6'].rolling(12).mean()
data['LOW_MA16'] = data['LOW6'].rolling(16).mean()
data['LOW_MA24'] = data['LOW6'].rolling(24).mean()
data['LOW_MA36'] = data['LOW6'].rolling(36).mean()
data['LOW_MA48'] = data['LOW6'].rolling(48).mean()
data['LOW_MA72'] = data['LOW6'].rolling(72).mean()
data['LOW_MA120'] = data['LOW6'].rolling(120).mean()
data['LOW_MA240'] = data['LOW6'].rolling(240).mean()
data['LOW_MA3'] = data['LOW7'].rolling(3).mean()
data['LOW_MA6'] = data['LOW7'].rolling(6).mean()
data['LOW_MA7'] = data['LOW7'].rolling(7).mean()
data['LOW_MA12'] = data['LOW7'].rolling(12).mean()
data['LOW_MA16'] = data['LOW7'].rolling(16).mean()
data['LOW_MA24'] = data['LOW7'].rolling(24).mean()
data['LOW_MA36'] = data['LOW7'].rolling(36).mean()
data['LOW_MA48'] = data['LOW7'].rolling(48).mean()
data['LOW_MA72'] = data['LOW7'].rolling(72).mean()
data['LOW_MA120'] = data['LOW7'].rolling(120).mean()
data['LOW_MA240'] = data['LOW7'].rolling(240).mean()
data['LOW_MA3'] = data['LOW8'].rolling(3).mean()
data['LOW_MA6'] = data['LOW8'].rolling(6).mean()
data['LOW_MA7'] = data['LOW8'].rolling(7).mean()
data['LOW_MA12'] = data['LOW8'].rolling(12).mean()
data['LOW_MA16'] = data['LOW8'].rolling(16).mean()
data['LOW_MA24'] = data['LOW8'].rolling(24).mean()
data['LOW_MA36'] = data['LOW8'].rolling(36).mean()
data['LOW_MA48'] = data['LOW8'].rolling(48).mean()
data['LOW_MA72'] = data['LOW8'].rolling(72).mean()
data['LOW_MA120'] = data['LOW8'].rolling(120).mean()
data['LOW_MA240'] = data['LOW8'].rolling(240).mean()

data['OPEN_MA3'] = data['OPEN1'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN1'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN1'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN1'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN1'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN1'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN1'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN1'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN1'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN1'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN1'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN2'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN2'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN2'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN2'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN2'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN2'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN2'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN2'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN2'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN2'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN2'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN3'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN3'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN3'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN3'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN3'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN3'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN3'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN3'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN3'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN3'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN3'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN4'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN4'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN4'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN4'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN4'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN4'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN4'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN4'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN4'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN4'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN4'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN5'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN5'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN5'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN5'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN5'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN5'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN5'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN5'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN5'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN5'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN5'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN6'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN6'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN6'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN6'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN6'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN6'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN6'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN6'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN6'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN6'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN6'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN7'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN7'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN7'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN7'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN7'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN7'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN7'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN7'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN7'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN7'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN7'].rolling(240).mean()
data['OPEN_MA3'] = data['OPEN8'].rolling(3).mean()
data['OPEN_MA6'] = data['OPEN8'].rolling(6).mean()
data['OPEN_MA7'] = data['OPEN8'].rolling(7).mean()
data['OPEN_MA12'] = data['OPEN8'].rolling(12).mean()
data['OPEN_MA16'] = data['OPEN8'].rolling(16).mean()
data['OPEN_MA24'] = data['OPEN8'].rolling(24).mean()
data['OPEN_MA36'] = data['OPEN8'].rolling(36).mean()
data['OPEN_MA48'] = data['OPEN8'].rolling(48).mean()
data['OPEN_MA72'] = data['OPEN8'].rolling(72).mean()
data['OPEN_MA120'] = data['OPEN8'].rolling(120).mean()
data['OPEN_MA240'] = data['OPEN8'].rolling(240).mean()


data['CLOSE_MA3'] = data['CLOSE1'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE1'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE1'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE1'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE1'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE1'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE1'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE1'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE1'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE1'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE1'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE2'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE2'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE2'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE2'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE2'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE2'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE2'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE2'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE2'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE2'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE2'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE3'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE3'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE3'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE3'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE3'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE3'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE3'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE3'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE3'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE3'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE3'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE4'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE4'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE4'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE4'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE4'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE4'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE4'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE4'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE4'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE4'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE4'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE5'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE5'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE5'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE5'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE5'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE5'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE5'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE5'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE5'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE5'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE5'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE6'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE6'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE6'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE6'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE6'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE6'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE6'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE6'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE6'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE6'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE6'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE7'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE7'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE7'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE7'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE7'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE7'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE7'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE7'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE7'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE7'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE7'].rolling(240).mean()
data['CLOSE_MA3'] = data['CLOSE8'].rolling(3).mean()
data['CLOSE_MA6'] = data['CLOSE8'].rolling(6).mean()
data['CLOSE_MA7'] = data['CLOSE8'].rolling(7).mean()
data['CLOSE_MA12'] = data['CLOSE8'].rolling(12).mean()
data['CLOSE_MA16'] = data['CLOSE8'].rolling(16).mean()
data['CLOSE_MA24'] = data['CLOSE8'].rolling(24).mean()
data['CLOSE_MA36'] = data['CLOSE8'].rolling(36).mean()
data['CLOSE_MA48'] = data['CLOSE8'].rolling(48).mean()
data['CLOSE_MA72'] = data['CLOSE8'].rolling(72).mean()
data['CLOSE_MA120'] = data['CLOSE8'].rolling(120).mean()
data['CLOSE_MA240'] = data['CLOSE8'].rolling(240).mean()

data['HIGH_MA3'] = data['HIGH1'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH1'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH1'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH1'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH1'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH1'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH1'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH1'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH1'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH1'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH1'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH2'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH2'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH2'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH2'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH2'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH2'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH2'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH2'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH2'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH2'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH2'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH3'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH3'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH3'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH3'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH3'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH3'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH3'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH3'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH3'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH3'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH3'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH4'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH4'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH4'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH4'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH4'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH4'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH4'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH4'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH4'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH4'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH4'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH5'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH5'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH5'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH5'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH5'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH5'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH5'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH5'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH5'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH5'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH5'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH6'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH6'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH6'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH6'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH6'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH6'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH6'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH6'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH6'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH6'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH6'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH7'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH7'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH7'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH7'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH7'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH7'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH7'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH7'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH7'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH7'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH7'].rolling(240).mean()
data['HIGH_MA3'] = data['HIGH8'].rolling(3).mean()
data['HIGH_MA6'] = data['HIGH8'].rolling(6).mean()
data['HIGH_MA7'] = data['HIGH8'].rolling(7).mean()
data['HIGH_MA12'] = data['HIGH8'].rolling(12).mean()
data['HIGH_MA16'] = data['HIGH8'].rolling(16).mean()
data['HIGH_MA24'] = data['HIGH8'].rolling(24).mean()
data['HIGH_MA36'] = data['HIGH8'].rolling(36).mean()
data['HIGH_MA48'] = data['HIGH8'].rolling(48).mean()
data['HIGH_MA72'] = data['HIGH8'].rolling(72).mean()
data['HIGH_MA120'] = data['HIGH8'].rolling(120).mean()
data['HIGH_MA240'] = data['HIGH8'].rolling(240).mean()

In [37]:
data.head(10)


Out[37]:
OPEN1 HIGH1 LOW1 CLOSE1 OPEN2 HIGH2 LOW2 CLOSE2 OPEN3 HIGH3 ... HIGH_MA6 HIGH_MA7 HIGH_MA12 HIGH_MA16 HIGH_MA24 HIGH_MA36 HIGH_MA48 HIGH_MA72 HIGH_MA120 HIGH_MA240
0 75.38 75.45 74.91 74.95 74.95 75.21 74.83 74.90 74.92 75.29 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 76.01 76.35 75.79 76.13 76.14 76.30 75.95 76.22 76.22 76.41 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 77.09 78.59 77.09 78.39 78.38 78.40 77.49 78.00 77.99 78.06 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 77.15 77.20 76.71 77.03 77.03 77.11 76.70 76.72 76.71 76.98 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 74.00 74.20 73.23 73.86 73.86 74.37 73.86 74.15 74.11 74.57 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
5 76.00 76.19 75.81 75.93 75.89 76.04 75.88 75.93 75.93 75.94 ... 76.076667 NaN NaN NaN NaN NaN NaN NaN NaN NaN
6 75.78 75.79 75.30 75.78 75.79 76.05 75.68 76.01 76.01 77.25 ... 76.391667 76.235714 NaN NaN NaN NaN NaN NaN NaN NaN
7 77.39 77.92 77.33 77.61 77.62 77.88 77.55 77.75 77.74 77.85 ... 76.745000 76.741429 NaN NaN NaN NaN NaN NaN NaN NaN
8 80.05 80.60 80.00 80.25 80.25 80.30 79.90 80.06 80.06 80.70 ... 77.591667 77.665714 NaN NaN NaN NaN NaN NaN NaN NaN
9 83.37 83.38 81.92 82.40 82.40 82.56 81.93 82.19 82.19 82.20 ... 78.603333 78.207143 NaN NaN NaN NaN NaN NaN NaN NaN

10 rows × 78 columns


In [38]:
data = data[241:]

Задача на регрессию:

Будет ли цена закрытия больше цены открытия ?


In [53]:
data = shuffle(data)

In [54]:
train_data = data[:1000]
test_data = data[1000:]

In [55]:
train_features = train_data.drop(["TARGET"], axis=1)
train_target = train_data["TARGET"]

test_features = test_data.drop(["TARGET"], axis=1)
test_target = test_data["TARGET"]

In [56]:
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
test_features_scaled = scaler.transform(test_features)

In [67]:
acs = []
pss = []
rss = []
f1s = []
ras = []
result = []

for i in range(1, 1000, 1):    
    train_features_scaled = shuffle(train_features_scaled)
    lr = LogisticRegression()
    lr.fit(train_features_scaled, train_target)

    test_features_scaled = shuffle(test_features_scaled)
    predictions = lr.predict(test_features_scaled)

    acs.append(accuracy_score(test_target, predictions))
    pss.append(precision_score(test_target, predictions))
    rss.append(recall_score(test_target, predictions))
    f1s.append(f1_score(test_target, predictions))
    ras.append(roc_auc_score(test_target, predictions))
    
#     res = pd.DataFrame(test_data["CLOSE9"].tolist(), columns = ["ACTUAL_CLOSE"])
#     res["OPEN"] = pd.Series(test_features["OPEN9"].tolist())
#     res["PREDICTED_CLOSE"] = pd.Series(predictions)
#     res["RESULT"] = np.where(res['PREDICTED_CLOSE'] == True, res['ACTUAL_CLOSE'] - res['OPEN'], 0)
#     result.append(np.sum(res['RESULT']))

print("Доля правильных ответов:\t{}\nТочность\t\t\t{}\nПолнота\t\t\t{}\nF1 score\t\t{}\nROC\t\t\t{}\nResult:\t\t\t{}"
      .format(np.mean(acs), np.mean(pss), np.mean(rss), np.mean(f1s), np.mean(ras), np.mean(result)))


Доля правильных ответов:	0.489341943099
Точность			0.485677112018
Полнота			0.875035750036
F1 score		0.623967364842
ROC			0.5001760388
Result:			nan

In [68]:
h = .02  # step size in the mesh
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]
i = 1
# iterate over datasets
X, y = train_features, train_target
X = StandardScaler().fit_transform(X)

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=.4, random_state=42)

# iterate over classifiers
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    print("======{}====\nДоля правильных ответов:\t{}\nТочность\t\t\t{}\nПолнота\t\t\t{}\nF1 score\t\t{}\nROC\t\t\t{}\nResult:\t\t\t{}"
      .format(
          name,
          accuracy_score(y_test, predictions),
          precision_score(y_test, predictions),
          recall_score(y_test, predictions),
          f1_score(y_test, predictions),
          roc_auc_score(y_test, predictions),
          0))


======Nearest Neighbors====
Доля правильных ответов:	0.49
Точность			0.522935779817
Полнота			0.532710280374
F1 score		0.527777777778
ROC			0.486785247714
Result:			0
======Linear SVM====
Доля правильных ответов:	0.5
Точность			0.522292993631
Полнота			0.766355140187
F1 score		0.621212121212
ROC			0.479951763642
Result:			0
======RBF SVM====
Доля правильных ответов:	0.495
Точность			0.526086956522
Полнота			0.565420560748
F1 score		0.545045045045
ROC			0.489699527686
Result:			0
======Gaussian Process====
Доля правильных ответов:	0.525
Точность			0.548780487805
Полнота			0.630841121495
F1 score		0.586956521739
ROC			0.517033463973
Result:			0
======Decision Tree====
Доля правильных ответов:	0.545
Точность			0.547337278107
Полнота			0.864485981308
F1 score		0.670289855072
ROC			0.520952668074
Result:			0
======Random Forest====
Доля правильных ответов:	0.5125
Точность			0.541125541126
Полнота			0.584112149533
F1 score		0.561797752809
ROC			0.507109838207
Result:			0
======Neural Net====
Доля правильных ответов:	0.51
Точность			0.546875
Полнота			0.490654205607
F1 score		0.51724137931
ROC			0.511456135062
Result:			0
======AdaBoost====
Доля правильных ответов:	0.5175
Точность			0.54356846473
Полнота			0.61214953271
F1 score		0.575824175824
ROC			0.510375841624
Result:			0
======Naive Bayes====
Доля правильных ответов:	0.4875
Точность			0.516245487365
Полнота			0.668224299065
F1 score		0.582484725051
ROC			0.473897095769
Result:			0
======QDA====
Доля правильных ответов:	0.505
Точность			0.545454545455
Полнота			0.448598130841
F1 score		0.492307692308
ROC			0.50924530198
Result:			0