In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.utils import shuffle
In [2]:
# Read the prepared CSV, then keep only the rows that have no missing values.
raw_frame = pd.read_csv('SBERP_prepared.csv')
data = raw_frame.dropna()
In [3]:
# Preview the first five rows of the loaded frame.
data.head(n=5)
Out[3]:
In [4]:
# Randomise the row order of the frame.
# A fixed seed makes the permutation identical on every fresh run, so anything
# computed from the row order afterwards is reproducible.
# NOTE(review): the frame has a DATE column; if rows are chronological,
# shuffling before a positional train/test split lets the model train on rows
# dated after some test rows. Confirm this is acceptable.
SEED = 42
data = shuffle(data, random_state=SEED)
In [5]:
# Binary label: True where CLOSE9 is strictly greater than OPEN9, else False.
closed_above_open = data['CLOSE9'].gt(data['OPEN9'])
data['TARGET'] = closed_above_open

# Preview the frame with the new column.
data.head()
Out[5]:
In [6]:
# Positional hold-out split: the first TRAIN_SIZE rows are used for training,
# every remaining row for testing.
TRAIN_SIZE = 1000
train_data = data.iloc[:TRAIN_SIZE]
test_data = data.iloc[TRAIN_SIZE:]
In [7]:
# Columns that must not be passed to the model as inputs.
#   * DATE, TIME1..TIME9, VOL1..VOL9, LOW9, HIGH9: excluded by the original
#     selection as well (presumably LOW9/HIGH9/VOL9 describe the same bar as
#     CLOSE9 and are unknown at prediction time; verify against the data).
#   * CLOSE9: TARGET is computed from it, so it would reveal the answer.
#   * TARGET: the label itself. It is a column of train_data/test_data, so
#     leaving it among the features leaks the label into the model.
# Defined once so the train and test selections cannot drift apart.
NON_FEATURE_COLUMNS = [
    "DATE", "CLOSE9", "LOW9", "HIGH9",
    "TIME1", "TIME2", "TIME3", "TIME4", "TIME5", "TIME6", "TIME7", "TIME8", "TIME9",
    "VOL1", "VOL2", "VOL3", "VOL4", "VOL5", "VOL6", "VOL7", "VOL8", "VOL9",
    "TARGET",
]

# Model inputs and labels for the training split.
train_features = train_data.drop(columns=NON_FEATURE_COLUMNS)
train_target = train_data["TARGET"]

# Model inputs and labels for the test split (same column selection).
test_features = test_data.drop(columns=NON_FEATURE_COLUMNS)
test_target = test_data["TARGET"]
In [8]:
# Fit the standardisation on the training rows only, then apply that same
# fitted transformation to both the training and the test features.
scaler = StandardScaler().fit(train_features)
train_features_scaled = scaler.transform(train_features)
test_features_scaled = scaler.transform(test_features)
In [9]:
# Repeated evaluation of a logistic-regression classifier for TARGET.
#
# Every run refits the model on a reshuffled copy of the training split and
# scores it on the test split; the per-run metrics are averaged and printed
# once all runs have finished.
N_RUNS = 999  # same number of runs as the original range(1, 1000, 1)

acs = []     # accuracy per run
pss = []     # precision per run
rss = []     # recall per run
f1s = []     # F1 score per run
ras = []     # ROC AUC per run
result = []  # per run: sum of CLOSE9 - OPEN9 over the rows predicted True

# Loop-invariant test-set prices, in the same row order as
# test_features_scaled and test_target.
actual_close = test_data["CLOSE9"].tolist()
open_price = test_features["OPEN9"].tolist()

for i in range(1, N_RUNS + 1):
    # Shuffle features and labels TOGETHER so each row keeps its own label;
    # shuffling the feature matrix alone pairs rows with unrelated labels.
    # Seeding with the run number makes every run reproducible, and the
    # notebook-level arrays are left untouched so the cell can be re-run.
    run_features, run_target = shuffle(
        train_features_scaled, train_target, random_state=i
    )

    lr = LogisticRegression()
    lr.fit(run_features, run_target)

    # The test rows are deliberately NOT shuffled: predictions must stay
    # aligned with test_target and with the price lists built above.
    predictions = lr.predict(test_features_scaled)
    # Probability of the True class (classes_ is sorted, so column 1 is True).
    # ROC AUC is defined on scores, not on hard class labels.
    up_probability = lr.predict_proba(test_features_scaled)[:, 1]

    acs.append(accuracy_score(test_target, predictions))
    pss.append(precision_score(test_target, predictions))
    rss.append(recall_score(test_target, predictions))
    f1s.append(f1_score(test_target, predictions))
    ras.append(roc_auc_score(test_target, up_probability))

    # Per-row outcome table: CLOSE9 - OPEN9 is counted only where the model
    # predicted True, and 0 is counted otherwise.
    res = pd.DataFrame({
        "ACTUAL_CLOSE": actual_close,
        "OPEN": open_price,
        "PREDICTED_CLOSE": predictions,
    })
    res["RESULT"] = np.where(
        res["PREDICTED_CLOSE"].astype(bool),
        res["ACTUAL_CLOSE"] - res["OPEN"],
        0,
    )
    result.append(res["RESULT"].sum())

# Report the mean of every metric across all runs.
print("Доля правильных ответов:\t{}\nТочность\t\t\t{}\nПолнота\t\t\t{}\nF1 score\t\t{}\nROC\t\t\t{}\nResult:\t\t\t{}"
      .format(np.mean(acs), np.mean(pss), np.mean(rss), np.mean(f1s), np.mean(ras), np.mean(result)))