In [61]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
%matplotlib inline
In [108]:
# Read the stock CSV and use its "date" column as the row index.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists — consider a configurable data dir.
csv_path = "/Users/abulbasar/workspace/python/machine-learning/data/istanbul-stock.csv"
df = pd.read_csv(csv_path).set_index("date")
df.head()
Out[108]:
In [109]:
# Pairwise plot grid over every column except the first one
# (the first column is what the later cells use as the regression target).
sns.pairplot(df.iloc[:, 1:])
Out[109]:
In [111]:
# Summary of the frame: index, column dtypes and non-null counts.
df.info()
In [112]:
# Line chart of the ISE column over the row index, drawn on a 10x5 inch figure.
figure_size = (10, 5)
plt.figure(figsize=figure_size)
df["ISE"].plot.line()
Out[112]:
In [12]:
# Column labels of the loaded frame.
df.columns
Out[12]:
In [115]:
# Number of leading rows used for training; the rows after this position
# form the test set in the split cells below.
n_train = 525
In [116]:
# Training partition: the first n_train rows, all columns.
training = df.head(n_train)
# Inspect the last training rows to see where the split falls.
training.tail()
Out[116]:
In [140]:
# Test partition: every row from position n_train onward, all columns.
testing = df.iloc[n_train:]
# Show up to the first ten test rows.
testing.head(10)
Out[140]:
In [118]:
# Split the training rows into the target (first column) and the
# feature matrix (all remaining columns).
y_train = training.iloc[:, 0]
X_train = training.iloc[:, 1:]
In [119]:
# Preview the first rows of the training feature columns.
X_train.head()
Out[119]:
In [120]:
# Display the training target (first column of the training rows).
y_train
Out[120]:
In [121]:
# Convert the training features to a plain NumPy array.
# np.asarray (rather than `.values`) keeps this cell idempotent: re-running it
# when X_train is already an ndarray is a no-op instead of an AttributeError.
X_train = np.asarray(X_train)
In [123]:
# Test target (first column) and test features (remaining columns),
# both extracted as NumPy arrays.
y_test = testing.iloc[:, 0].values
X_test = testing.iloc[:, 1:].values
In [124]:
# Fit an ordinary least-squares model on the training split
# (fit() returns the estimator itself, so it can be chained).
lr = LinearRegression().fit(X_train, y_train)
# Show the fitted intercept and per-feature coefficients.
(lr.intercept_, lr.coef_)
Out[124]:
In [125]:
# Score of the fitted model on the held-out rows
# (for LinearRegression, score() is the R^2 coefficient of determination).
lr.score(X_test, y_test)
Out[125]:
In [126]:
# Model predictions for the held-out feature rows.
y_test_pred = lr.predict(X_test)
In [127]:
# Side-by-side table of actual vs. predicted test targets.
pd.DataFrame({"true": y_test, "predict": y_test_pred})
Out[127]:
In [158]:
# Residual of each held-out prediction, defined here as predicted minus actual.
residuals = y_test_pred - y_test
positions = range(len(residuals))

# One point per test row, with the row's index label as the tick text.
plt.scatter(positions, residuals)
plt.xticks(positions, df.iloc[n_train:, ].index, rotation=90)

# Zero reference line spanning exactly the plotted points; the extent is
# derived from the data instead of a hard-coded xmax so it stays correct
# if the size of the test split changes.
plt.hlines([0], xmin=0, xmax=len(residuals) - 1, linestyles="--")
plt.xlabel("Date")
plt.ylabel("Residual")

# Run the layout pass last so the rotated tick labels and the axis labels
# are all taken into account.
plt.tight_layout()
Out[158]:
In [147]:
# Raw residual values (predicted minus actual) for the test rows.
residuals
Out[147]:
In [129]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between two equally long sequences.

    Both inputs are coerced to float NumPy arrays first, so plain lists and
    tuples are accepted, and pandas Series are compared position by position
    instead of being aligned on their index labels.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((y_true - y_pred) ** 2))``.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((actual - predicted) ** 2))
In [130]:
# Root-mean-squared error of the model's predictions on the test rows.
rmse(y_test, y_test_pred)
Out[130]:
In [131]:
# Column labels of the original frame, shown again before building lag features.
df.columns
Out[131]:
In [132]:
# First rows of the original frame, for comparison with the shifted views below.
df.head()
Out[132]:
In [133]:
# Preview of a one-row shift on two columns: each row carries the previous
# row's ISE and EU values, and the first row becomes NaN.
df.loc[:,["ISE", "EU"]].shift(1).head()
Out[133]:
In [134]:
# (rows, columns) of the original frame.
df.shape
Out[134]:
In [135]:
# Lagged copy of the whole frame: every column moved down by one row,
# so each row holds the previous row's values.
df_lagged = df.shift(periods=1)
# Shifting keeps the frame's shape unchanged.
df_lagged.shape
Out[135]:
In [136]:
# First rows of the lagged frame (the first row is all NaN after the shift).
df_lagged.head()
Out[136]:
In [137]:
# Place the original columns and their one-row-lagged copies side by side,
# then drop the first row, whose lagged values are NaN.
# Note: the lagged columns keep the same labels as the originals, so the
# joined frame has duplicate column names; later cells select by position.
df_joined = pd.concat([df, df_lagged], axis=1).iloc[1:, :]
In [138]:
# First rows of the combined (same-row + lagged) frame.
df_joined.head()
Out[138]:
In [143]:
# (rows, columns) of the combined frame: one row fewer than df, twice the columns.
df_joined.shape
Out[143]:
In [142]:
# Second model: refit the same estimator on the lag-augmented frame and
# report R^2 and RMSE on its held-out rows.
# NOTE(review): df_joined has lost its first row, so slicing at n_train puts
# the train/test boundary one original row later than in the first model;
# the two models are therefore scored on slightly different test rows —
# confirm whether that is intended before comparing the metrics.
training = df_joined.iloc[:n_train, :]
testing = df_joined.iloc[n_train:, :]

# Column 0 is the target; all remaining columns (same-row and lagged) are
# features. Both feature matrices are converted to ndarrays so fit() and
# predict() receive the same kind of input, matching the first model; fitting
# on a DataFrame and predicting on an ndarray makes recent scikit-learn
# versions warn about missing feature names.
X_train = training.iloc[:, 1:].values
y_train = training.iloc[:, 0]
X_test = testing.iloc[:, 1:].values
y_test = testing.iloc[:, 0].values

lr.fit(X_train, y_train)
print("R2", lr.score(X_test, y_test))
y_test_pred = lr.predict(X_test)
rmse(y_test, y_test_pred)
Out[142]:
In [ ]: