In [2]:
import numpy as np
import pandas as pd
%matplotlib inline
In [3]:
# Load the sample data set, discard its first column and make sure the rows
# carry a fresh 0..n-1 index.
# NOTE(review): the first column is presumably a saved index from an earlier
# `to_csv` call -- confirm against the CSV before relying on that.
df = (
    pd.read_csv('simple_reg_15_feat_sample.csv')
    .pipe(lambda frame: frame.drop(frame.columns[[0]], axis=1))
    .reset_index(drop=True)
)
print('data-shape:', df.shape)
df.head()
Out[3]:
In [3]:
# Visual sanity check: rows 1..99 of the two y columns on one chart.
# (Row 0 is excluded by the slice, exactly as before.)
df[['y_plus30', 'y_now']].iloc[1:100].plot(
    title='Sample of y_now and y_plus_30',
    figsize=(12, 8),
    grid=True,
);
In [4]:
# Split the frame into the regression target, the "current" y series and the
# feature matrix (every column that is not one of the two y columns).
# NOTE(review): by its name `y_plus30` looks like y shifted 30 steps ahead --
# confirm with whoever produced the data set.
y, y_real = df['y_plus30'], df['y_now']
X = df.drop(labels=['y_plus30', 'y_now'], axis=1)
X.shape
Out[4]:
In [5]:
# Plot the full target series; the trailing ';' hides the Axes repr.
y.plot(figsize=(12, 8), grid=True);
In [6]:
# Carve a hold-out ("cv") set off the END of the data and keep the remaining
# rows in X / y for the train/test split performed later.
#
# `Series.as_matrix()` was deprecated in pandas 0.23 and removed in pandas 1.0;
# `.values` is the replacement named by the deprecation warning and returns the
# same NumPy array on both old and new pandas versions.
#
# NOTE: this cell rebinds X and y, so it is not idempotent -- running it a
# second time without re-running the cell that defines X and y trims another
# HOLDOUT_ROWS rows. `y_real` is intentionally left untrimmed, as before.
HOLDOUT_ROWS = 500  # number of trailing rows reserved for the hold-out set

X_cv = X.iloc[-HOLDOUT_ROWS:]
y_cv = y.iloc[-HOLDOUT_ROWS:].values
y_real_cv = y_real.iloc[-HOLDOUT_ROWS:].values
X = X.iloc[:-HOLDOUT_ROWS]
y = y.iloc[:-HOLDOUT_ROWS]
In [21]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neural_network import MLPRegressor
# Order-preserving split (shuffle=False): the leading rows form the training
# set and the trailing 33% form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, shuffle=False
)

# Polynomial feature expansion is currently disabled. If it is re-enabled,
# fit PolynomialFeatures(degree=2) on X_train only and apply `transform`
# (not `fit_transform`) to X_test.

# Ordinary least-squares baseline; `fit` returns the estimator itself.
model = linear_model.LinearRegression().fit(X_train, y_train)
y_hat = model.predict(X_test)

# Measure
r2 = r2_score(y_test, y_hat)
mae = mean_absolute_error(y_test, y_hat)
mse = mean_squared_error(y_test, y_hat)
print('Variance score:', r2)
print('mae:', mae)
print('mse:', mse)
In [22]:
# Pair the test targets with the predictions on a fresh 0..n-1 index, then
# peek at the first five of the last 50 rows.
y_test_0 = y_test.reset_index(drop=True)
Y_test_df = pd.DataFrame({'y_test': y_test_0, 'y_pred_test': y_hat})
Y_test_df.tail(50).head()
Out[22]:
In [23]:
# Rebuild the comparison frame, this time keeping y_test's original row labels;
# the prediction array is attached positionally to that index.
Y_test_df = pd.DataFrame(dict(y_test=y_test, y_pred_test=y_hat))
Y_test_df.head(5)
Out[23]:
In [24]:
# Compare actual vs. predicted values over a 100-row positional window.
# NOTE(review): the 1000:1100 window assumes the test set has at least 1100
# rows -- confirm for the data set in use.
Y_test_df.iloc[1000:1100].plot(grid=True, figsize=(13, 10));
In [25]:
# Give the hold-out features a fresh 0..n-1 index (the old labels are dropped),
# then score them with the model fitted earlier in the notebook.
X_cv = X_cv.reset_index(drop=True)
y_cv_hat = model.predict(X_cv)
In [26]:
# Collect the hold-out predictions next to the hold-out target and the
# corresponding `y_now` values so they can be inspected and plotted together.
Y_cv_df_out = pd.DataFrame({
    'y_cv_pred': y_cv_hat,
    'y_cv': y_cv,
    'y_real': y_real_cv,
})
Y_cv_df_out.head()
Out[26]:
In [27]:
# Plot the first 100 hold-out rows: prediction, target and current value.
Y_cv_df_out.head(100).plot(grid=True, figsize=(13, 10))
Out[27]: