In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from scipy.io import loadmat
In [2]:
cols = ['quantity', 'price', 'interest', 'dipincome']
data = loadmat('./Data/jerryauto1.mat')
df = pd.DataFrame(data['DATA'], columns=cols)
full_df = np.log(df/df.shift(12))                            # 12-month (year-over-year) log changes
full_df['quantity_1y_fwd'] = full_df['quantity'].shift(-12)  # target: quantity growth 12 months ahead
df = full_df.dropna().copy()
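Each series is turned into its 12-month (year-over-year) log change, and the target quantity_1y_fwd is that growth rate shifted 12 months forward, so every row pairs current growth with quantity growth one year ahead. A quick sanity check of that alignment (illustrative, not part of the original notebook):
In [ ]:
# Row t's target should equal row t+12's quantity growth (checked before the dropna above)
assert np.allclose(full_df['quantity_1y_fwd'].iloc[:-12].values,
                   full_df['quantity'].iloc[12:].values,
                   equal_nan=True)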
In [3]:
df.plot(subplots=True, figsize=(10,10));
In [4]:
df[cols].corr()
Out[4]:
In [5]:
df[cols].describe().loc[['mean', 'std']]
Out[5]:
In [6]:
y = df[['quantity_1y_fwd']].values   # target: quantity growth 12 months ahead
x = df[cols].values                  # features: current YoY growth of the four series
x = x[:100]                          # keep the first 100 observations
y = y[:100]
In [7]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from sklearn.metrics import mean_squared_error, r2_score
import keras.backend as Kb
In [8]:
def success_ratio(y_true, y_pred):
    # fraction of samples where the predicted sign matches the actual sign
    # (Keras-backend equivalent of np.mean(np.sign(y_pred) == np.sign(y_true)))
    return Kb.mean(Kb.equal(Kb.sign(y_true), Kb.sign(y_pred)))
In [ ]:
def r_coeff(y_true, y_pred):
    # coefficient of determination: R^2 = 1 - RSS/SST (epsilon guards against division by zero)
    rss = Kb.sum(Kb.square(y_true - y_pred))
    sst = Kb.sum(Kb.square(y_true - Kb.mean(y_true)))
    return 1 - rss / (sst + Kb.epsilon())
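Both custom metrics operate on Keras tensors, so as a quick sanity check (not part of the original notebook) the same formulas can be evaluated in plain NumPy on a small made-up pair of vectors and compared against scikit-learn's r2_score; the numbers are purely illustrative.
In [ ]:
# Illustrative check of the two custom metrics with made-up numbers
y_true_np = np.array([0.02, -0.01, 0.03, -0.04])
y_pred_np = np.array([0.01, -0.02, 0.02, 0.01])
sr_np = np.mean(np.sign(y_true_np) == np.sign(y_pred_np))          # directional hit rate
r2_np = 1 - np.sum((y_true_np - y_pred_np)**2) / np.sum((y_true_np - y_true_np.mean())**2)
print(sr_np, r2_np, r2_score(y_true_np, y_pred_np))                # r2_np should equal r2_score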
In [ ]:
Y = y
n = 60                        # first 60 observations for training
X_train = x[:n, :]
Y_train = Y[:n, :]
X_test = x[n:n+12, :]         # following 12 months held out for testing
Y_test = Y[n:n+12, :]
print(X_train.shape)
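The split is strictly chronological: the first 60 year-over-year observations train the network and the following 12 months are held out. If a rolling, walk-forward evaluation were wanted instead of this single split, scikit-learn's TimeSeriesSplit would give the indices; this is an illustrative sketch only, not what the notebook does below.
In [ ]:
# Illustrative only: walk-forward splits over the 100 retained observations
from sklearn.model_selection import TimeSeriesSplit
for train_idx, test_idx in TimeSeriesSplit(n_splits=3).split(x):
    print('train %d..%d -> test %d..%d' % (train_idx[0], train_idx[-1], test_idx[0], test_idx[-1]))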
In [ ]:
model = Sequential()
model.add(Dense(4, input_dim=X_train.shape[1], activation='tanh'))
model.add(Dropout(0.2))
#model.add(Dense(12))
#model.add(Dropout(0.2))
model.add(Dense(1))
#model.add(Activation('tanh'))
model.compile(loss='mse', optimizer='rmsprop', metrics=[success_ratio, r_coeff])
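The network is intentionally tiny: one hidden layer of 4 tanh units with 20% dropout feeding a single linear output. Printing the summary (optional, not in the original run) confirms the layer shapes and the parameter count of 25 (16 + 4 hidden weights and biases, 4 + 1 output weights and bias).
In [ ]:
model.summary()   # expect Dense(4): 20 params, Dense(1): 5 params -> 25 trainable parameters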
In [ ]:
history = model.fit(X_train, Y_train,
epochs=50000,
batch_size=len(X_train),
verbose=0,
validation_data=(X_test, Y_test),
shuffle=True,
)
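Training runs for a fixed 50,000 full-batch epochs on only 60 samples, which is slow and invites overfitting. One hedged alternative, not used in the original run, is Keras's EarlyStopping callback; the patience value below is an arbitrary choice.
In [ ]:
# Optional alternative (not used above): stop once validation loss stops improving,
# by passing callbacks=[es] to model.fit; patience=500 is an arbitrary choice.
from keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', patience=500)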
In [ ]:
plt.figure(figsize=(15, 5))
plt.subplot(131)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best');
plt.subplot(132)
plt.plot(history.history['success_ratio'])
plt.plot(history.history['val_success_ratio'])
plt.title('model success ratio')
plt.ylabel('success ratio')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best');
plt.subplot(133)
plt.plot(history.history['r_coeff'])
plt.plot(history.history['val_r_coeff'])
plt.title('model R^2')
plt.ylabel('R^2')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best');
In [ ]:
# make predictions
keras_in_sample = model.predict(X_train)
keras_out_sample = model.predict(X_test)
In [ ]:
results = pd.DataFrame(y[:n+12, :], columns=['quantity_1y_fwd'])
results['in_sample'] = np.nan
results['out_sample'] = np.nan
results.loc[:n-1, 'in_sample'] = keras_in_sample.flatten()    # fitted values for the 60 training months
results.loc[n:, 'out_sample'] = keras_out_sample.flatten()    # predictions for the 12 held-out months
In [ ]:
r2_in = r2_score(Y_train.flatten(), keras_in_sample.flatten())
r2_out = r2_score(Y_test.flatten(), keras_out_sample.flatten())
sr_in = np.mean(np.sign(keras_in_sample.flatten())==np.sign(Y_train.flatten()))
sr_out = np.mean(np.sign(keras_out_sample.flatten())==np.sign(Y_test.flatten()))
rmse_in = np.sqrt(mean_squared_error(Y_train.flatten(), keras_in_sample.flatten()))
rmse_out = np.sqrt(mean_squared_error(Y_test.flatten(), keras_out_sample.flatten()))
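As a point of reference (not computed in the original notebook), a naive forecast that always predicts the training-sample mean gives a floor for the out-of-sample numbers; a model that does not beat it adds no predictive value here.
In [ ]:
# Hypothetical baseline: predict the training-sample mean for every test month
naive_pred = np.full_like(Y_test.flatten(), Y_train.mean())
print('naive RMSE: %.3f, naive R2: %.3f' % (
    np.sqrt(mean_squared_error(Y_test.flatten(), naive_pred)),
    r2_score(Y_test.flatten(), naive_pred)))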
In [ ]:
title = 'In (R2: %.2f, SR: %.2f, RMSE: %.2f) -- Out (R2: %.2f, SR: %.2f, RMSE: %.2f)' \
%(r2_in, sr_in, rmse_in, r2_out, sr_out, rmse_out)
results.plot(figsize=(15, 5), title=title);
In [ ]:
plt.figure(figsize=(15, 5))
plt.plot(Y_test.flatten(), label='actual')
plt.plot(keras_out_sample.flatten(), label='predict')
plt.title('DL Out of Sample -- RMSE: %.2f, R2: %.2f, Success Ratio: %.2f' % (rmse_out, r2_out, sr_out))
plt.legend(loc=0)
plt.show()