In [1]:
from __future__ import absolute_import, division, print_function
from tensorflow import keras
from tensorflow.keras import layers
import bisect
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import seaborn as sns
import tensorflow as tf
In [46]:
def read_data(dates, data_dir='/Users/felipe/bitcoin/data', suffix='-training.csv'):
    """Load and concatenate per-day training CSVs into one DataFrame.

    Parameters
    ----------
    dates : iterable
        Date identifiers (e.g. 20190520); each is formatted into a file name
        of the form ``{data_dir}/{date}{suffix}``.
    data_dir : str, optional
        Directory containing the CSV files. Defaults to the original
        hard-coded local path for backward compatibility; pass explicitly
        for portability.
    suffix : str, optional
        File-name suffix appended after the date.

    Returns
    -------
    pandas.DataFrame
        All days concatenated, indexed by the parsed 'time' column.
    """
    frames = []
    for date in dates:
        path = '{}/{}{}'.format(data_dir, str(date), suffix)
        frames.append(pd.read_csv(path, index_col='time', parse_dates=True))
    return pd.concat(frames)
In [47]:
# Load two days of training data (2019-05-20 and 2019-05-21).
t = read_data([20190520, 20190521])
In [51]:
# Inspect the available feature/target columns.
t.columns
Out[51]:
In [29]:
# 80/20 train/test split with a fixed seed for reproducibility. The
# per-column mean/std stats are computed from the TRAINING set only
# (used later by norm(), avoiding test-set leakage) before the raw
# frame is freed to save kernel memory.
train_dataset = t.sample(frac=0.8,random_state=0)
test_dataset = t.drop(train_dataset.index)
train_stats = train_dataset.describe().transpose()
del t
In [30]:
# Separate the two PnL targets from the feature columns, then project
# each split onto its respective column set.
y_cols = ['longPnlAvg', 'shortPnlAvg']
x_cols = [col for col in train_dataset.columns if col not in y_cols]
train_labels = train_dataset[y_cols]
test_labels = test_dataset[y_cols]
train_dataset = train_dataset[x_cols]
test_dataset = test_dataset[x_cols]
In [31]:
def norm(xx, stats=None):
    """Z-score normalize each column using training-set statistics.

    Parameters
    ----------
    xx : pandas.DataFrame
        Frame to normalize; not modified (a copy is returned).
    stats : pandas.DataFrame, optional
        Per-column statistics with 'mean' and 'std' columns indexed by
        column name (the shape of ``describe().transpose()``). Defaults
        to the notebook-global ``train_stats`` for backward compatibility;
        passing it explicitly removes the hidden-state dependency.

    Returns
    -------
    pandas.DataFrame
        Normalized copy of ``xx``.
    """
    if stats is None:
        stats = train_stats  # notebook global set in the train/test split cell
    x = xx.copy()
    for c in x.columns:
        x[c] = (x[c] - stats['mean'][c]) / stats['std'][c]
    return x
In [32]:
# Normalize both splits with TRAINING-set statistics (test data must not
# contribute to the normalization constants).
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
In [51]:
def build_model():
    """Build and compile a single-layer linear regression model.

    Maps the normalized feature columns to the two PnL targets with one
    Dense(2) layer (i.e. a plain linear model), trained with RMSprop on
    mean squared error. MAE and MSE are tracked as metrics.
    """
    model = keras.Sequential([
        # One linear output per target ('longPnlAvg', 'shortPnlAvg').
        layers.Dense(2, input_shape=[len(train_dataset.keys())])
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)  # learning_rate=0.001
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    return model
model = build_model()
In [52]:
# Print the layer/parameter summary of the compiled model.
model.summary()
In [53]:
# sns.pairplot(train_dataset[['longPnlAvg', 'E2boughtSum', 'E2soldSum']], diag_kind="kde")
In [54]:
# Sanity check: run the untrained model on 10 normalized training rows to
# verify the input shape is accepted and the output has 2 columns.
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result
Out[54]:
In [55]:
def plot_history(history):
    """Plot train/validation MAE and MSE curves from a Keras History object.

    Axis labels previously read '[MPG]' — copied from the TensorFlow
    Auto MPG tutorial; relabeled here for this PnL dataset.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [PnL]')
    plt.plot(hist['epoch'], hist['mean_absolute_error'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_absolute_error'],
             label='Val Error')
    # NOTE(review): y-limits copied from the MPG tutorial — may clip this
    # dataset's error curves; confirm against actual error magnitudes.
    plt.ylim([0, 5])
    plt.legend()

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error [$PnL^2$]')
    plt.plot(hist['epoch'], hist['mean_squared_error'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_squared_error'],
             label='Val Error')
    plt.ylim([0, 20])  # NOTE(review): same tutorial-copied limit caveat
    plt.legend()
    plt.show()
In [56]:
# Display training progress by printing a single dot for each completed epoch.
# EPOCHS was 500 while fit() used epochs=1000, so PrintDot's percentage was
# computed against the wrong total; unified on 1000 (training unchanged).
EPOCHS = 1000
class PrintDot(keras.callbacks.Callback):
    """Prints '.' per epoch and a cumulative percentage every 100 epochs."""
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0 and epoch > 0:
            print('{}%'.format(int(epoch * 100 / EPOCHS)))
        print('.', end='')
# Stop once validation loss has not improved for 10 consecutive epochs.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(
    normed_train_data, train_labels,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, PrintDot()])
plot_history(history)
In [57]:
# Tabulate the final few epochs of the training history.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
Out[57]:
In [59]:
# Evaluate on the held-out test set; return order matches compile():
# [loss, mean_absolute_error, mean_squared_error].
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
print("Testing set Mean Abs Error: {:5.2f} PNL".format(mae))
In [72]:
# DataFrame.as_matrix() was deprecated and removed in pandas 1.0 — use
# to_numpy() instead. Flattens the two target columns into one array.
test_labels.to_numpy().flatten()
Out[72]:
In [73]:
test_predictions = model.predict(normed_test_data)
# as_matrix() was removed in pandas 1.0 → to_numpy(). Axis labels corrected
# from the copied Auto MPG tutorial text to this dataset's PnL units.
plt.scatter(test_labels.to_numpy().flatten(), test_predictions.flatten())
plt.xlabel('True Values [PnL]')
plt.ylabel('Predictions [PnL]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])  # y = x reference (perfect prediction)
In [79]:
# Prediction-error distribution. as_matrix() was removed in pandas 1.0 →
# to_numpy(); label corrected from the copied '[MPG]' tutorial text.
error = test_predictions.flatten() - test_labels.to_numpy().flatten()
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [PnL]")
_ = plt.ylabel("Count")
In [87]:
# Largest over- and under-prediction on the test set.
max(error), min(error)
Out[87]:
In [82]:
# Element-wise product of prediction and truth: negative ⇒ sign mismatch.
# as_matrix() was removed in pandas 1.0 → to_numpy().
sign_error = test_predictions.flatten() * test_labels.to_numpy().flatten()
In [86]:
# Fraction of test points where the predicted sign disagrees with the truth.
sum(sign_error < 0) / len(sign_error)
Out[86]:
In [88]:
# NOTE(review): tf.trainable_variables() is TF1-style global-collection API;
# under TF2 eager mode this fails — model.trainable_variables is the
# per-model equivalent. Left as-is since the notebook evidently runs it.
tf.trainable_variables()
Out[88]:
In [90]:
# Grab the first trainable variable of the layer named 'dense_3' (TF1-style
# collection lookup). NOTE(review): `var` appears unused later in this file —
# the next cell reads weights via model.get_weights() instead; likely leftover.
var = [v for v in tf.trainable_variables() if v.name.startswith("dense_3")][0]
In [125]:
# Inspect the linear layer's input weights: one row per feature, one column
# per output target, sorted by the long-side weight.
weights = pd.DataFrame(model.get_weights()[0])
weights = weights.set_index(test_dataset.keys())
weights = weights.rename(columns={0: 'buyPnl', 1: 'sellPnl'})
weights.sort_values(by='buyPnl')
Out[125]:
In [ ]: