notebook.community

Edit and run



In [1]:

    
from __future__ import absolute_import, division, print_function

from tensorflow import keras
from tensorflow.keras import layers
import bisect
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import seaborn as sns
import tensorflow as tf



In [46]:

    
def read_data(dates):
    t = []
    for date in dates:
        tmp = pd.read_csv('/Users/felipe/bitcoin/data/{}-training.csv'.format(str(date)),
                          index_col='time',
                          parse_dates=True)
        t.append(tmp)
    return pd.concat(t)



In [47]:

    
t = read_data([20190520, 20190521])



In [51]:

    
t.columns









    Out[51]:





Index(['longPnlAvg', 'shortPnlAvg', 'boughtSum', 'soldSum', 'bidPriceAvg',
       'askPriceAvg', 'bidSizeAvg', 'askSizeAvg', 'spreadAvg'],
      dtype='object')



In [29]:

    
train_dataset = t.sample(frac=0.8,random_state=0)
test_dataset = t.drop(train_dataset.index)
train_stats = train_dataset.describe().transpose()
del t



In [30]:

    
x_cols = [i for i in train_dataset.columns if i not in ('longPnlAvg', 'shortPnlAvg')]
y_cols = ['longPnlAvg', 'shortPnlAvg']

train_labels = train_dataset[y_cols]
test_labels = test_dataset[y_cols]
train_dataset = train_dataset[x_cols]
test_dataset = test_dataset[x_cols]



In [31]:

    
def norm(xx):
    x = xx.copy()
    for c in x.columns:
        x[c] -= train_stats['mean'][c]
        x[c] /= train_stats['std'][c]
    return x



In [32]:

    
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)



In [51]:

    
def build_model():
    model = keras.Sequential([
        #layers.Dense(64, activation=tf.nn.relu, input_shape=[len(train_dataset.keys())]),
        #layers.Dense(64, activation=tf.nn.relu),
        #layers.Dense(2)
        layers.Dense(2, input_shape=[len(train_dataset.keys())])
    ])

    optimizer = tf.keras.optimizers.RMSprop(0.001)

    model.compile(loss='mean_squared_error',
            optimizer=optimizer,
            metrics=['mean_absolute_error', 'mean_squared_error'])
    return model

model = build_model()



In [52]:

    
model.summary()









    



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_3 (Dense)              (None, 2)                 100       
=================================================================
Total params: 100
Trainable params: 100
Non-trainable params: 0
_________________________________________________________________



In [53]:

    
# sns.pairplot(train_dataset[['longPnlAvg', 'E2boughtSum', 'E2soldSum']], diag_kind="kde")



In [54]:

    
# test
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result









    Out[54]:





array([[-3.3640780e+00,  7.2535133e-01],
       [-1.8880553e+00, -1.4785385e-01],
       [ 2.6140866e+00, -5.9923881e-01],
       [ 2.9599340e+00, -1.3846033e+00],
       [-1.9504996e-03, -2.8883567e-01],
       [ 1.7633506e+00, -1.5136794e+00],
       [-6.6137528e-01, -5.7026852e-02],
       [ 2.4765110e+00, -8.5831612e-01],
       [ 9.6234155e-01, -3.0977446e-01],
       [ 5.2407843e-01, -1.4917796e+00]], dtype=float32)



In [55]:

    
def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [MPG]')
    plt.plot(hist['epoch'], hist['mean_absolute_error'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_absolute_error'],
             label = 'Val Error')
    plt.ylim([0,5])
    plt.legend()
  
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error [$MPG^2$]')
    plt.plot(hist['epoch'], hist['mean_squared_error'],
           label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_squared_error'],
           label = 'Val Error')
    plt.ylim([0,20])
    plt.legend()
    plt.show()



In [56]:

    
# Display training progress by printing a single dot for each completed epoch
EPOCHS = 500

class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0 and epoch > 0: print('{}%'.format(int(epoch*100/EPOCHS)))
        print('.', end='')
        
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

history = model.fit(
    normed_train_data, train_labels,
    epochs=1000,
    validation_split = 0.2,
    verbose=0,
    callbacks=[early_stop, PrintDot()])

plot_history(history)









    



....................................................................................................20%
....................................................................................................40%
....................................................................................................60%
.................................................................



In [57]:

    
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()









    Out[57]:







  
    
      
      loss
      mean_absolute_error
      mean_squared_error
      val_loss
      val_mean_absolute_error
      val_mean_squared_error
      epoch
    
  
  
    
      360
      28.871658
      3.804974
      28.871655
      29.324542
      3.876092
      29.324543
      360
    
    
      361
      28.867584
      3.802163
      28.867584
      29.317340
      3.876479
      29.317341
      361
    
    
      362
      28.850018
      3.804214
      28.850010
      29.337546
      3.875551
      29.337542
      362
    
    
      363
      28.840884
      3.803473
      28.840885
      29.323044
      3.874372
      29.323044
      363
    
    
      364
      28.829051
      3.801877
      28.829048
      29.303933
      3.874685
      29.303936
      364



In [59]:

    
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)

print("Testing set Mean Abs Error: {:5.2f} PNL".format(mae))









    



Testing set Mean Abs Error:  3.81 PNL



In [72]:

    
test_labels.as_matrix().flatten()









    Out[72]:





array([-0.63846154,  0.87307692, -0.36206897, ...,  3.        ,
        5.        ,  2.74705882])



In [73]:

    
test_predictions = model.predict(normed_test_data)

plt.scatter(test_labels.as_matrix().flatten(), test_predictions.flatten())
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])



In [79]:

    
error = test_predictions.flatten() - test_labels.as_matrix().flatten()
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")



In [87]:

    
max(error), min(error)









    Out[87]:





(30.66749202843868, -30.870521545410156)



In [82]:

    
sign_error = test_predictions.flatten() * test_labels.as_matrix().flatten()



In [86]:

    
sum(sign_error < 0) / len(sign_error)









    Out[86]:





0.12760416666666666



In [88]:

    
tf.trainable_variables()









    Out[88]:





[<tf.Variable 'dense/kernel:0' shape=(49, 2) dtype=float32>,
 <tf.Variable 'dense/bias:0' shape=(2,) dtype=float32>,
 <tf.Variable 'RMSprop/lr:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop/rho:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop/decay:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop/iterations:0' shape=() dtype=int64>,
 <tf.Variable 'dense_1/kernel:0' shape=(49, 2) dtype=float32>,
 <tf.Variable 'dense_1/bias:0' shape=(2,) dtype=float32>,
 <tf.Variable 'RMSprop_1/lr:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_1/rho:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_1/decay:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_1/iterations:0' shape=() dtype=int64>,
 <tf.Variable 'training/RMSprop/Variable:0' shape=(49, 2) dtype=float32>,
 <tf.Variable 'training/RMSprop/Variable_1:0' shape=(2,) dtype=float32>,
 <tf.Variable 'dense_2/kernel:0' shape=(49, 2) dtype=float32>,
 <tf.Variable 'dense_2/bias:0' shape=(2,) dtype=float32>,
 <tf.Variable 'RMSprop_2/lr:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_2/rho:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_2/decay:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_2/iterations:0' shape=() dtype=int64>,
 <tf.Variable 'dense_3/kernel:0' shape=(49, 2) dtype=float32>,
 <tf.Variable 'dense_3/bias:0' shape=(2,) dtype=float32>,
 <tf.Variable 'RMSprop_3/lr:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_3/rho:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_3/decay:0' shape=() dtype=float32>,
 <tf.Variable 'RMSprop_3/iterations:0' shape=() dtype=int64>,
 <tf.Variable 'training_2/RMSprop/Variable:0' shape=(49, 2) dtype=float32>,
 <tf.Variable 'training_2/RMSprop/Variable_1:0' shape=(2,) dtype=float32>]



In [90]:

    
var = [v for v in tf.trainable_variables() if v.name.startswith("dense_3")][0]



In [125]:

    
pd.DataFrame(model.get_weights()[0])\
.set_index(test_dataset.keys())\
.rename(columns={0: 'buyPnl', 1: 'sellPnl'})\
.sort_values(by='buyPnl')









    Out[125]:







  
    
      
      buyPnl
      sellPnl
    
  
  
    
      E60soldSum
      -2.858211
      -0.857166
    
    
      bidPriceAvg
      -2.322055
      2.406816
    
    
      askPriceAvg
      -2.104322
      2.139055
    
    
      E2askPriceAvg
      -1.623266
      1.941769
    
    
      E2bidPriceAvg
      -1.621060
      1.932353
    
    
      E3askPriceAvg
      -1.422690
      1.408711
    
    
      E6spreadAvg
      -1.409747
      -0.803006
    
    
      E15boughtSum
      -1.321506
      4.287886
    
    
      E30bidSizeAvg
      -1.288543
      -0.718810
    
    
      E3bidPriceAvg
      -1.222031
      1.620211
    
    
      E15bidSizeAvg
      -1.161885
      0.951951
    
    
      E6boughtSum
      -1.139621
      -0.172826
    
    
      E6askPriceAvg
      -0.993003
      1.141211
    
    
      soldSum
      -0.558818
      0.161340
    
    
      E6bidPriceAvg
      -0.504406
      0.852740
    
    
      E3spreadAvg
      -0.482912
      0.083743
    
    
      askSizeAvg
      -0.356137
      0.296382
    
    
      E2bidSizeAvg
      -0.294426
      0.031896
    
    
      E2soldSum
      -0.261209
      0.185053
    
    
      E3boughtSum
      -0.253602
      0.174514
    
    
      E3soldSum
      -0.166614
      0.025568
    
    
      E3askSizeAvg
      -0.156774
      0.106761
    
    
      E2boughtSum
      -0.102804
      -0.126049
    
    
      E60spreadAvg
      -0.032863
      0.055209
    
    
      E60askSizeAvg
      -0.010683
      1.188601
    
    
      E15spreadAvg
      0.007101
      1.569976
    
    
      E30askSizeAvg
      0.011676
      -1.824158
    
    
      E6askSizeAvg
      0.039739
      0.806311
    
    
      E2askSizeAvg
      0.082898
      -0.243715
    
    
      E2spreadAvg
      0.179499
      0.474958
    
    
      spreadAvg
      0.219088
      -0.660516
    
    
      E6bidSizeAvg
      0.322776
      0.190580
    
    
      bidSizeAvg
      0.376939
      -0.409276
    
    
      E3bidSizeAvg
      0.401076
      0.086164
    
    
      boughtSum
      0.417500
      -0.404000
    
    
      E60boughtSum
      0.724551
      -1.522818
    
    
      E60bidSizeAvg
      0.729301
      -0.522080
    
    
      E30boughtSum
      0.799081
      4.448245
    
    
      E15askSizeAvg
      0.826259
      -0.945672
    
    
      E15askPriceAvg
      1.099388
      -0.530082
    
    
      E15bidPriceAvg
      1.240099
      -0.540232
    
    
      E6soldSum
      1.373520
      -0.955736
    
    
      E30spreadAvg
      1.821440
      2.949639
    
    
      E60bidPriceAvg
      2.071972
      -3.598100
    
    
      E30bidPriceAvg
      2.275282
      -2.527005
    
    
      E60askPriceAvg
      2.547004
      -3.579436
    
    
      E30askPriceAvg
      2.555838
      -2.419269
    
    
      E30soldSum
      3.961984
      -1.304081
    
    
      E15soldSum
      5.327722
      -2.113612



In [ ]:

	loss	mean_absolute_error	mean_squared_error	val_loss	val_mean_absolute_error	val_mean_squared_error	epoch
360	28.871658	3.804974	28.871655	29.324542	3.876092	29.324543	360
361	28.867584	3.802163	28.867584	29.317340	3.876479	29.317341	361
362	28.850018	3.804214	28.850010	29.337546	3.875551	29.337542	362
363	28.840884	3.803473	28.840885	29.323044	3.874372	29.323044	363
364	28.829051	3.801877	28.829048	29.303933	3.874685	29.303936	364

	buyPnl	sellPnl
E60soldSum	-2.858211	-0.857166
bidPriceAvg	-2.322055	2.406816
askPriceAvg	-2.104322	2.139055
E2askPriceAvg	-1.623266	1.941769
E2bidPriceAvg	-1.621060	1.932353
E3askPriceAvg	-1.422690	1.408711
E6spreadAvg	-1.409747	-0.803006
E15boughtSum	-1.321506	4.287886
E30bidSizeAvg	-1.288543	-0.718810
E3bidPriceAvg	-1.222031	1.620211
E15bidSizeAvg	-1.161885	0.951951
E6boughtSum	-1.139621	-0.172826
E6askPriceAvg	-0.993003	1.141211
soldSum	-0.558818	0.161340
E6bidPriceAvg	-0.504406	0.852740
E3spreadAvg	-0.482912	0.083743
askSizeAvg	-0.356137	0.296382
E2bidSizeAvg	-0.294426	0.031896
E2soldSum	-0.261209	0.185053
E3boughtSum	-0.253602	0.174514
E3soldSum	-0.166614	0.025568
E3askSizeAvg	-0.156774	0.106761
E2boughtSum	-0.102804	-0.126049
E60spreadAvg	-0.032863	0.055209
E60askSizeAvg	-0.010683	1.188601
E15spreadAvg	0.007101	1.569976
E30askSizeAvg	0.011676	-1.824158
E6askSizeAvg	0.039739	0.806311
E2askSizeAvg	0.082898	-0.243715
E2spreadAvg	0.179499	0.474958
spreadAvg	0.219088	-0.660516
E6bidSizeAvg	0.322776	0.190580
bidSizeAvg	0.376939	-0.409276
E3bidSizeAvg	0.401076	0.086164
boughtSum	0.417500	-0.404000
E60boughtSum	0.724551	-1.522818
E60bidSizeAvg	0.729301	-0.522080
E30boughtSum	0.799081	4.448245
E15askSizeAvg	0.826259	-0.945672
E15askPriceAvg	1.099388	-0.530082
E15bidPriceAvg	1.240099	-0.540232
E6soldSum	1.373520	-0.955736
E30spreadAvg	1.821440	2.949639
E60bidPriceAvg	2.071972	-3.598100
E30bidPriceAvg	2.275282	-2.527005
E60askPriceAvg	2.547004	-3.579436
E30askPriceAvg	2.555838	-2.419269
E30soldSum	3.961984	-1.304081
E15soldSum	5.327722	-2.113612