This notebook contains a summary table of the Q-learners' results. The reported results are those evaluated on the test set, using the learned actions (without learning on the test set).


In [2]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
from time import time
import pickle

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')


Populating the interactive namespace from numpy and matplotlib

Predictor Results


In [3]:
# Predictor evaluation metrics (R^2 and mean relative error, train vs test),
# indexed by the prediction horizon in days.
horizon_days = [1.0, 7.0, 14.0, 28.0, 56.0]
pred_results_df = pd.DataFrame(
    {
        'train_r2': [0.983486, 0.906177, 0.826779, 0.696077, 0.494079],
        'test_r2': [0.976241, 0.874892, 0.758697, 0.515802, 0.152134],
        'train_mre': [0.008762, 0.026232, 0.037349, 0.052396, 0.073589],
        'test_mre': [0.013906, 0.034764, 0.051755, 0.078545, 0.108190],
    },
    index=pd.Index(horizon_days, name='ahead_days'),
    columns=['train_r2', 'test_r2', 'train_mre', 'test_mre'],
)
pred_results_df


Out[3]:
train_r2 test_r2 train_mre test_mre
ahead_days
1.0 0.983486 0.976241 0.008762 0.013906
7.0 0.906177 0.874892 0.026232 0.034764
14.0 0.826779 0.758697 0.037349 0.051755
28.0 0.696077 0.515802 0.052396 0.078545
56.0 0.494079 0.152134 0.073589 0.108190

Automatic Trader Results


In [4]:
# Hyperparameter columns recorded for each q-learner experiment.
features = 'dyna states actions training_days epochs predictor random_decrease'.split()

In [5]:
# Metrics reported for every trader experiment.
metrics = ['sharpe_ratio', 'cumulative_return', 'epoch_time']

In [6]:
# Notebook names of the 17 trader experiments; used as the index of experiments_df.
# NOTE(review): 'simple_q_learner_full_training' here appears as
# 'simple_q_full_training' in the results dictionaries further down — confirm
# which spelling is correct, since joins against experiments_df would miss it.
names = [
    'simple_q_learner',
    'simple_q_learner_1000_states',
    'simple_q_learner_1000_states_4_actions_full_training',
    'simple_q_learner_1000_states_full_training',
    'simple_q_learner_100_epochs',
    'simple_q_learner_11_actions',
    'simple_q_learner_fast_learner',
    'simple_q_learner_fast_learner_1000_states',
    'simple_q_learner_fast_learner_11_actions',
    'simple_q_learner_fast_learner_3_actions',
    'simple_q_learner_fast_learner_full_training',
    'simple_q_learner_full_training',
    'dyna_q_1000_states_full_training',
    'dyna_q_learner',
    'dyna_q_with_predictor',
    'dyna_q_with_predictor_full_training',
    'dyna_q_with_predictor_full_training_dyna1',
]

# One row of hyperparameters per entry of `names`, in the same order.
feat_data = np.array([
    # (dyna, states, actions, training_days, epochs, predictor, random_decrease)
    [0, 125, 2, 512, 15, False, 0.9999],  # simple_q_learner
    [0, 1000, 2, 512, 15, False, 0.9999],  # simple_q_learner_1000_states
    [0, 1000, 4, 5268, 7, False, 0.9999],  # simple_q_learner_1000_states_4_actions_full_training
    [0, 1000, 2, 5268, 15, False, 0.9999],  # simple_q_learner_1000_states_full_training
    [0, 125, 2, 512, 100, False, 0.9999], # simple_q_learner_100_epochs
    [0, 125, 11, 512, 10, False, 0.9999],  # simple_q_learner_11_actions
    [0, 125, 2, 512, 4, False, 0.999],  # simple_q_learner_fast_learner
    [0, 1000, 2, 512, 4, False, 0.999],  # simple_q_learner_fast_learner_1000_states
    [0, 125, 11, 512, 4, False, 0.999],  # simple_q_learner_fast_learner_11_actions
    [0, 125, 3, 512, 4, False, 0.999],  # simple_q_learner_fast_learner_3_actions
    [0, 125, 2, 5268, 4, False, 0.999],  # simple_q_learner_fast_learner_full_training
    [0, 125, 2, 5268, 15, False, 0.9999],  # simple_q_learner_full_training
    [20, 1000, 2, 5268, 7, False, 0.9999],  # dyna_q_1000_states_full_training
    [20, 125, 2, 512, 4, False, 0.9999],  # dyna_q_learner
    [20, 125, 2, 512, 4, True, 0.9999],  # dyna_q_with_predictor
    [20, 125, 2, 5268, 4, True, 0.9999],  # dyna_q_with_predictor_full_training
    [1, 125, 2, 5268, 4, True, 0.9999],  # dyna_q_with_predictor_full_training_dyna1
])
# NOTE(review): np.array coerces these mixed int/bool/float rows to float64, so
# the boolean `predictor` flag is stored/displayed as 0.0/1.0 — confirm intended.
experiments_df = pd.DataFrame(feat_data, columns=features, index=names)
experiments_df.index.name = 'nb_name'

# Training-set results per experiment: Sharpe ratio, cumulative return, and
# seconds per training epoch, as reported by each experiment notebook.
# NOTE(review): the key 'simple_q_full_training' does not match the
# 'simple_q_learner_full_training' name used in experiments_df — a join against
# experiments_df would miss this row; confirm which name is correct.
train_res_data = {
    'simple_q_learner': {'sharpe': 1.9858481612185834, 'cum_ret': 0.38359700000000174, 'epoch_time': 18.330891609191895},
    'simple_q_learner_1000_states': {'sharpe': 3.4470302925746776, 'cum_ret': 0.7292610000000004, 'epoch_time': 18.28188133239746},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe': 2.2430093688893264, 'cum_ret': 30.14936200000002, 'epoch_time': 157.69741320610046},
    'simple_q_learner_1000_states_full_training': {'sharpe': 2.366638028444387, 'cum_ret': 79.61800199999992, 'epoch_time': 159.31651139259338},
    'simple_q_learner_100_epochs': {'sharpe': 4.093353629096188, 'cum_ret': 0.6627280000000009, 'epoch_time': 9.004882335662842},
    'simple_q_learner_11_actions': {'sharpe': 1.5440407782808305, 'cum_ret': 0.2412700000000001, 'epoch_time': 11.08903431892395},
    'simple_q_learner_fast_learner': {'sharpe': 2.8787265519379908, 'cum_ret':0.5468269999999986, 'epoch_time': 18.931288242340088},
    'simple_q_learner_fast_learner_1000_states': {'sharpe': 2.031446601959524, 'cum_ret': 0.3971230000000021, 'epoch_time': 19.006957530975342},
    'simple_q_learner_fast_learner_11_actions': {'sharpe': 3.241438316121647, 'cum_ret': 0.541966, 'epoch_time': 18.913504123687744},
    'simple_q_learner_fast_learner_3_actions': {'sharpe': 2.9448069674427555, 'cum_ret': 0.4873689999999995, 'epoch_time': 18.46741485595703},
    'simple_q_learner_fast_learner_full_training': {'sharpe': 1.0444534903132408, 'cum_ret': 0.7844770000000019, 'epoch_time': 143.5039553642273},
    'simple_q_full_training': {'sharpe': 1.2592450659232495, 'cum_ret': 1.7391450000000006, 'epoch_time': 115.70198798179626},
    'dyna_q_1000_states_full_training': {'sharpe': 2.2964510954840325, 'cum_ret': 94.75696199999993, 'epoch_time': 242.88240551948547},
    'dyna_q_learner': {'sharpe': 3.706435588713091, 'cum_ret': 0.4938250000000006
, 'epoch_time': 18.87182092666626},
    'dyna_q_with_predictor': {'sharpe': 3.2884867210125845, 'cum_ret': 0.5397989999999993, 'epoch_time': 458.8401937484741},
    'dyna_q_with_predictor_full_training': {'sharpe': 1.0037137587999854, 'cum_ret': 2.565081999999997, 'epoch_time': 7850.391056537628},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe': 0.48228187419119906, 'cum_ret': 0.1737430000000002, 'epoch_time': 730.5918335914612},
}
# Transpose so experiments are rows and metrics are columns.
train_res_data_df = pd.DataFrame(train_res_data).T

# Test-set results per experiment with the learned policy frozen (no further
# learning on the test set).
# NOTE(review): the key 'simple_q_full_training' does not match the
# 'simple_q_learner_full_training' name used in experiments_df — confirm which
# name is correct.
test_res_data_no_learning = {
    'simple_q_learner': {'sharpe': 0.3664203166030617, 'cum_ret': 0.06372499999999937, 'epoch_time': 17.75287628173828},
    'simple_q_learner_1000_states': {'sharpe': -0.013747768227987086, 'cum_ret': -0.013047000000000142, 'epoch_time': 17.661759853363037},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe': 0.9400492987950515, 'cum_ret': 0.10791900000000054, 'epoch_time': 13.83948016166687},
    'simple_q_learner_1000_states_full_training': {'sharpe': 1.4827065747174577, 'cum_ret': 0.22123900000000085, 'epoch_time': 9.844955205917358},
    'simple_q_learner_100_epochs': {'sharpe': 0.6420028402682839, 'cum_ret': 0.10032399999999986, 'epoch_time': 9.116246461868286},
    'simple_q_learner_11_actions': {'sharpe': 0.15616450321809833, 'cum_ret': 0.019991000000000758, 'epoch_time': 10.187344551086426},
    'simple_q_learner_fast_learner': {'sharpe': 0.9643510680410812, 'cum_ret': 0.18794100000000125, 'epoch_time': 18.13912320137024},
    'simple_q_learner_fast_learner_1000_states': {'sharpe': 0.8228017709095453, 'cum_ret': 0.16162700000000063, 'epoch_time': 19.452654361724854},
    'simple_q_learner_fast_learner_11_actions': {'sharpe': 0.8238261816524384, 'cum_ret': 0.12766000000000033, 'epoch_time': 18.901001930236816},
    'simple_q_learner_fast_learner_3_actions': {'sharpe': 0.6332862559879147, 'cum_ret': 0.08036399999999966, 'epoch_time': 19.221533060073853},
    'simple_q_learner_fast_learner_full_training': {'sharpe': 1.2605807833904492, 'cum_ret': 0.056606000000000156, 'epoch_time': 11.412826538085938},
    'simple_q_full_training': {'sharpe': -0.2562905901467118, 'cum_ret': -0.027945999999999693, 'epoch_time': 8.009900569915771},
    'dyna_q_1000_states_full_training': {'sharpe': 0.4267994866360769, 'cum_ret': 0.0652820000000005, 'epoch_time': 14.224964618682861},
    'dyna_q_learner': {'sharpe': 0.5191712068491942, 'cum_ret': 0.07307299999999883, 'epoch_time': 16.431984901428223},
    'dyna_q_with_predictor': {'sharpe': 0.7435489843809434, 'cum_ret': 0.10403399999999974, 'epoch_time': 6.692898988723755},
    'dyna_q_with_predictor_full_training': {'sharpe': -0.33503797163532956, 'cum_ret': -0.029740999999999795, 'epoch_time': 8.51533818244934},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe': 0.20288841658633258, 'cum_ret': 0.008380000000000276, 'epoch_time': 10.236766338348389},
}
# Transpose so experiments are rows and metrics are columns.
test_res_data_no_learning_df = pd.DataFrame(test_res_data_no_learning).T

# Test-set results per experiment when the agent keeps learning on the test set
# (causality preserved: only past data is used at every step).
# NOTE(review): 'simple_q_full_training' does not match the
# 'simple_q_learner_full_training' name in experiments_df — confirm.
# NOTE(review): 'simple_q_learner_fast_learner_full_training' has exactly the
# same sharpe/cum_ret as in the no-learning dictionary above — possibly a
# copy-paste; verify against the source notebook.
test_res_data_learning = {
    'simple_q_learner': {'sharpe': 0.9735950444291429, 'cum_ret': 0.1953619999999998, 'epoch_time': 18.097697019577026},
    'simple_q_learner_1000_states': {'sharpe': -0.0867440896667206, 'cum_ret': -0.027372000000001173, 'epoch_time': 17.762672901153564},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe': 1.109613523501088, 'cum_ret': 0.12868000000000057, 'epoch_time': 9.899595499038696},
    'simple_q_learner_1000_states_full_training': {'sharpe': 1.5176752934460862, 'cum_ret': 0.2069550000000011, 'epoch_time': 9.233611106872559},
    'simple_q_learner_100_epochs': {'sharpe': 0.09274627213069256, 'cum_ret': 0.008058000000000565, 'epoch_time': 8.653764009475708},
    'simple_q_learner_11_actions': {'sharpe': 0.4691456599751897, 'cum_ret': 0.07124699999999917, 'epoch_time': 10.827114582061768},
    'simple_q_learner_fast_learner': {'sharpe': 0.6020182964860242, 'cum_ret': 0.09249299999999816, 'epoch_time': 17.882429122924805},
    'simple_q_learner_fast_learner_1000_states': {'sharpe': 0.17618139275375405, 'cum_ret': 0.02545300000000017, 'epoch_time': 15.724592685699463},
    'simple_q_learner_fast_learner_11_actions': {'sharpe': 0.9608337022400049, 'cum_ret': 0.1406880000000006, 'epoch_time': 17.67305564880371},
    'simple_q_learner_fast_learner_3_actions': {'sharpe': 0.3254406127664859, 'cum_ret': 0.04086700000000043, 'epoch_time': 18.100637197494507},
    'simple_q_learner_fast_learner_full_training': {'sharpe': 1.2605807833904492, 'cum_ret': 0.056606000000000156, 'epoch_time': 12.214732885360718},
    'simple_q_full_training': {'sharpe': 0.3139835605580342, 'cum_ret': 0.02497299999999969, 'epoch_time': 7.958802700042725},
    'dyna_q_1000_states_full_training': {'sharpe': 0.48863969848043476, 'cum_ret': 0.06846099999999988, 'epoch_time': 18.820592880249023},
    'dyna_q_learner': {'sharpe': 0.0700928915599047, 'cum_ret': 0.004358999999999114, 'epoch_time': 18.085463523864746},
    'dyna_q_with_predictor': {'sharpe': 0.6954014537549168, 'cum_ret': 0.09154599999999946, 'epoch_time': 338.36568880081177},
    'dyna_q_with_predictor_full_training': {'sharpe': -0.8531759696425502, 'cum_ret': -0.07708900000000052, 'epoch_time': 375.830899477005},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe': -0.15635735184097058, 'cum_ret': -0.006745999999999919, 'epoch_time': 38.24271035194397},
}
# Transpose so experiments are rows and metrics are columns.
test_res_data_learning_df = pd.DataFrame(test_res_data_learning).T

# Buy-and-hold benchmark metrics over each experiment's training period.
# Only two distinct value pairs appear, apparently corresponding to the 512-day
# runs vs the full 5268-day runs — presumably the benchmark depends only on the
# training window; TODO confirm against the experiment notebooks.
# NOTE(review): 'simple_q_full_training' does not match the
# 'simple_q_learner_full_training' name in experiments_df — confirm.
train_benchmark_data = {
    'simple_q_learner': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_1000_states': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'simple_q_learner_1000_states_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'simple_q_learner_100_epochs': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_11_actions': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_1000_states': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_11_actions': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_3_actions': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'simple_q_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'dyna_q_1000_states_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'dyna_q_learner': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'dyna_q_with_predictor': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'dyna_q_with_predictor_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
}
# Transpose so experiments are rows and benchmark metrics are columns.
train_benchmark_data_df = pd.DataFrame(train_benchmark_data).T

# Buy-and-hold benchmark metrics over the test period. Most experiments share
# one value pair; the predictor-based dyna experiments show different values —
# presumably a different test window; TODO confirm against the notebooks.
# NOTE(review): 'simple_q_full_training' does not match the
# 'simple_q_learner_full_training' name in experiments_df — confirm.
test_benchmark_data = {
    'simple_q_learner': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_1000_states': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_1000_states_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_100_epochs': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_11_actions': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_1000_states': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_11_actions': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_3_actions': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'dyna_q_1000_states_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'dyna_q_learner': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'dyna_q_with_predictor': {'sharpe_bench': 0.2930367522823553, 'cum_ret_bench': 0.05002151977428149},
    'dyna_q_with_predictor_full_training': {'sharpe_bench': 0.2930367522823553, 'cum_ret_bench': 0.05002151977428149},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe_bench': 0.3772011734533203, 'cum_ret_bench': 0.07288030223327424},
}
# Transpose so experiments are rows and benchmark metrics are columns.
test_benchmark_data_df = pd.DataFrame(test_benchmark_data).T

Features that were used in the experiments


In [7]:
# Display the experiment hyperparameter table (one row per q-learner notebook).
print(experiments_df.shape)
experiments_df


(17, 7)
Out[7]:
dyna states actions training_days epochs predictor random_decrease
nb_name
simple_q_learner 0.0 125.0 2.0 512.0 15.0 0.0 0.9999
simple_q_learner_1000_states 0.0 1000.0 2.0 512.0 15.0 0.0 0.9999
simple_q_learner_1000_states_4_actions_full_training 0.0 1000.0 4.0 5268.0 7.0 0.0 0.9999
simple_q_learner_1000_states_full_training 0.0 1000.0 2.0 5268.0 15.0 0.0 0.9999
simple_q_learner_100_epochs 0.0 125.0 2.0 512.0 100.0 0.0 0.9999
simple_q_learner_11_actions 0.0 125.0 11.0 512.0 10.0 0.0 0.9999
simple_q_learner_fast_learner 0.0 125.0 2.0 512.0 4.0 0.0 0.9990
simple_q_learner_fast_learner_1000_states 0.0 1000.0 2.0 512.0 4.0 0.0 0.9990
simple_q_learner_fast_learner_11_actions 0.0 125.0 11.0 512.0 4.0 0.0 0.9990
simple_q_learner_fast_learner_3_actions 0.0 125.0 3.0 512.0 4.0 0.0 0.9990
simple_q_learner_fast_learner_full_training 0.0 125.0 2.0 5268.0 4.0 0.0 0.9990
simple_q_learner_full_training 0.0 125.0 2.0 5268.0 15.0 0.0 0.9999
dyna_q_1000_states_full_training 20.0 1000.0 2.0 5268.0 7.0 0.0 0.9999
dyna_q_learner 20.0 125.0 2.0 512.0 4.0 0.0 0.9999
dyna_q_with_predictor 20.0 125.0 2.0 512.0 4.0 1.0 0.9999
dyna_q_with_predictor_full_training 20.0 125.0 2.0 5268.0 4.0 1.0 0.9999
dyna_q_with_predictor_full_training_dyna1 1.0 125.0 2.0 5268.0 4.0 1.0 0.9999

In [8]:
# Persist the experiments table for use outside this notebook.
experiments_df.to_csv('../../data/experiments_df.csv')

Training Results


In [10]:
# Join the training results with their benchmark and derive, for each metric,
# how much the agent improved over the buy-and-hold benchmark.
training_res_df = train_res_data_df.join(train_benchmark_data_df)
training_res_df.index.name = 'nb_name'
for metric in ('sharpe', 'cum_ret'):
    training_res_df[metric + '_increase'] = (
        training_res_df[metric] - training_res_df[metric + '_bench']
    )
print(training_res_df.shape)
training_res_df


(17, 7)
Out[10]:
cum_ret epoch_time sharpe cum_ret_bench sharpe_bench sharpe_increase cum_ret_increase
nb_name
dyna_q_1000_states_full_training 94.756962 242.882406 2.296451 3.304503 0.456677 1.839774 91.452459
dyna_q_learner 0.493825 18.871821 3.706436 0.424492 1.601692 2.104744 0.069333
dyna_q_with_predictor 0.539799 458.840194 3.288487 0.424492 1.601692 1.686795 0.115307
dyna_q_with_predictor_full_training 2.565082 7850.391057 1.003714 3.304503 0.456677 0.547037 -0.739421
dyna_q_with_predictor_full_training_dyna1 0.173743 730.591834 0.482282 3.304503 0.456677 0.025605 -3.130760
simple_q_full_training 1.739145 115.701988 1.259245 3.304503 0.456677 0.802568 -1.565358
simple_q_learner 0.383597 18.330892 1.985848 0.424492 1.601692 0.384157 -0.040895
simple_q_learner_1000_states 0.729261 18.281881 3.447030 0.424492 1.601692 1.845339 0.304769
simple_q_learner_1000_states_4_actions_full_training 30.149362 157.697413 2.243009 3.304503 0.456677 1.786332 26.844859
simple_q_learner_1000_states_full_training 79.618002 159.316511 2.366638 3.304503 0.456677 1.909961 76.313499
simple_q_learner_100_epochs 0.662728 9.004882 4.093354 0.424492 1.601692 2.491662 0.238236
simple_q_learner_11_actions 0.241270 11.089034 1.544041 0.424492 1.601692 -0.057651 -0.183222
simple_q_learner_fast_learner 0.546827 18.931288 2.878727 0.424492 1.601692 1.277035 0.122335
simple_q_learner_fast_learner_1000_states 0.397123 19.006958 2.031447 0.424492 1.601692 0.429755 -0.027369
simple_q_learner_fast_learner_11_actions 0.541966 18.913504 3.241438 0.424492 1.601692 1.639747 0.117474
simple_q_learner_fast_learner_3_actions 0.487369 18.467415 2.944807 0.424492 1.601692 1.343115 0.062877
simple_q_learner_fast_learner_full_training 0.784477 143.503955 1.044453 3.304503 0.456677 0.587776 -2.520026

In [11]:
# Persist the training results table.
training_res_df.to_csv('../../data/training_res_df.csv')

Test Results without learning in the test set


In [12]:
# Join the no-learning test results with the test benchmark and derive, for
# each metric, the agent's improvement over the benchmark.
test_no_learn_res_df = test_res_data_no_learning_df.join(test_benchmark_data_df)
test_no_learn_res_df.index.name = 'nb_name'
for metric in ('sharpe', 'cum_ret'):
    test_no_learn_res_df[metric + '_increase'] = (
        test_no_learn_res_df[metric] - test_no_learn_res_df[metric + '_bench']
    )
print(test_no_learn_res_df.shape)
test_no_learn_res_df


(17, 7)
Out[12]:
cum_ret epoch_time sharpe cum_ret_bench sharpe_bench sharpe_increase cum_ret_increase
nb_name
dyna_q_1000_states_full_training 0.065282 14.224965 0.426799 0.107023 0.442715 -0.015916 -0.041741
dyna_q_learner 0.073073 16.431985 0.519171 0.107023 0.442715 0.076456 -0.033950
dyna_q_with_predictor 0.104034 6.692899 0.743549 0.050022 0.293037 0.450512 0.054012
dyna_q_with_predictor_full_training -0.029741 8.515338 -0.335038 0.050022 0.293037 -0.628075 -0.079763
dyna_q_with_predictor_full_training_dyna1 0.008380 10.236766 0.202888 0.072880 0.377201 -0.174313 -0.064500
simple_q_full_training -0.027946 8.009901 -0.256291 0.107023 0.442715 -0.699006 -0.134969
simple_q_learner 0.063725 17.752876 0.366420 0.107023 0.442715 -0.076295 -0.043298
simple_q_learner_1000_states -0.013047 17.661760 -0.013748 0.107023 0.442715 -0.456463 -0.120070
simple_q_learner_1000_states_4_actions_full_training 0.107919 13.839480 0.940049 0.107023 0.442715 0.497334 0.000896
simple_q_learner_1000_states_full_training 0.221239 9.844955 1.482707 0.107023 0.442715 1.039991 0.114216
simple_q_learner_100_epochs 0.100324 9.116246 0.642003 0.107023 0.442715 0.199287 -0.006699
simple_q_learner_11_actions 0.019991 10.187345 0.156165 0.107023 0.442715 -0.286551 -0.087032
simple_q_learner_fast_learner 0.187941 18.139123 0.964351 0.107023 0.442715 0.521636 0.080918
simple_q_learner_fast_learner_1000_states 0.161627 19.452654 0.822802 0.107023 0.442715 0.380086 0.054604
simple_q_learner_fast_learner_11_actions 0.127660 18.901002 0.823826 0.107023 0.442715 0.381111 0.020637
simple_q_learner_fast_learner_3_actions 0.080364 19.221533 0.633286 0.107023 0.442715 0.190571 -0.026659
simple_q_learner_fast_learner_full_training 0.056606 11.412827 1.260581 0.107023 0.442715 0.817865 -0.050417

In [13]:
# Persist the no-learning test results table.
test_no_learn_res_df.to_csv('../../data/test_no_learn_res_df.csv')

Test Results with learning in the test set (always keeping causality)


In [15]:
# Join the learning-enabled test results with the test benchmark and derive,
# for each metric, the agent's improvement over the benchmark.
test_learn_res_df = test_res_data_learning_df.join(test_benchmark_data_df)
test_learn_res_df.index.name = 'nb_name'
for metric in ('sharpe', 'cum_ret'):
    test_learn_res_df[metric + '_increase'] = (
        test_learn_res_df[metric] - test_learn_res_df[metric + '_bench']
    )
print(test_learn_res_df.shape)
test_learn_res_df


(17, 7)
Out[15]:
cum_ret epoch_time sharpe cum_ret_bench sharpe_bench sharpe_increase cum_ret_increase
nb_name
dyna_q_1000_states_full_training 0.068461 18.820593 0.488640 0.107023 0.442715 0.045924 -0.038562
dyna_q_learner 0.004359 18.085464 0.070093 0.107023 0.442715 -0.372623 -0.102664
dyna_q_with_predictor 0.091546 338.365689 0.695401 0.050022 0.293037 0.402365 0.041524
dyna_q_with_predictor_full_training -0.077089 375.830899 -0.853176 0.050022 0.293037 -1.146213 -0.127111
dyna_q_with_predictor_full_training_dyna1 -0.006746 38.242710 -0.156357 0.072880 0.377201 -0.533559 -0.079626
simple_q_full_training 0.024973 7.958803 0.313984 0.107023 0.442715 -0.128732 -0.082050
simple_q_learner 0.195362 18.097697 0.973595 0.107023 0.442715 0.530880 0.088339
simple_q_learner_1000_states -0.027372 17.762673 -0.086744 0.107023 0.442715 -0.529460 -0.134395
simple_q_learner_1000_states_4_actions_full_training 0.128680 9.899595 1.109614 0.107023 0.442715 0.666898 0.021657
simple_q_learner_1000_states_full_training 0.206955 9.233611 1.517675 0.107023 0.442715 1.074960 0.099932
simple_q_learner_100_epochs 0.008058 8.653764 0.092746 0.107023 0.442715 -0.349969 -0.098965
simple_q_learner_11_actions 0.071247 10.827115 0.469146 0.107023 0.442715 0.026430 -0.035776
simple_q_learner_fast_learner 0.092493 17.882429 0.602018 0.107023 0.442715 0.159303 -0.014530
simple_q_learner_fast_learner_1000_states 0.025453 15.724593 0.176181 0.107023 0.442715 -0.266534 -0.081570
simple_q_learner_fast_learner_11_actions 0.140688 17.673056 0.960834 0.107023 0.442715 0.518118 0.033665
simple_q_learner_fast_learner_3_actions 0.040867 18.100637 0.325441 0.107023 0.442715 -0.117275 -0.066156
simple_q_learner_fast_learner_full_training 0.056606 12.214733 1.260581 0.107023 0.442715 0.817865 -0.050417

In [16]:
# Persist the learning-enabled test results table.
test_learn_res_df.to_csv('../../data/test_learn_res_df.csv')

Summary of Sharpe increases


In [17]:
# Collect the sharpe-increase column from the three result tables side by side
# (train, test without learning, test with learning).
SHARPE_Q = 'sharpe_increase'
sharpe_q_df = (
    training_res_df[[SHARPE_Q]]
    .rename(columns={SHARPE_Q: 'sharpe_i_train'})
    .join(test_no_learn_res_df[SHARPE_Q].rename('sharpe_i_test_no_learn'))
    .join(test_learn_res_df[SHARPE_Q].rename('sharpe_i_test_learn'))
)
print(sharpe_q_df.shape)
sharpe_q_df


(17, 3)
Out[17]:
sharpe_i_train sharpe_i_test_no_learn sharpe_i_test_learn
nb_name
dyna_q_1000_states_full_training 1.839774 -0.015916 0.045924
dyna_q_learner 2.104744 0.076456 -0.372623
dyna_q_with_predictor 1.686795 0.450512 0.402365
dyna_q_with_predictor_full_training 0.547037 -0.628075 -1.146213
dyna_q_with_predictor_full_training_dyna1 0.025605 -0.174313 -0.533559
simple_q_full_training 0.802568 -0.699006 -0.128732
simple_q_learner 0.384157 -0.076295 0.530880
simple_q_learner_1000_states 1.845339 -0.456463 -0.529460
simple_q_learner_1000_states_4_actions_full_training 1.786332 0.497334 0.666898
simple_q_learner_1000_states_full_training 1.909961 1.039991 1.074960
simple_q_learner_100_epochs 2.491662 0.199287 -0.349969
simple_q_learner_11_actions -0.057651 -0.286551 0.026430
simple_q_learner_fast_learner 1.277035 0.521636 0.159303
simple_q_learner_fast_learner_1000_states 0.429755 0.380086 -0.266534
simple_q_learner_fast_learner_11_actions 1.639747 0.381111 0.518118
simple_q_learner_fast_learner_3_actions 1.343115 0.190571 -0.117275
simple_q_learner_fast_learner_full_training 0.587776 0.817865 0.817865

Best Agent

The agent with the highest test Sharpe increase (with or without learning on the test set) was chosen as the "best".


In [18]:
# Chosen from the sharpe_q_df table above: this agent has the highest test
# sharpe increase, both with and without learning on the test set.
best_agent_name = 'simple_q_learner_1000_states_full_training'

In [19]:
# Show the winning agent's hyperparameters as a single-row table.
best_agent_row = experiments_df.loc[best_agent_name]
pd.DataFrame(best_agent_row).T


Out[19]:
dyna states actions training_days epochs predictor random_decrease
simple_q_learner_1000_states_full_training 0.0 1000.0 2.0 5268.0 15.0 0.0 0.9999

In [20]:
# Compare the best agent's metrics across the three evaluation settings.
indexes = ['training', 'test_no_learn', 'test_learn']

best_agent_df = pd.concat(
    [
        training_res_df.loc[best_agent_name],
        test_no_learn_res_df.loc[best_agent_name],
        test_learn_res_df.loc[best_agent_name],
    ],
    axis=1,
    keys=indexes,
).T
best_agent_df


Out[20]:
cum_ret epoch_time sharpe cum_ret_bench sharpe_bench sharpe_increase cum_ret_increase
training 79.618002 159.316511 2.366638 3.304503 0.456677 1.909961 76.313499
test_no_learn 0.221239 9.844955 1.482707 0.107023 0.442715 1.039991 0.114216
test_learn 0.206955 9.233611 1.517675 0.107023 0.442715 1.074960 0.099932

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: