In [2]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
from time import time
import pickle
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)
%load_ext autoreload
%autoreload 2
sys.path.append('../../')
In [3]:
# Predictor scores (train/test r2 and mre) keyed by prediction horizon.
# Outer keys are the horizon values; they end up as the 'ahead_days' index.
_scores_by_horizon = {
    1.0: dict(train_r2=0.983486, train_mre=0.008762, test_r2=0.976241, test_mre=0.013906),
    7.0: dict(train_r2=0.906177, train_mre=0.026232, test_r2=0.874892, test_mre=0.034764),
    14.0: dict(train_r2=0.826779, train_mre=0.037349, test_r2=0.758697, test_mre=0.051755),
    28.0: dict(train_r2=0.696077, train_mre=0.052396, test_r2=0.515802, test_mre=0.078545),
    56.0: dict(train_r2=0.494079, train_mre=0.073589, test_r2=0.152134, test_mre=0.108190),
}
# Show both r2 columns first, then both mre columns.
_score_columns = ['train_r2', 'test_r2', 'train_mre', 'test_mre']

pred_results_df = (
    pd.DataFrame(_scores_by_horizon)
    .transpose()                  # one row per horizon
    .loc[:, _score_columns]
    .rename_axis('ahead_days')
)
pred_results_df
Out[3]:
In [4]:
# Column labels of the experiment-configuration table (`experiments_df`).
# The order must match the positional layout of every row in `feat_data`.
features = [
    'dyna', 'states', 'actions', 'training_days',
    'epochs', 'predictor', 'random_decrease',
]
In [5]:
# Metric labels.
# NOTE(review): this list is not referenced by any other cell, and the result
# dictionaries use the shorter keys 'sharpe' / 'cum_ret' rather than
# 'sharpe_ratio' / 'cumulative_return' - confirm which spelling is intended
# before using it to select columns.
metrics = ['sharpe_ratio', 'cumulative_return', 'epoch_time']
In [6]:
# Experiment identifiers, grouped by agent family. The concatenated order is the
# row order of `feat_data`, so entries must not be reordered independently.
_simple_q_names = [
    'simple_q_learner',
    'simple_q_learner_1000_states',
    'simple_q_learner_1000_states_4_actions_full_training',
    'simple_q_learner_1000_states_full_training',
    'simple_q_learner_100_epochs',
    'simple_q_learner_11_actions',
    'simple_q_learner_fast_learner',
    'simple_q_learner_fast_learner_1000_states',
    'simple_q_learner_fast_learner_11_actions',
    'simple_q_learner_fast_learner_3_actions',
    'simple_q_learner_fast_learner_full_training',
    'simple_q_learner_full_training',
]
_dyna_q_names = [
    'dyna_q_1000_states_full_training',
    'dyna_q_learner',
    'dyna_q_with_predictor',
    'dyna_q_with_predictor_full_training',
    'dyna_q_with_predictor_full_training_dyna1',
]
names = _simple_q_names + _dyna_q_names
# Configuration of every experiment, one row per entry of `names` (same order).
# The rows are kept as a plain list of lists instead of a NumPy array: an array
# built from mixed int/bool/float rows is coerced to float64, which turned the
# `predictor` flag into 0.0/1.0 and the integer settings into floats. Passing
# the rows directly lets pandas infer a dtype per column (int, bool, float).
feat_data = [
    # (dyna, states, actions, training_days, epochs, predictor, random_decrease)
    [0, 125, 2, 512, 15, False, 0.9999],    # simple_q_learner
    [0, 1000, 2, 512, 15, False, 0.9999],   # simple_q_learner_1000_states
    [0, 1000, 4, 5268, 7, False, 0.9999],   # simple_q_learner_1000_states_4_actions_full_training
    [0, 1000, 2, 5268, 15, False, 0.9999],  # simple_q_learner_1000_states_full_training
    [0, 125, 2, 512, 100, False, 0.9999],   # simple_q_learner_100_epochs
    [0, 125, 11, 512, 10, False, 0.9999],   # simple_q_learner_11_actions
    [0, 125, 2, 512, 4, False, 0.999],      # simple_q_learner_fast_learner
    [0, 1000, 2, 512, 4, False, 0.999],     # simple_q_learner_fast_learner_1000_states
    [0, 125, 11, 512, 4, False, 0.999],     # simple_q_learner_fast_learner_11_actions
    [0, 125, 3, 512, 4, False, 0.999],      # simple_q_learner_fast_learner_3_actions
    [0, 125, 2, 5268, 4, False, 0.999],     # simple_q_learner_fast_learner_full_training
    [0, 125, 2, 5268, 15, False, 0.9999],   # simple_q_learner_full_training
    [20, 1000, 2, 5268, 7, False, 0.9999],  # dyna_q_1000_states_full_training
    [20, 125, 2, 512, 4, False, 0.9999],    # dyna_q_learner
    [20, 125, 2, 512, 4, True, 0.9999],     # dyna_q_with_predictor
    [20, 125, 2, 5268, 4, True, 0.9999],    # dyna_q_with_predictor_full_training
    [1, 125, 2, 5268, 4, True, 0.9999],     # dyna_q_with_predictor_full_training_dyna1
]
# Rows are labelled with the experiment names, columns with the feature names.
experiments_df = pd.DataFrame(feat_data, columns=features, index=names)
experiments_df.index.name = 'nb_name'
# Recorded results for every experiment, keyed by the same identifiers as the
# `names` list that indexes `experiments_df`. The entry previously spelled
# 'simple_q_full_training' is now 'simple_q_learner_full_training' in all five
# dictionaries, so the result tables line up with `experiments_df` on 'nb_name'
# (and still line up with each other for the joins done further down).
# Each dictionary is transposed into a frame with one row per experiment.
# NOTE(review): 'epoch_time' is presumably in seconds - confirm.

# --- Agent metrics recorded on the training set ---
train_res_data = {
    'simple_q_learner': {'sharpe': 1.9858481612185834, 'cum_ret': 0.38359700000000174, 'epoch_time': 18.330891609191895},
    'simple_q_learner_1000_states': {'sharpe': 3.4470302925746776, 'cum_ret': 0.7292610000000004, 'epoch_time': 18.28188133239746},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe': 2.2430093688893264, 'cum_ret': 30.14936200000002, 'epoch_time': 157.69741320610046},
    'simple_q_learner_1000_states_full_training': {'sharpe': 2.366638028444387, 'cum_ret': 79.61800199999992, 'epoch_time': 159.31651139259338},
    'simple_q_learner_100_epochs': {'sharpe': 4.093353629096188, 'cum_ret': 0.6627280000000009, 'epoch_time': 9.004882335662842},
    'simple_q_learner_11_actions': {'sharpe': 1.5440407782808305, 'cum_ret': 0.2412700000000001, 'epoch_time': 11.08903431892395},
    'simple_q_learner_fast_learner': {'sharpe': 2.8787265519379908, 'cum_ret': 0.5468269999999986, 'epoch_time': 18.931288242340088},
    'simple_q_learner_fast_learner_1000_states': {'sharpe': 2.031446601959524, 'cum_ret': 0.3971230000000021, 'epoch_time': 19.006957530975342},
    'simple_q_learner_fast_learner_11_actions': {'sharpe': 3.241438316121647, 'cum_ret': 0.541966, 'epoch_time': 18.913504123687744},
    'simple_q_learner_fast_learner_3_actions': {'sharpe': 2.9448069674427555, 'cum_ret': 0.4873689999999995, 'epoch_time': 18.46741485595703},
    'simple_q_learner_fast_learner_full_training': {'sharpe': 1.0444534903132408, 'cum_ret': 0.7844770000000019, 'epoch_time': 143.5039553642273},
    'simple_q_learner_full_training': {'sharpe': 1.2592450659232495, 'cum_ret': 1.7391450000000006, 'epoch_time': 115.70198798179626},
    'dyna_q_1000_states_full_training': {'sharpe': 2.2964510954840325, 'cum_ret': 94.75696199999993, 'epoch_time': 242.88240551948547},
    'dyna_q_learner': {'sharpe': 3.706435588713091, 'cum_ret': 0.4938250000000006, 'epoch_time': 18.87182092666626},
    'dyna_q_with_predictor': {'sharpe': 3.2884867210125845, 'cum_ret': 0.5397989999999993, 'epoch_time': 458.8401937484741},
    'dyna_q_with_predictor_full_training': {'sharpe': 1.0037137587999854, 'cum_ret': 2.565081999999997, 'epoch_time': 7850.391056537628},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe': 0.48228187419119906, 'cum_ret': 0.1737430000000002, 'epoch_time': 730.5918335914612},
}
train_res_data_df = pd.DataFrame(train_res_data).T

# --- Agent metrics recorded on the test set, "no learning" variant ---
test_res_data_no_learning = {
    'simple_q_learner': {'sharpe': 0.3664203166030617, 'cum_ret': 0.06372499999999937, 'epoch_time': 17.75287628173828},
    'simple_q_learner_1000_states': {'sharpe': -0.013747768227987086, 'cum_ret': -0.013047000000000142, 'epoch_time': 17.661759853363037},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe': 0.9400492987950515, 'cum_ret': 0.10791900000000054, 'epoch_time': 13.83948016166687},
    'simple_q_learner_1000_states_full_training': {'sharpe': 1.4827065747174577, 'cum_ret': 0.22123900000000085, 'epoch_time': 9.844955205917358},
    'simple_q_learner_100_epochs': {'sharpe': 0.6420028402682839, 'cum_ret': 0.10032399999999986, 'epoch_time': 9.116246461868286},
    'simple_q_learner_11_actions': {'sharpe': 0.15616450321809833, 'cum_ret': 0.019991000000000758, 'epoch_time': 10.187344551086426},
    'simple_q_learner_fast_learner': {'sharpe': 0.9643510680410812, 'cum_ret': 0.18794100000000125, 'epoch_time': 18.13912320137024},
    'simple_q_learner_fast_learner_1000_states': {'sharpe': 0.8228017709095453, 'cum_ret': 0.16162700000000063, 'epoch_time': 19.452654361724854},
    'simple_q_learner_fast_learner_11_actions': {'sharpe': 0.8238261816524384, 'cum_ret': 0.12766000000000033, 'epoch_time': 18.901001930236816},
    'simple_q_learner_fast_learner_3_actions': {'sharpe': 0.6332862559879147, 'cum_ret': 0.08036399999999966, 'epoch_time': 19.221533060073853},
    # NOTE(review): 'sharpe' and 'cum_ret' of this entry are identical to the
    # "learning" variant below - possibly a copy/paste slip; verify the source run.
    'simple_q_learner_fast_learner_full_training': {'sharpe': 1.2605807833904492, 'cum_ret': 0.056606000000000156, 'epoch_time': 11.412826538085938},
    'simple_q_learner_full_training': {'sharpe': -0.2562905901467118, 'cum_ret': -0.027945999999999693, 'epoch_time': 8.009900569915771},
    'dyna_q_1000_states_full_training': {'sharpe': 0.4267994866360769, 'cum_ret': 0.0652820000000005, 'epoch_time': 14.224964618682861},
    'dyna_q_learner': {'sharpe': 0.5191712068491942, 'cum_ret': 0.07307299999999883, 'epoch_time': 16.431984901428223},
    'dyna_q_with_predictor': {'sharpe': 0.7435489843809434, 'cum_ret': 0.10403399999999974, 'epoch_time': 6.692898988723755},
    'dyna_q_with_predictor_full_training': {'sharpe': -0.33503797163532956, 'cum_ret': -0.029740999999999795, 'epoch_time': 8.51533818244934},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe': 0.20288841658633258, 'cum_ret': 0.008380000000000276, 'epoch_time': 10.236766338348389},
}
test_res_data_no_learning_df = pd.DataFrame(test_res_data_no_learning).T

# --- Agent metrics recorded on the test set, "learning" variant ---
test_res_data_learning = {
    'simple_q_learner': {'sharpe': 0.9735950444291429, 'cum_ret': 0.1953619999999998, 'epoch_time': 18.097697019577026},
    'simple_q_learner_1000_states': {'sharpe': -0.0867440896667206, 'cum_ret': -0.027372000000001173, 'epoch_time': 17.762672901153564},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe': 1.109613523501088, 'cum_ret': 0.12868000000000057, 'epoch_time': 9.899595499038696},
    'simple_q_learner_1000_states_full_training': {'sharpe': 1.5176752934460862, 'cum_ret': 0.2069550000000011, 'epoch_time': 9.233611106872559},
    'simple_q_learner_100_epochs': {'sharpe': 0.09274627213069256, 'cum_ret': 0.008058000000000565, 'epoch_time': 8.653764009475708},
    'simple_q_learner_11_actions': {'sharpe': 0.4691456599751897, 'cum_ret': 0.07124699999999917, 'epoch_time': 10.827114582061768},
    'simple_q_learner_fast_learner': {'sharpe': 0.6020182964860242, 'cum_ret': 0.09249299999999816, 'epoch_time': 17.882429122924805},
    'simple_q_learner_fast_learner_1000_states': {'sharpe': 0.17618139275375405, 'cum_ret': 0.02545300000000017, 'epoch_time': 15.724592685699463},
    'simple_q_learner_fast_learner_11_actions': {'sharpe': 0.9608337022400049, 'cum_ret': 0.1406880000000006, 'epoch_time': 17.67305564880371},
    'simple_q_learner_fast_learner_3_actions': {'sharpe': 0.3254406127664859, 'cum_ret': 0.04086700000000043, 'epoch_time': 18.100637197494507},
    'simple_q_learner_fast_learner_full_training': {'sharpe': 1.2605807833904492, 'cum_ret': 0.056606000000000156, 'epoch_time': 12.214732885360718},
    'simple_q_learner_full_training': {'sharpe': 0.3139835605580342, 'cum_ret': 0.02497299999999969, 'epoch_time': 7.958802700042725},
    'dyna_q_1000_states_full_training': {'sharpe': 0.48863969848043476, 'cum_ret': 0.06846099999999988, 'epoch_time': 18.820592880249023},
    'dyna_q_learner': {'sharpe': 0.0700928915599047, 'cum_ret': 0.004358999999999114, 'epoch_time': 18.085463523864746},
    'dyna_q_with_predictor': {'sharpe': 0.6954014537549168, 'cum_ret': 0.09154599999999946, 'epoch_time': 338.36568880081177},
    'dyna_q_with_predictor_full_training': {'sharpe': -0.8531759696425502, 'cum_ret': -0.07708900000000052, 'epoch_time': 375.830899477005},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe': -0.15635735184097058, 'cum_ret': -0.006745999999999919, 'epoch_time': 38.24271035194397},
}
test_res_data_learning_df = pd.DataFrame(test_res_data_learning).T

# --- Benchmark metrics for the training set ---
# Only two distinct value pairs occur: one shared by the '*full_training*'
# experiments and one shared by all the others.
train_benchmark_data = {
    'simple_q_learner': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_1000_states': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'simple_q_learner_1000_states_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'simple_q_learner_100_epochs': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_11_actions': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_1000_states': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_11_actions': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_3_actions': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'simple_q_learner_fast_learner_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'simple_q_learner_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'dyna_q_1000_states_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'dyna_q_learner': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'dyna_q_with_predictor': {'sharpe_bench': 1.601691549431671, 'cum_ret_bench': 0.4244923418116293},
    'dyna_q_with_predictor_full_training': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe_bench': 0.4566770027925799, 'cum_ret_bench': 3.304502617801047},
}
train_benchmark_data_df = pd.DataFrame(train_benchmark_data).T

# --- Benchmark metrics for the test set ---
# All experiments share one value pair except the three 'dyna_q_with_predictor*'
# entries at the end.
test_benchmark_data = {
    'simple_q_learner': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_1000_states': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_1000_states_4_actions_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_1000_states_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_100_epochs': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_11_actions': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_1000_states': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_11_actions': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_3_actions': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_fast_learner_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'simple_q_learner_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'dyna_q_1000_states_full_training': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'dyna_q_learner': {'sharpe_bench': 0.44271542660031676, 'cum_ret_bench': 0.1070225832012679},
    'dyna_q_with_predictor': {'sharpe_bench': 0.2930367522823553, 'cum_ret_bench': 0.05002151977428149},
    'dyna_q_with_predictor_full_training': {'sharpe_bench': 0.2930367522823553, 'cum_ret_bench': 0.05002151977428149},
    'dyna_q_with_predictor_full_training_dyna1': {'sharpe_bench': 0.3772011734533203, 'cum_ret_bench': 0.07288030223327424},
}
test_benchmark_data_df = pd.DataFrame(test_benchmark_data).T
In [7]:
# Print the (rows, columns) shape, then display the experiment-configuration table.
print(experiments_df.shape)
experiments_df
Out[7]:
In [8]:
# Write the experiment-configuration table (including its index) to the data folder.
experiments_csv_path = '../../data/experiments_df.csv'
experiments_df.to_csv(experiments_csv_path)
In [10]:
# Put the agent's training metrics next to the benchmark's and derive how much
# the agent gained over the benchmark for Sharpe ratio and cumulative return.
training_res_df = (
    train_res_data_df
    .join(train_benchmark_data_df)
    .rename_axis('nb_name')
    .assign(
        sharpe_increase=lambda res: res['sharpe'] - res['sharpe_bench'],
        cum_ret_increase=lambda res: res['cum_ret'] - res['cum_ret_bench'],
    )
)
print(training_res_df.shape)
training_res_df
Out[10]:
In [11]:
# Write the training results table (including its index) to the data folder.
training_res_csv_path = '../../data/training_res_df.csv'
training_res_df.to_csv(training_res_csv_path)
In [12]:
# Test-set results of the "no learning" runs joined with the test benchmark,
# plus the agent-minus-benchmark differences.
test_no_learn_res_df = (
    test_res_data_no_learning_df
    .join(test_benchmark_data_df)
    .rename_axis('nb_name')
    .assign(
        sharpe_increase=lambda res: res['sharpe'] - res['sharpe_bench'],
        cum_ret_increase=lambda res: res['cum_ret'] - res['cum_ret_bench'],
    )
)
print(test_no_learn_res_df.shape)
test_no_learn_res_df
Out[12]:
In [13]:
# Write the "no learning" test results table (including its index) to the data folder.
test_no_learn_csv_path = '../../data/test_no_learn_res_df.csv'
test_no_learn_res_df.to_csv(test_no_learn_csv_path)
In [15]:
# Test-set results of the "learning" runs joined with the test benchmark,
# plus the agent-minus-benchmark differences.
test_learn_res_df = (
    test_res_data_learning_df
    .join(test_benchmark_data_df)
    .rename_axis('nb_name')
    .assign(
        sharpe_increase=lambda res: res['sharpe'] - res['sharpe_bench'],
        cum_ret_increase=lambda res: res['cum_ret'] - res['cum_ret_bench'],
    )
)
print(test_learn_res_df.shape)
test_learn_res_df
Out[15]:
In [16]:
# Write the "learning" test results table (including its index) to the data folder.
test_learn_csv_path = '../../data/test_learn_res_df.csv'
test_learn_res_df.to_csv(test_learn_csv_path)
In [17]:
# Side-by-side view of the Sharpe-ratio gain over the benchmark in the three
# settings: training, test without learning, and test with learning.
SHARPE_Q = 'sharpe_increase'
sharpe_q_df = (
    training_res_df[SHARPE_Q].rename('sharpe_i_train').to_frame()
    .join(test_no_learn_res_df[SHARPE_Q].rename('sharpe_i_test_no_learn'))
    .join(test_learn_res_df[SHARPE_Q].rename('sharpe_i_test_learn'))
)
print(sharpe_q_df.shape)
sharpe_q_df
Out[17]:
In [18]:
# Experiment singled out for the detailed views below. Among the recorded runs
# it has the highest test-set 'sharpe' in both the "no learning" and the
# "learning" result dictionaries.
best_agent_name = 'simple_q_learner_1000_states_full_training'
In [19]:
# Display the selected experiment's configuration as a single-row table.
experiments_df.loc[best_agent_name].to_frame().T
Out[19]:
In [20]:
# Stack the selected experiment's rows from the three result tables into one
# frame, one row per evaluation setting.
indexes = ['training', 'test_no_learn', 'test_learn']
_best_agent_rows = [
    result_table.loc[best_agent_name]
    for result_table in (training_res_df, test_no_learn_res_df, test_learn_res_df)
]
# `keys` labels each Series as a column; transposing turns those labels into the index.
best_agent_df = pd.concat(_best_agent_rows, axis=1, keys=indexes).T
best_agent_df
Out[20]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: