In [ ]:
from logbook import INFO, WARNING, DEBUG
import warnings
warnings.filterwarnings("ignore") # suppress h5py deprecation warning
import numpy as np
import os
import backtrader as bt
from btgym.research.casual_conv.strategy import CasualConvStrategyMulti
from btgym.research.casual_conv.networks import conv_1d_casual_attention_encoder
from btgym.algorithms.policy import StackedLstmPolicy
from btgym.algorithms.launcher.base import Launcher
from btgym.algorithms.aac import A3C
from btgym import PortfolioEnv
from btgym.datafeed.casual import BTgymCasualDataDomain
from btgym.datafeed.multi import BTgymMultiData
This setup closely relates to continuous portfolio optimisation problem definition; it differs from discrete actions setup in:
1. base broker actions are real numbers: `a[i] in [0,1], 0<=i<=K, SUM{a[i]} = 1` for `K` risky assets added;
each action is a market target order to adjust portfolio to get share `a[i]*100%` for `i`-th asset;
2. entire single-step broker action is dictionary of form:
`{cash_name: a[0], asset_name_1: a[1], ..., asset_name_K: a[K]}`;
3. short selling is not permitted;
Altogether, a single-step action amounts to a `K+1` dim vector.

0. Problem definition.
Again consider setup with one riskless asset acting as broker account cash and K risky assets.
For every risky asset there exists track of historic price records referred as `data-line`.
Apart from assets data lines there possibly exists number of exogenous data lines.
It is supposed for this setup that:
i. there is no interest rate for base (riskless) asset;
ii. short selling is not permitted;
iii. transaction costs are modelled via broker commission;
iv. 'market liquidity' and 'capital impact' assumptions are met;
v. time indexes match for all data lines provided;
1. Assets and datalines.
This environment expects Dataset to be instance of `btgym.datafeed.multi.BTgymMultiData`, which sets
number, specifications and sampling synchronisation for historic data for all assets and data lines.
Namely, one should define data_config dictionary of `data lines` and list of `assets`.
`data_config` specifies all data sources used by strategy,
while `assets` defines subset of `data lines` which is supposed to hold historic data
for risky portfolio assets.
Internally every episodic asset data is converted to single bt.feed and added to environment strategy
as separate named data_line (see backtrader docs for extensive explanation of data_lines concept).
Every non-asset data line is also added as a bt.feed, with the difference that it is not 'tradable', i.e.
it is impossible to issue trade orders on such line.
Strategy is expected to properly handle all received data-lines.
Example::
1. Four data streams added via Dataset.data_config,
portfolio consists of four assets, added via strategy_params, cash is EUR:
data_config = {
'usd': {'filename': '.../DAT_ASCII_EURUSD_M1_2017.csv'},
'gbp': {'filename': '.../DAT_ASCII_EURGBP_M1_2017.csv'},
'jpy': {'filename': '.../DAT_ASCII_EURJPY_M1_2017.csv'},
'chf': {'filename': '.../DAT_ASCII_EURCHF_M1_2017.csv'},
}
cash_name = 'eur'
assets_names = ['usd', 'gbp', 'jpy', 'chf']
2. Three streams added, only two of them form portfolio; DXY stream is `decision-making` only:
data_config = {
'usd': {'filename': '.../DAT_ASCII_EURUSD_M1_2017.csv'},
'gbp': {'filename': '.../DAT_ASCII_EURGBP_M1_2017.csv'},
'DXY': {'filename': '.../DAT_ASCII_DXY_M1_2017.csv'},
}
cash_name = 'eur'
assets_names = ['usd', 'gbp']
2. btgym.spaces.ActionDictSpace and order execution.
ActionDictSpace is an extension of OpenAI Gym DictSpace providing domain-specific functionality.
Strategy expects to receive a separate action for each of the K+1 assets (cash included) in the form of a dictionary:
`{cash_name: a[0], asset_name_1: a[1], ..., asset_name_K: a[K]}` for K risky assets added,
where base actions are real numbers: `a[i] in [0,1], 0<=i<=K, SUM{a[i]} = 1`. The whole action should be
interpreted as an order to adjust the portfolio to have share `a[i] * 100%` for the `i`-th asset.
Therefore, base actions are gym.spaces.Box and for K assets environment action space will be a shallow
DictSpace of K+1 continuous spaces: `{cash_name: gym.spaces.Box(low=0, high=1),
asset_name_1: gym.spaces.Box(low=0, high=1), ..., asset_name_K: gym.spaces.Box(low=0, high=1)}`
3. TODO: refine order execution control,
see: https://community.backtrader.com/topic/152/multi-asset-ranking-and-rebalancing/2?page=1
In [ ]:
# Backtrader engine instance; the strategy below is registered on it.
engine = bt.Cerebro()

# Length of every per-asset `state_ext_scale` vector built below.
num_features = 16

engine.addstrategy(
    CasualConvStrategyMulti,
    cash_name='EUR',
    start_cash=2000,
    commission=0.0001,
    leverage=10.0,
    # NOTE(review): this is a set literal, so iteration order is not fixed here;
    # confirm the strategy does not rely on the ordering of asset names.
    asset_names={'USD', 'CHF', 'GBP', 'JPY'},
    drawdown_call=10,  # max % to lose, in percent of initial cash
    target_call=10,  # max % to win, same
    skip_frame=10,
    gamma=0.99,
    # One scaling vector of `num_features` values per asset.
    state_ext_scale=dict(
        USD=np.linspace(1, 2, num=num_features),
        GBP=np.linspace(1, 2, num=num_features),
        CHF=np.linspace(1, 2, num=num_features),
        JPY=np.linspace(5e-3, 1e-2, num=num_features),
    ),
    cwt_signal_scale=4e3,
    cwt_lower_bound=4.0,  # CWT scales TODO: 8.? (was : 3.)
    cwt_upper_bound=90.0,
    reward_scale=7,
)
# One named data line per asset; each entry points at its source CSV file.
data_config = dict(
    USD=dict(filename='./data/DAT_ASCII_EURUSD_M1_2017.csv'),
    GBP=dict(filename='./data/DAT_ASCII_EURGBP_M1_2017.csv'),
    JPY=dict(filename='./data/DAT_ASCII_EURJPY_M1_2017.csv'),
    CHF=dict(filename='./data/DAT_ASCII_EURCHF_M1_2017.csv'),
)
# Multi-line dataset built over `data_config`, with separate sampling
# parameters for trials and for episodes.
dataset = BTgymMultiData(
    data_class_ref=BTgymCasualDataDomain,
    data_config=data_config,
    trial_params={
        'start_weekdays': {0, 1, 2, 3, 4, 5, 6},
        'sample_duration': dict(days=30, hours=0, minutes=0),
        'start_00': False,
        'time_gap': dict(days=15, hours=0),
        'test_period': dict(days=7, hours=0, minutes=0),
        'expanding': True,
    },
    episode_params={
        'start_weekdays': {0, 1, 2, 3, 4, 5, 6},
        'sample_duration': dict(days=2, hours=23, minutes=55),
        'start_00': False,
        'time_gap': dict(days=2, hours=15),
    },
    frozen_time_split=dict(year=2017, month=3, day=1),
)
#########################
# Environment specification: the class to instantiate plus its keyword arguments.
# The same `kwargs` are reused further down to build a standalone environment.
env_config = {
    'class_ref': PortfolioEnv,
    'kwargs': {
        'dataset': dataset,
        'engine': engine,
        'render_modes': ['episode'],
        'render_state_as_image': True,
        'render_size_episode': (12, 16),
        'render_size_human': (9, 4),
        'render_size_state': (11, 3),
        'render_dpi': 75,
        'port': 5000,
        'data_port': 4999,
        'connect_timeout': 90,
        'verbose': 0,
    },
}
# Cluster layout handed to the launcher; logs go under the user's home directory.
cluster_config = {
    'host': '127.0.0.1',
    'port': 12230,
    'num_workers': 4,  # Set according CPU's available or so
    'num_ps': 1,
    'num_envs': 1,
    'log_dir': os.path.expanduser('~/tmp/multi_continiuous'),
}
# Policy specification: the policy class plus the keyword arguments it is built with.
policy_config = {
    'class_ref': StackedLstmPolicy,
    'kwargs': dict(
        action_dp_alpha=200,  # Dirichlet process sampling inverse variance
        lstm_layers=(256, 256),
        dropout_keep_prob=1.0,
        encode_internal_state=False,
        conv_1d_num_filters=64,
        share_encoder_params=False,
        state_encoder_class_ref=conv_1d_casual_attention_encoder,
    ),
}
# Trainer specification: the algorithm class plus its hyperparameters.
trainer_config = {
    'class_ref': A3C,
    'kwargs': {
        'opt_learn_rate': 1e-4,
        'opt_end_learn_rate': 1e-5,
        'opt_decay_steps': 50 * 10 ** 6,
        'model_gamma': 0.99,
        'model_gae_lambda': 1.0,
        'model_beta': 0.001,  # entropy reg
        'rollout_length': 20,
        'time_flat': True,
        'model_summary_freq': 10,
        'episode_summary_freq': 1,
        'env_render_freq': 5,
    },
}
In [ ]:
# Build a standalone environment from the same kwargs stored in `env_config`,
# so the cells below can reset and step it by hand.
env = PortfolioEnv(**env_config['kwargs'])
In [ ]:
# Reset the environment and keep whatever reset() returns
# (presumably the initial observation - confirm against PortfolioEnv).
o = env.reset()
In [ ]:
# The action is read as a set of asset shares (or a probability distribution),
# but the action space itself does not enforce a unit sum, so a sampled action
# has to be normalized by hand before it is used.
a = env.action_space.sample()

# Sum over every component of the sampled action dictionary.
norm = np.asarray(list(a.values())).sum()

# Rescale each component so that all shares add up to one.
a_norm = {name: share / norm for name, share in a.items()}

for k, v in a_norm.items():
    print('{}: {:1.2f}'.format(k, v[0]))
In [ ]:
# NOTE: to see actual order execution feedback in the `info` part, one needs to
# set the strategy parameter `skip_frame=2`.
o, r, d, i = env.step(a_norm)

# Print every key/value pair of the first `info` record.
for k, v in i[0].items():
    print('{}: {}'.format(k, v))
In [ ]:
# Close the manually created environment before the launcher is started below.
env.close()
In [ ]:
# Assemble the launcher from the four configuration dictionaries defined above.
launcher = Launcher(
    env_config=env_config,
    policy_config=policy_config,
    trainer_config=trainer_config,
    cluster_config=cluster_config,
    max_env_steps=100 * 10 ** 6,
    root_random_seed=0,
    purge_previous=1,  # ask to override previously saved model and logs
    test_mode=False,
    verbose=0,
)

# Train it:
launcher.run()
In [ ]: