After playing with different preprocessing/scaling/normalising routines, it seems that one simple solution based on std normalisation is quite robust to different price levels and volatility regimes and requires no manual tuning (in contrast to the manual scaling setup described here).
See implementation example code here: https://github.com/Kismuz/btgym/blob/master/btgym/research/model_based/strategy.py#L189
In [1]:
import numpy as np
from btgym import BTgymEnv
from gym import spaces
import backtrader as bt
from btgym.research import DevStrat_4_11, DevStrat_4_12
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
In [2]:
class SignalProvider:
    """Collect observations and rewards by driving an environment with random actions.

    The wrapped environment is expected to expose ``reset()``, ``step(action)``,
    ``close()`` and ``action_space.sample()``; observations must be mappings
    with ``'external'`` and ``'internal'`` keys.
    """

    def __init__(self, env):
        """Store the environment to sample from.

        Args:
            env: environment instance used by every subsequent call.
        """
        self.env = env
        # Start as "done" so the very first get_batch() call resets the env.
        self.done = True

    def get_batch(self, batch_size):
        """Sample ``batch_size`` consecutive steps using random actions.

        The environment is reset whenever the previous step ended an episode
        (or nothing has been sampled yet); the reward recorded for a reset
        step is 0.

        Args:
            batch_size: number of samples to collect.

        Returns:
            Tuple of three lists of equal length: external observations,
            internal observations and rewards.
        """
        batch_ext = []
        batch_int = []
        batch_r = []
        while len(batch_r) < batch_size:
            if self.done:
                o = self.env.reset()
                r = 0
                self.done = False
            else:
                # Use the wrapped environment, not a module-level global `env`,
                # so the provider works with whichever env it was given.
                action = self.env.action_space.sample()
                o, r, self.done, _ = self.env.step(action)
            batch_ext.append(o['external'])
            batch_int.append(o['internal'])
            batch_r.append(r)
        return batch_ext, batch_int, batch_r

    def close(self):
        """Close the wrapped environment and force a reset on the next batch."""
        self.env.close()
        self.done = True
In [ ]:
# The scale-state-before-tanh parameter under study.
# It is either a scalar or a vector of scaling weights whose size equals the
# number of observation-space channels: 6 channels for DevStrat_4_12,
# 5 for DevStrat_4_11.
# Try both forms to check that weighted scaling gives more evenly distributed
# values across channels.
# TODO: the weights below decay linearly. Can we do better?
state_ext_scale = np.linspace(3e3, 1e3, num=6)
# Scalar alternative:
# state_ext_scale = 3e3

# Additional parameters; `state_int_scale` exists but is not used in this example.
state_int_scale = 1.0
reward_scale = 10.0

engine = bt.Cerebro()
strategy_kwargs = dict(
    drawdown_call=10,  # max loss, in percent of initial cash
    target_call=10,  # max profit, in percent of initial cash
    skip_frame=10,
    gamma=0.99,
    reward_scale=reward_scale,
    state_ext_scale=state_ext_scale,
    # state_int_scale=state_int_scale,
)
engine.addstrategy(DevStrat_4_12, **strategy_kwargs)

# Alternative data files to try:
#   '../examples/data/DAT_ASCII_EURUSD_M1_201703_1_10.csv'  # ten days
#   '../examples/data/test_bent_sine_1min_period1500_300_delta0002.csv'  # increasing sine freq
#   '../examples/data/test_sine_1min_period256_delta0002.csv'  # simple sine
# Optional extra argument:
#   state_shape=dict(raw_state=spaces.Box(low=0, high=1, shape=(30, 4)))
env_kwargs = dict(
    filename='../examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
    engine=engine,
    skip_frame=10,
    port=5050,
    data_port=5049,
    verbose=0,
)
env = BTgymEnv(**env_kwargs)

signal_provider = SignalProvider(env)
In [ ]:
# Number of samples to draw; anything from 256 to 1024 works (takes a while).
batch_size = 256
batch_ext, _, _ = signal_provider.get_batch(batch_size)
signal_provider.close()

# Distribution of values per channel.
distr_ext = np.asarray(batch_ext)
num_channels = distr_ext.shape[-1]
# Keep index 0 along the second-to-last axis, then flatten everything but the
# channel axis so each row is one point and each column one channel.
distr_ext = distr_ext[..., 0, :].reshape(-1, num_channels)
num_points = distr_ext.shape[0]

# One histogram figure per channel.
for channel, values in enumerate(distr_ext.T):
    plt.figure(num=channel, figsize=(10, 5))
    plt.title('External state channel: {}, points: {}'.format(channel, num_points))
    plt.grid(True)
    fig = plt.hist(values, histtype='stepfilled', bins=500)
In [5]:
# Close the environment through the provider.
# NOTE(review): the sampling cell above already calls close() right after
# get_batch(); confirm the environment tolerates a repeated close.
signal_provider.close()
In [ ]: