9.12.2018: UPDATE ON SIGNAL SCALING:

After experimenting with different preprocessing/scaling/normalising routines, it seems that one simple solution based on std normalisation is quite robust to different price levels and volatility regimes, and requires no manual tuning (in contrast to the manual scaling setup described here).

See implementation example code here: https://github.com/Kismuz/btgym/blob/master/btgym/research/model_based/strategy.py#L189



In [1]:

    
import numpy as np

from btgym import BTgymEnv
from gym import spaces
import backtrader as bt
from btgym.research import DevStrat_4_11, DevStrat_4_12

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')









    



/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters



In [2]:

    
class SignalProvider:
    """Collect batches of observations and rewards from an environment using random actions.

    The wrapped environment is expected to expose `reset()`, `step(action)`,
    `close()` and `action_space.sample()`, and to return observations that can be
    indexed by the keys 'external' and 'internal'.
    """

    def __init__(self, env):
        """Store the environment to sample from.

        Args:
            env: environment instance to be stepped and eventually closed.
        """
        self.env = env
        # Start in the "done" state so that the first sample triggers env.reset().
        self.done = True

    def get_batch(self, batch_size):
        """Sample `batch_size` consecutive observations, resetting whenever an episode ends.

        Args:
            batch_size: number of samples to collect.

        Returns:
            Tuple `(batch_ext, batch_int, batch_r)` of equally long lists holding the
            'external' observations, the 'internal' observations and the rewards.
            A sample produced by `reset()` is recorded with reward 0.
        """
        batch_ext = []
        batch_int = []
        batch_r = []
        while len(batch_r) < batch_size:
            if not self.done:
                # Use the environment owned by this instance, not a module-level global.
                o, r, self.done, _ = self.env.step(self.env.action_space.sample())
            else:
                o = self.env.reset()
                r = 0
                self.done = False
            batch_ext.append(o['external'])
            batch_int.append(o['internal'])
            batch_r.append(r)
        return batch_ext, batch_int, batch_r

    def close(self):
        """Close the wrapped environment and force a reset on any later sampling."""
        self.env.close()
        self.done = True



In [ ]:

    
# Auxiliary scaling parameters. Only `reward_scale` is passed to the strategy below;
# `state_int_scale` is defined for reference, its keyword argument is commented out.
reward_scale = 10.0
state_int_scale = 1.0

# The parameter of interest: the scale applied to the external state before tanh.
# It can be a scalar or a vector of scaling weights whose size equals the number of
# observation-space channels: 6 channels for DevStrat_4_12, 5 for DevStrat_4_11.
#
# Try both variants to check that weighted scaling gives more evenly distributed
# values across channels.
# Todo: the weights are linearly decayed here. Can we do better?
state_ext_scale = np.linspace(3e3, 1e3, num=6)
#state_ext_scale = 3e3

# Backtrader engine carrying the strategy configured with the scaling parameters above.
engine = bt.Cerebro()
engine.addstrategy(
    DevStrat_4_12,
    state_ext_scale=state_ext_scale,
    #state_int_scale=state_int_scale,
    reward_scale=reward_scale,
    gamma=0.99,
    skip_frame=10,
    drawdown_call=10,  # max % to lose, in percent of initial cash
    target_call=10,  # max % to win, same
)

# Environment built on top of the engine; alternative data files are kept commented out.
env = BTgymEnv(
    engine=engine,
    filename='../examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
    #filename='../examples/data/DAT_ASCII_EURUSD_M1_201703_1_10.csv',  # ten days
    #filename='../examples/data/test_bent_sine_1min_period1500_300_delta0002.csv',  # increasing sine freq
    #filename='../examples/data/test_sine_1min_period256_delta0002.csv',  # simple sine
    #state_shape=dict(raw_state=spaces.Box(low=0,high=1,shape=(30,4))),
    skip_frame=10,
    port=5050,
    data_port=5049,
    verbose=0,
)
signal_provider = SignalProvider(env)









    



/Users/muzikin/anaconda/envs/tensorforce/lib/python3.6/site-packages/matplotlib/__init__.py:1405: UserWarning: 
This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

  warnings.warn(_use_error_msg)



In [ ]:

    
# Number of samples to draw; use 256 -:- 1024 (takes a while):
batch_size = 256

# Only the external observations are needed; the environment is closed right after sampling.
batch_ext, _, _ = signal_provider.get_batch(batch_size)
signal_provider.close()


# Distribution of values per channel: take index 0 along the second-to-last axis
# and flatten everything else into rows of `num_channels` values.
distr_ext = np.asarray(batch_ext)
num_channels = distr_ext.shape[-1]
distr_ext = distr_ext[..., 0, :].reshape(-1, num_channels)


# One histogram figure per channel.
for i in range(num_channels):
    plt.figure(figsize=(10, 5), num=i)
    plt.grid(True)
    plt.title('External state channel: {}, points: {}'.format(i, distr_ext.shape[0]))
    fig = plt.hist(distr_ext[:, i], bins=500, histtype='stepfilled')



In [5]:

    
# Close the environment wrapped by the provider and mark it as done.
signal_provider.close()

Properly scaled:

Overscaled:

Underscaled:



In [ ]: