In [ ]:
import gc
gc.collect()
import sys
sys.path.insert(0, '../')
import logging
logging.basicConfig(level=logging.ERROR)
import pandas as pd
import numpy as np
np.random.seed(42)
from datetime import datetime, timedelta
from time import time
from cryptotrader.exchange_api.poloniex import Poloniex
from cryptotrader.envs.trading import BacktestDataFeed, BacktestEnvironment
from cryptotrader.envs.utils import make_balance, convert_to
from cryptotrader.agents import cn_agents
from cryptotrader.utils import array_normalize
from cryptotrader.agents.apriori import TCOTrader
import chainer as cn
from chainerrl import misc
from chainerrl.optimizers.nonbias_weight_decay import NonbiasWeightDecay
from chainerrl.optimizers import rmsprop_async
from chainerrl import experiments
from chainerrl.agents import a3c
from chainerrl.experiments.hooks import LinearInterpolationHook
from cryptotrader.agents.cn_agents import A3CEIIE, phi, PrintProgress, PortfolioVector, ProcessObs, batch_states, VisionModel, EIIE
import matplotlib.pyplot as plt
from bokeh.io import output_notebook
from jupyterthemes import jtplot
output_notebook()
jtplot.style()
%matplotlib inline
# %load_ext line_profiler
In [ ]:
# Simulation Params
test_name = 'EIIE_PTP_agent'
obs_steps = 50 # Observation steps, number of candles required by the agent for calculations
period = 120 # Observation period in minutes, also trading frequency
pairs = ["USDT_BTC", "USDT_ETH", "USDT_LTC", "USDT_XRP", "USDT_XMR", "USDT_ETC", "USDT_ZEC", "USDT_DASH"] # Trading universe (note: some survivorship bias here)
fiat_symbol = 'USDT' # Quote symbol
init_funds = make_balance(crypto=0.0, fiat=100.0, pairs=pairs) # Initial portfolio: all funds in fiat (100 USDT)
data_dir = './data' # Data directory for offline testing
In [ ]:
papi = Poloniex()
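# Environment factory: chainerrl's train_agent_async calls this once per worker
# process (passing the process index and a test flag), so each process gets its
# own data feed and backtest environment.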
def make_env(process_idx, test):
    tapi = BacktestDataFeed(papi, period, pairs=pairs, balance=init_funds, load_dir=data_dir)
    tapi.load_data('/train')
    # Environment setup
    env = BacktestEnvironment(period, obs_steps, tapi, fiat_symbol, test_name)
    env.reset()
    return env
In [ ]:
## Environment setup
# Data feed setup
papi = Poloniex()
tapi = BacktestDataFeed(papi, period, pairs=pairs, balance=init_funds, load_dir=data_dir)
# Download new data from the exchange
# tapi.download_data(end=datetime.timestamp(datetime.utcnow() - timedelta(days=100)),
# start=datetime.timestamp(datetime.utcnow() - timedelta(days=300)))
# # And save it to disk, if you want to
# tapi.save_data(data_dir + '/train')
# Or load data from disk
tapi.load_data('/train')
# Environment setup
env = BacktestEnvironment(period, obs_steps, tapi, fiat_symbol, test_name)
env.add_pairs(pairs)
env.fiat = fiat_symbol
obs = env.reset();
# Setup eval environment
# Load evaluation data from disk
tapi = BacktestDataFeed(papi, period, pairs=pairs, balance=init_funds, load_dir=data_dir)
tapi.load_data('/eval')
# Environment setup
eval_env = BacktestEnvironment(period, obs_steps, tapi, fiat_symbol, test_name)
eval_env.add_pairs(pairs)
eval_env.fiat = fiat_symbol
eval_env.reset();
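In [ ]:
# Optional sanity check of the observation returned by env.reset() above
# (illustration only; assumes obs is a pandas DataFrame indexed by candle,
# consistent with the obs.shape[0] usage in the next cell)
print(obs.shape)
obs.tail()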
In [ ]:
# NN params
timesteps = obs.shape[0] - 1
n_filters_in = 8
n_filters_out = 64
processes = 8
seed = 42
outdir = './save'
# Training params
t_max = 8 # Timesteps between updates of the main model
max_episode_len = 4 # Max timesteps per episode
beta = (1e-3, 0.) # Entropy regularization weight for the policy (initial, final)
reward_scale_factor = 1. # reward scale factor
gamma = 0.99 # discount factor
alpha = 0.99 # Exponential decay rate of the second order moment for rmsprop optimizer
rmsprop_epsilon = 1e-7 # Fuzz factor for numerical stability
lr = (1e-3, 1e-5) # Learning rate (initial, final)
weight_decay = 1e-8 # L2 regularization coefficient
grad_noise = 1e-7 # Gradient Gaussian noise; can improve learning in deep networks
clip_grad = 1. # clip gradient norm
steps = 3e2 # Training steps
eval_interval = None
eval_n_runs = None
profile = False
render = False
demo = False
load = False
# load = outdir + "/%.1f_finish" % (steps)
# logging.getLogger().setLevel(logging.DEBUG)
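In [ ]:
# Illustration only, not part of the training pipeline: the two
# LinearInterpolationHook instances created below anneal the learning rate from
# lr[0] to lr[1] over `steps` global steps and the entropy weight from beta[0]
# to beta[1] over the first third of training. A minimal sketch of that
# schedule, assuming plain linear interpolation clipped at the endpoints:
def linear_schedule(t, total_steps, start, stop):
    frac = min(max(t / total_steps, 0.0), 1.0)
    return start + frac * (stop - start)

# e.g. learning rate halfway through training:
# linear_schedule(steps / 2, steps, lr[0], lr[1])  # -> ~5.05e-4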
In [ ]:
model = cn_agents.A3CEIIE(timesteps, len(env.symbols), n_filters_in, n_filters_out)#.to_gpu(0)
opt = rmsprop_async.RMSpropAsync(lr=lr[0], eps=rmsprop_epsilon, alpha=alpha)
opt.setup(model)
opt.add_hook(cn.optimizer.GradientClipping(clip_grad))
# opt.add_hook(optimizer.GradientNoise(grad_noise))
opt.add_hook(cn.optimizer.WeightDecay(weight_decay))
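# A3C agent: asynchronous advantage actor-critic. Up to t_max transitions are
# collected before each gradient update, beta[0] sets the initial entropy bonus
# and phi preprocesses raw observations into network inputs.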
agent = a3c.A3C(model,
                opt,
                t_max=t_max,
                gamma=gamma,
                beta=beta[0],
                phi=phi,
                normalize_grad_by_t_max=True,
                act_deterministically=False,
                v_loss_coef=1.0)
In [ ]:
load = False
# load = outdir + "/%.1f_finish" % (steps)
if load:
    agent.load(load)
    print("Model loaded from %s" % (load))
# else:
pp = PrintProgress(time())
def lr_setter(env, agent, value):
    agent.optimizer.lr = value

def beta_setter(env, agent, value):
    agent.beta = value
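# Global-step hooks that linearly anneal the learning rate and the entropy
# weight during training (see the schedule sketch above)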
lr_decay = LinearInterpolationHook(steps, lr[0], lr[1], lr_setter)
beta_decay = LinearInterpolationHook(steps / 3, beta[0], beta[1], beta_setter)
try:
    with np.errstate(divide='ignore'):
        agent = experiments.train_agent_async(
            agent=agent,
            outdir=outdir,
            processes=processes,
            make_env=make_env,
            profile=profile,
            steps=steps,
            eval_n_runs=eval_n_runs,
            eval_interval=eval_interval,
            max_episode_len=max_episode_len,
            global_step_hooks=[pp, lr_decay, beta_decay]
        )
except KeyboardInterrupt:
    # load = outdir + "/%.1f_finish" % agent.t
    pass
In [ ]:
load = outdir + "/%.1f_finish" % (steps)
if load:
    agent.load(load)
    print("Model loaded from %s" % (load))
agent.act_deterministically = False
eval_stats = experiments.eval_performance(
    env=eval_env,
    agent=agent,
    n_runs=1,
    max_episode_len=eval_env.data_length)
print('mean: {} median: {} stdev {}'.format(eval_stats['mean'], eval_stats['median'], eval_stats['stdev']))
eval_env.plot_results();
In [ ]: