In [ ]:
import gc
gc.collect()

import sys
sys.path.insert(0, '../')
import logging
logging.basicConfig(level=logging.ERROR)

import pandas as pd
import numpy as np
np.random.seed(42)
from datetime import datetime, timedelta
from time import time

from cryptotrader.exchange_api.poloniex import Poloniex
from cryptotrader.envs.trading import BacktestDataFeed, BacktestEnvironment
from cryptotrader.envs.utils import make_balance, convert_to
from cryptotrader.agents import cn_agents
from cryptotrader.utils import array_normalize
from cryptotrader.agents.apriori import TCOTrader

import chainer as cn
from chainerrl import misc
from chainerrl.optimizers.nonbias_weight_decay import NonbiasWeightDecay
from chainerrl.optimizers import rmsprop_async
from chainerrl import experiments
from chainerrl.agents import a3c
from chainerrl.experiments.hooks import LinearInterpolationHook

from cryptotrader.agents.cn_agents import A3CEIIE, phi, PrintProgress, PortfolioVector, ProcessObs, batch_states, VisionModel, EIIE

import matplotlib.pyplot as plt
from bokeh.io import output_notebook
from jupyterthemes import jtplot
output_notebook()
jtplot.style()
%matplotlib inline
# %load_ext line_profiler

In [ ]:
# Simulation Params
test_name = 'EIIE_PTP_agent'
obs_steps = 50 # Observation steps, number of candles required by the agent for calculations
period = 120 # Observation period in minutes, also trading frequency
pairs = ["USDT_BTC", "USDT_ETH", "USDT_LTC", "USDT_XRP", "USDT_XMR", "USDT_ETC", "USDT_ZEC", "USDT_DASH"] # Universe, some survivor bias here...
fiat_symbol = 'USDT' # Quote symbol
init_funds = make_balance(crypto=0.0, fiat=100.0, pairs=pairs) # Initial portfolio: all funds held in fiat (USDT)
data_dir = './data' # Data directory for offline testing
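
In [ ]:
# Optional sanity check (not in the original notebook): inspect the starting balance.
# Assumes make_balance returns a mapping of currency symbol to amount; printing it
# works regardless of the exact container type.
print(init_funds)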

In [ ]:
papi = Poloniex()
def make_env(process_idx, test):
    tapi = BacktestDataFeed(papi, period, pairs=pairs, balance=init_funds, load_dir=data_dir)
    tapi.load_data('/train')

    # Environment setup
    env = BacktestEnvironment(period, obs_steps, tapi, fiat_symbol, test_name)
    obs = env.reset();
    return env
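
In [ ]:
# Optional smoke test of the environment factory above (assumption: the process_idx
# and test arguments only matter for the async training workers, so dummy values are fine).
smoke_env = make_env(0, test=False)
smoke_obs = smoke_env.reset()
print(smoke_obs.shape)  # first dimension should cover the obs_steps candles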

In [ ]:
## Environment setup
# Data feed setup
papi = Poloniex()
tapi = BacktestDataFeed(papi, period, pairs=pairs, balance=init_funds, load_dir=data_dir)

# Download new data from the exchange
# tapi.download_data(end=datetime.timestamp(datetime.utcnow() - timedelta(days=100)),
#                        start=datetime.timestamp(datetime.utcnow() - timedelta(days=300)))

# And save it to disk, if you want to
# tapi.save_data(data_dir + '/train')

# Or load data from disk
tapi.load_data('/train')

# Environment setup
env = BacktestEnvironment(period, obs_steps, tapi, fiat_symbol, test_name)
env.add_pairs(pairs)
env.fiat = fiat_symbol
obs = env.reset();

# Set up the evaluation environment
# Load evaluation data from disk
tapi = BacktestDataFeed(papi, period, pairs=pairs, balance=init_funds, load_dir=data_dir)
tapi.load_data('/eval')

# Environment setup
eval_env = BacktestEnvironment(period, obs_steps, tapi, fiat_symbol, test_name)
eval_env.add_pairs(pairs)
eval_env.fiat = fiat_symbol
eval_env.reset();
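
In [ ]:
# Optional sanity check: both environments should report the number of timesteps
# available in their data feeds (data_length is also used as max_episode_len in the
# evaluation cell at the end of this notebook).
print('train timesteps:', env.data_length)
print('eval timesteps:', eval_env.data_length)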

In [ ]:
# NN params
timesteps = obs.shape[0] - 1
n_filters_in = 8
n_filters_out = 64

processes = 8
seed = 42
outdir = './save'

# Training params
t_max = 8 # Timesteps between updates of the global model
max_episode_len = 4 # Max timesteps per episode
beta = (1e-3, 0.) # Entropy regularization weight for the policy (start, end of linear decay)
reward_scale_factor = 1. # Reward scale factor
gamma = 0.99 # Discount factor
alpha = 0.99 # Exponential decay rate of the second-order moment for the RMSprop optimizer
rmsprop_epsilon = 1e-7 # Fuzz factor, for numerical stability
lr = (1e-3, 1e-5) # Learning rate (start, end of linear decay)
weight_decay = 1e-8 # L2 regularization coefficient
grad_noise = 1e-7 # Gaussian gradient noise; can improve learning in deep networks
clip_grad = 1. # Clip gradient norm
steps = 3e2 # Training steps
eval_interval = None
eval_n_runs = None

profile = False
render = False
demo = False
load = False
# load = outdir + "/%.1f_finish" % (steps)
# logging.getLogger().setLevel(logging.DEBUG)
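
In [ ]:
# Illustration only (not part of the training loop): the learning rate and entropy
# weight are annealed linearly by LinearInterpolationHook in the training cell below,
# roughly following value(t) = start + (stop - start) * t / total_steps.
def linear_anneal(t, total_steps, start, stop):
    t = min(max(t, 0), total_steps)
    return start + (stop - start) * t / total_steps

print('lr at step 0:', linear_anneal(0, steps, lr[0], lr[1]))
print('lr at half of training:', linear_anneal(steps / 2, steps, lr[0], lr[1]))
print('lr at the end:', linear_anneal(steps, steps, lr[0], lr[1]))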

In [ ]:
model = cn_agents.A3CEIIE(timesteps, len(env.symbols), n_filters_in, n_filters_out)#.to_gpu(0)

opt = rmsprop_async.RMSpropAsync(lr=lr[0], eps=rmsprop_epsilon, alpha=alpha)
opt.setup(model)
opt.add_hook(cn.optimizer.GradientClipping(clip_grad))
# opt.add_hook(cn.optimizer.GradientNoise(grad_noise))
opt.add_hook(cn.optimizer.WeightDecay(weight_decay))

agent = a3c.A3C(model,
                opt,
                t_max=t_max,
                gamma=gamma,
                beta=beta[0],
                phi=phi,
                normalize_grad_by_t_max=True,
                act_deterministically=False,
                v_loss_coef=1.0
                )
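
In [ ]:
# Optional: count the trainable parameters of the A3C model. Uses Chainer's
# Link.params() iterator; parameters that are still lazily uninitialized are skipped.
n_params = sum(p.data.size for p in model.params() if p.data is not None)
print('Trainable parameters: %d' % n_params)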

In [ ]:
load = False
# load = outdir + "/%.1f_finish" % (steps)
if load:
    agent.load(load)
    print("Model loaded from %s" % (load))

# else:
pp = PrintProgress(time())

def lr_setter(env, agent, value):
    agent.optimizer.lr = value
    
def beta_setter(env, agent, value):
    agent.beta = value

lr_decay = LinearInterpolationHook(steps, lr[0], lr[1], lr_setter)
beta_decay = LinearInterpolationHook(steps / 3, beta[0], beta[1], beta_setter)
    
try:
    with np.errstate(divide='ignore'):
        agent = experiments.train_agent_async(
            agent=agent,
            outdir=outdir,
            processes=processes,
            make_env=make_env,
            profile=profile,
            steps=steps,
            eval_n_runs=eval_n_runs,
            eval_interval=eval_interval,
            max_episode_len=max_episode_len,
            global_step_hooks=[pp, lr_decay, beta_decay]
            )
except KeyboardInterrupt:
    # load = outdir + "/%.1f_finish" % (agent.t)
    pass

In [ ]:
load = outdir + "/%.1f_finish" % (steps)
if load:
    agent.load(load)
    print("Model loaded from %s" % (load))

agent.act_deterministically = False
eval_stats = experiments.eval_performance(
    env=eval_env,
    agent=agent,
    n_runs=1,
    max_episode_len=eval_env.data_length)

print('mean: {} median: {} stdev {}'.format(eval_stats['mean'], eval_stats['median'], eval_stats['stdev']))
eval_env.plot_results();
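
In [ ]:
# Optional follow-up: re-run the same evaluation with a deterministic policy
# (greedy actions instead of sampling) to compare against the stochastic run above.
agent.act_deterministically = True
det_stats = experiments.eval_performance(
    env=eval_env,
    agent=agent,
    n_runs=1,
    max_episode_len=eval_env.data_length)
print('deterministic mean: {}'.format(det_stats['mean']))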

In [ ]: