In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error
from multiprocessing import Pool
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)
%load_ext autoreload
%autoreload 2
sys.path.append('../../')
import recommender.simulator as sim
from utils.analysis import value_eval
from recommender.agent import Agent
from functools import partial
In [11]:
NUM_THREADS = 1
LOOKBACK = 252*2 + 28
STARTING_DAYS_AHEAD = 20
POSSIBLE_FRACTIONS = [0.0, 1.0]
DYNA = 20
# Get the data
SYMBOL = 'SPY'
total_data_train_df = pd.read_pickle('../../data/data_train_val_df.pkl').stack(level='feature')
data_train_df = total_data_train_df[SYMBOL].unstack()
total_data_test_df = pd.read_pickle('../../data/data_test_df.pkl').stack(level='feature')
data_test_df = total_data_test_df[SYMBOL].unstack()
if LOOKBACK == -1:
total_data_in_df = total_data_train_df
data_in_df = data_train_df
else:
data_in_df = data_train_df.iloc[-LOOKBACK:]
total_data_in_df = total_data_train_df.loc[data_in_df.index[0]:]
# Create many agents
index = np.arange(NUM_THREADS).tolist()
env, num_states, num_actions = sim.initialize_env(total_data_in_df,
SYMBOL,
starting_days_ahead=STARTING_DAYS_AHEAD,
possible_fractions=POSSIBLE_FRACTIONS)
agents = [Agent(num_states=num_states,
num_actions=num_actions,
random_actions_rate=0.98,
random_actions_decrease=0.999,
dyna_iterations=DYNA,
name='Agent_{}'.format(i)) for i in index]
In [3]:
def show_results(results_list, data_in_df, graph=False):
for values in results_list:
total_value = values.sum(axis=1)
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(total_value))))
print('-'*100)
initial_date = total_value.index[0]
compare_results = data_in_df.loc[initial_date:, 'Close'].copy()
compare_results.name = SYMBOL
compare_results_df = pd.DataFrame(compare_results)
compare_results_df['portfolio'] = total_value
std_comp_df = compare_results_df / compare_results_df.iloc[0]
if graph:
plt.figure()
std_comp_df.plot()
In [4]:
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(data_in_df['Close'].iloc[STARTING_DAYS_AHEAD:]))))
In [5]:
# Simulate (with new envs, each time)
n_epochs = 4
for i in range(n_epochs):
tic = time()
env.reset(STARTING_DAYS_AHEAD)
results_list = sim.simulate_period(total_data_in_df,
SYMBOL,
agents[0],
starting_days_ahead=STARTING_DAYS_AHEAD,
possible_fractions=POSSIBLE_FRACTIONS,
verbose=False,
other_env=env)
toc = time()
print('Epoch: {}'.format(i))
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_in_df)
In [6]:
env.reset(STARTING_DAYS_AHEAD)
results_list = sim.simulate_period(total_data_in_df,
SYMBOL, agents[0],
learn=False,
starting_days_ahead=STARTING_DAYS_AHEAD,
possible_fractions=POSSIBLE_FRACTIONS,
other_env=env)
show_results([results_list], data_in_df, graph=True)
In [12]:
TEST_DAYS_AHEAD = 20
env.set_test_data(total_data_test_df, TEST_DAYS_AHEAD)
tic = time()
results_list = sim.simulate_period(total_data_test_df,
SYMBOL,
agents[0],
learn=False,
starting_days_ahead=TEST_DAYS_AHEAD,
possible_fractions=POSSIBLE_FRACTIONS,
verbose=False,
other_env=env)
toc = time()
print('Epoch: {}'.format(i))
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_test_df, graph=True)
In [13]:
env.set_test_data(total_data_test_df, TEST_DAYS_AHEAD)
tic = time()
results_list = sim.simulate_period(total_data_test_df,
SYMBOL,
agents[0],
learn=True,
starting_days_ahead=TEST_DAYS_AHEAD,
possible_fractions=POSSIBLE_FRACTIONS,
verbose=False,
other_env=env)
toc = time()
print('Epoch: {}'.format(i))
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_test_df, graph=True)
In [9]:
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(data_test_df['Close'].iloc[STARTING_DAYS_AHEAD:]))))
In [10]:
import pickle
with open('../../data/simple_q_learner_fast_learner_full_training.pkl', 'wb') as best_agent:
pickle.dump(agents[0], best_agent)
In [ ]: