In this notebook, a simple Q-learner is trained and evaluated. The Q-learner recommends when to buy or sell shares of one particular stock, and in what quantity (more precisely, it determines the desired fraction of the total portfolio value to hold in shares). An initial attempt was made to train the Q-learner with multiple processes, but it was unsuccessful.
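
Roughly, each discrete action maps to a target holding like this (the helper below is hypothetical and only illustrates the idea; it is not the actual recommender API):

def target_shares(action, possible_fractions, portfolio_value, price):
    # Hypothetical sketch: each discrete action selects a target fraction
    # of the portfolio value to hold in shares of the traded symbol.
    fraction = possible_fractions[action]  # e.g. 0.0 (all cash) or 1.0 (fully invested)
    return int(fraction * portfolio_value / price)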


In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error
from multiprocessing import Pool

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')

import recommender.simulator as sim
from utils.analysis import value_eval
from recommender.agent import Agent
from functools import partial


Populating the interactive namespace from numpy and matplotlib

In [2]:
NUM_THREADS = 1
LOOKBACK = -1  # -1 means use the full training history (e.g. 252*4 + 28 for a shorter window)
STARTING_DAYS_AHEAD = 252
POSSIBLE_FRACTIONS = [0.0, 1.0]  # allowed fractions of the portfolio held in shares: all cash or fully invested

# Get the data
SYMBOL = 'SPY'
total_data_train_df = pd.read_pickle('../../data/data_train_val_df.pkl').stack(level='feature')
data_train_df = total_data_train_df[SYMBOL].unstack()
total_data_test_df = pd.read_pickle('../../data/data_test_df.pkl').stack(level='feature')
data_test_df = total_data_test_df[SYMBOL].unstack()
if LOOKBACK == -1:
    total_data_in_df = total_data_train_df
    data_in_df = data_train_df
else:
    data_in_df = data_train_df.iloc[-LOOKBACK:]
    total_data_in_df = total_data_train_df.loc[data_in_df.index[0]:]

# Create the agents (one per "thread"; just one here)
index = list(range(NUM_THREADS))
env, num_states, num_actions = sim.initialize_env(total_data_in_df, 
                                                  SYMBOL, 
                                                  starting_days_ahead=STARTING_DAYS_AHEAD,
                                                  possible_fractions=POSSIBLE_FRACTIONS,
                                                  n_levels=10)
agents = [Agent(num_states=num_states, 
                num_actions=num_actions, 
                random_actions_rate=0.98, 
                random_actions_decrease=0.9999,
                dyna_iterations=0,
                name='Agent_{}'.format(i)) for i in index]
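
Note how quickly exploration should fall off: assuming random_actions_rate is multiplied by random_actions_decrease once per simulated day, a single pass over the 5268-day training period leaves

# Expected exploration rate after one training epoch, assuming one
# multiplicative decay step per simulated day:
print(0.98 * 0.9999 ** 5268)  # ~0.579, matching the rate printed after epoch 0 below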

In [3]:
def show_results(results_list, data_in_df, graph=False):
    for values in results_list:
        total_value = values.sum(axis=1)
        print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(total_value))))
        print('-'*100)
        initial_date = total_value.index[0]
        compare_results = data_in_df.loc[initial_date:, 'Close'].copy()
        compare_results.name = SYMBOL
        compare_results_df = pd.DataFrame(compare_results)
        compare_results_df['portfolio'] = total_value
        # Normalize both series to 1.0 at the start so they are comparable.
        std_comp_df = compare_results_df / compare_results_df.iloc[0]
        if graph:
            # DataFrame.plot creates its own figure; calling plt.figure()
            # first would just leave an empty extra figure behind.
            std_comp_df.plot()
Let's look at the symbol's data first, to see how strong a baseline the recommender has to beat.


In [4]:
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(data_in_df['Close'].iloc[STARTING_DAYS_AHEAD:]))))


Sharpe ratio: 0.4566770027925799
Cum. Ret.: 3.304502617801047
AVG_DRET: 0.0003519913231219332
STD_DRET: 0.012235538451970583
Final value: 205.54
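
As a sanity check, these figures can be reproduced from the close prices directly; a minimal sketch, assuming value_eval uses simple daily returns and annualizes the Sharpe ratio with √252 trading days and a zero risk-free rate:

close = data_in_df['Close'].iloc[STARTING_DAYS_AHEAD:]
daily_ret = close.pct_change().dropna()
sharpe = np.sqrt(252) * daily_ret.mean() / daily_ret.std()  # ~0.457
cum_ret = close.iloc[-1] / close.iloc[0] - 1                # ~3.30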

In [5]:
# Simulate n_epochs passes over the training period, resetting the env each time
n_epochs = 7

for i in range(n_epochs):
    tic = time()
    env.reset(STARTING_DAYS_AHEAD)
    results_list = sim.simulate_period(total_data_in_df, 
                                       SYMBOL,
                                       agents[0],
                                       starting_days_ahead=STARTING_DAYS_AHEAD,
                                       possible_fractions=POSSIBLE_FRACTIONS,
                                       verbose=False,
                                       other_env=env)
    toc = time()
    print('Epoch: {}'.format(i))
    print('Elapsed time: {} seconds.'.format((toc-tic)))
    print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
    show_results([results_list], data_in_df)


Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.2978975772857666s.  Value: 16604.359999999964..Epoch: 0
Elapsed time: 128.22320747375488 seconds.
Random Actions Rate: 0.5787243211477068
Sharpe ratio: 0.2508012760602103
Cum. Ret.: 0.6592509999999963
AVG_DRET: 0.00013022172232749157
STD_DRET: 0.008242405253815461
Final value: 16592.509999999962
----------------------------------------------------------------------------------------------------
Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.2765538692474365s.  Value: 69726.39000000006...Epoch: 1
Elapsed time: 132.7022294998169 seconds.
Random Actions Rate: 0.3417569794774213
Sharpe ratio: 0.7829091180956399
Cum. Ret.: 5.945217000000006
AVG_DRET: 0.00040097436053126105
STD_DRET: 0.008130280377824952
Final value: 69452.17000000006
----------------------------------------------------------------------------------------------------
Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.39356374740600586s.  Value: 153482.41999999995.Epoch: 2
Elapsed time: 129.54077124595642 seconds.
Random Actions Rate: 0.2018194652505709
Sharpe ratio: 1.114338659989379
Cum. Ret.: 14.287979999999996
AVG_DRET: 0.0005481908130378231
STD_DRET: 0.007809348886749788
Final value: 152879.79999999996
----------------------------------------------------------------------------------------------------
Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.2911410331726074s.  Value: 283086.39000000036..Epoch: 3
Elapsed time: 131.97630739212036 seconds.
Random Actions Rate: 0.1191814622668657
Sharpe ratio: 1.496906954091488
Cum. Ret.: 27.34955900000004
AVG_DRET: 0.0006594479741703172
STD_DRET: 0.006993361895224532
Final value: 283495.5900000004
----------------------------------------------------------------------------------------------------
Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.28876709938049316s.  Value: 782692.8600000003..Epoch: 4
Elapsed time: 134.58703351020813 seconds.
Random Actions Rate: 0.0703808273916143
Sharpe ratio: 2.046343353408099
Cum. Ret.: 76.95963400000004
AVG_DRET: 0.0008487927548590697
STD_DRET: 0.006584509506433706
Final value: 779596.3400000003
----------------------------------------------------------------------------------------------------
Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.28897929191589355s.  Value: 725048.3800000001..Epoch: 5
Elapsed time: 132.39003920555115 seconds.
Random Actions Rate: 0.04156234342247517
Sharpe ratio: 2.097700381304864
Cum. Ret.: 71.94158800000001
AVG_DRET: 0.0008344282626597273
STD_DRET: 0.006314599614692466
Final value: 729415.8800000001
----------------------------------------------------------------------------------------------------
Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.29326343536376953s.  Value: 788791.1600000012..Epoch: 6
Elapsed time: 125.35012602806091 seconds.
Random Actions Rate: 0.024544019369877303
Sharpe ratio: 2.2297824386646186
Cum. Ret.: 78.35424100000012
AVG_DRET: 0.0008487527104372262
STD_DRET: 0.006042531928147476
Final value: 793542.4100000012
----------------------------------------------------------------------------------------------------

In [6]:
env.reset(STARTING_DAYS_AHEAD)
results_list = sim.simulate_period(total_data_in_df, 
                                   SYMBOL, agents[0], 
                                   learn=False, 
                                   starting_days_ahead=STARTING_DAYS_AHEAD,
                                   possible_fractions=POSSIBLE_FRACTIONS,
                                   other_env=env)
show_results([results_list], data_in_df, graph=True)


Starting simulation for agent: Agent_0. 5268 days of simulation to go.
Date 2014-12-22 00:00:00 (simulating until 2014-12-31 00:00:00).  Time: 0.2823922634124756s.  Value: 877288.3300000005...Sharpe ratio: 2.2911390004146073
Cum. Ret.: 87.25733300000006
AVG_DRET: 0.000868830678214477
STD_DRET: 0.0060198265724509415
Final value: 882573.3300000005
----------------------------------------------------------------------------------------------------
[Plot: agent portfolio value vs. SPY, both normalized to 1.0 at the start of the training period]

OK, let's save that agent.


In [7]:
import pickle
with open('../../data/simple_q_learner_1000_states_full_training.pkl', 'wb') as best_agent:
    pickle.dump(agents[0], best_agent)
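
To reuse the agent in a later session, it can be loaded back the same way; a minimal sketch:

# Reload the pickled agent:
with open('../../data/simple_q_learner_1000_states_full_training.pkl', 'rb') as f:
    best_agent_loaded = pickle.load(f)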

Now let's run the trained agent on the test set.

First, a non-learning test. This scenario is actually a handicap: the Q-learner could keep learning from past samples in the test set without compromising causality.


In [8]:
TEST_DAYS_AHEAD = 20

env.set_test_data(total_data_test_df, TEST_DAYS_AHEAD)
tic = time()
results_list = sim.simulate_period(total_data_test_df, 
                                    SYMBOL,
                                    agents[0],
                                    learn=False,
                                    starting_days_ahead=TEST_DAYS_AHEAD,
                                    possible_fractions=POSSIBLE_FRACTIONS,
                                    verbose=False,
                                    other_env=env)
toc = time()
print('Epoch: {}'.format(i))  # note: i is left over from the training loop above
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_test_df, graph=True)


Starting simulation for agent: Agent_0. 484 days of simulation to go.
Date 2016-12-28 00:00:00 (simulating until 2016-12-30 00:00:00).  Time: 0.17544817924499512s.  Value: 12099.470000000001.Epoch: 6
Elapsed time: 8.874800443649292 seconds.
Random Actions Rate: 0.024544019369877303
Sharpe ratio: 1.3139195350252832
Cum. Ret.: 0.2099470000000001
AVG_DRET: 0.00040755281995981493
STD_DRET: 0.004923970055971514
Final value: 12099.470000000001
----------------------------------------------------------------------------------------------------
[Plot: agent portfolio value vs. SPY, both normalized, over the test period (learning disabled)]

And now a "realistic" test, in which the learner continues to learn from past samples in the test set (it even makes some random moves, though very few).
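
As a reminder of what "continues to learn" means, a generic tabular Q-learning update is sketched below; the Agent's actual implementation, learning rate, and discount factor may differ.

def q_update(Q, s, a, r, s_next, alpha=0.2, gamma=0.99):
    # One tabular Q-learning step on a (num_states, num_actions) array Q;
    # alpha and gamma here are illustrative values.
    Q[s, a] = (1 - alpha) * Q[s, a] + alpha * (r + gamma * Q[s_next].max())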


In [9]:
env.set_test_data(total_data_test_df, TEST_DAYS_AHEAD)
tic = time()
results_list = sim.simulate_period(total_data_test_df, 
                                    SYMBOL,
                                    agents[0],
                                    learn=True,
                                    starting_days_ahead=TEST_DAYS_AHEAD,
                                    possible_fractions=POSSIBLE_FRACTIONS,
                                    verbose=False,
                                    other_env=env)
toc = time()
print('Epoch: {}'.format(i))
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_test_df, graph=True)


Starting simulation for agent: Agent_0. 484 days of simulation to go.
Date 2016-12-28 00:00:00 (simulating until 2016-12-30 00:00:00).  Time: 0.17123079299926758s.  Value: 10595.430000000008.Epoch: 6
Elapsed time: 9.082789421081543 seconds.
Random Actions Rate: 0.02338666058186899
Sharpe ratio: 0.45880561807905557
Cum. Ret.: 0.05954300000000079
AVG_DRET: 0.000130165710975031
STD_DRET: 0.004503686357325763
Final value: 10595.430000000008
----------------------------------------------------------------------------------------------------
[Plot: agent portfolio value vs. SPY, both normalized, over the test period (learning enabled)]

What are the metrics for "holding the position"?


In [10]:
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(data_test_df['Close'].iloc[TEST_DAYS_AHEAD:]))))


Sharpe ratio: 0.44271542660031676
Cum. Ret.: 0.1070225832012679
AVG_DRET: 0.00025103195406808796
STD_DRET: 0.009001287260690292
Final value: 223.53

So, over the test period the frozen agent clearly beats buy and hold (Sharpe ratio 1.31 vs. 0.44, cumulative return 21.0% vs. 10.7%), while the run that kept learning ended with a lower cumulative return (6.0%) and a Sharpe ratio (0.46) roughly in line with buy and hold.