In this notebook, a Q-learner with Dyna and a custom predictor will be trained and evaluated. The Q-learner recommends when to buy or sell shares of one particular stock, and in what quantity (in fact, it determines the desired fraction of the total portfolio value to be held in shares).
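As a rough illustration of what that fraction means (a minimal sketch; `target_shares` and its arguments are illustrative names, not the simulator's actual API): each action picks one of the allowed fractions, and the target position follows from the current portfolio value.

def target_shares(fraction, cash, shares, price):
    # Number of whole shares that puts `fraction` of the current
    # portfolio value into the stock.
    portfolio_value = cash + shares * price
    return int(portfolio_value * fraction // price)

# With 10,000 in cash, no shares and a price of 100:
# fraction 0.0 -> hold 0 shares, fraction 1.0 -> hold 100 shares.
print(target_shares(0.0, cash=10000.0, shares=0, price=100.0))
print(target_shares(1.0, cash=10000.0, shares=0, price=100.0))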


In [69]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error
from multiprocessing import Pool

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')

import recommender.simulator as sim
from utils.analysis import value_eval
from recommender.agent_predictor import AgentPredictor
from functools import partial
from sklearn.externals import joblib


Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [70]:
NUM_THREADS = 1                    # number of agents to create (one per thread)
LOOKBACK = 252*5                   # trading days of history to train on (-1 means use all available data)
STARTING_DAYS_AHEAD = 252          # days of history available before the simulation starts trading
POSSIBLE_FRACTIONS = [0.0, 1.0]    # allowed fractions of the portfolio value held in shares
DYNA = 20                          # Dyna hallucination iterations per real step
BASE_DAYS = 112                    # prediction window passed to the agent's estimators

# Get the data
SYMBOL = 'SPY'
total_data_train_df = pd.read_pickle('../../data/data_train_val_df.pkl').stack(level='feature')
data_train_df = total_data_train_df[SYMBOL].unstack()
total_data_test_df = pd.read_pickle('../../data/data_test_df.pkl').stack(level='feature')
data_test_df = total_data_test_df[SYMBOL].unstack()
if LOOKBACK == -1:
    total_data_in_df = total_data_train_df
    data_in_df = data_train_df
else:
    data_in_df = data_train_df.iloc[-LOOKBACK:]
    total_data_in_df = total_data_train_df.loc[data_in_df.index[0]:]

# Create the agents (one per thread)
index = np.arange(NUM_THREADS).tolist()
env, num_states, num_actions = sim.initialize_env(total_data_in_df, 
                                                  SYMBOL, 
                                                  starting_days_ahead=STARTING_DAYS_AHEAD,
                                                  possible_fractions=POSSIBLE_FRACTIONS)

estimator_close = joblib.load('../../data/best_predictor.pkl')
estimator_volume = joblib.load('../../data/best_volume_predictor.pkl')

agents = [AgentPredictor(num_states=num_states, 
                         num_actions=num_actions, 
                         random_actions_rate=0.98, 
                         random_actions_decrease=0.999,
                         dyna_iterations=DYNA,
                         name='Agent_{}'.format(i),
                         estimator_close=estimator_close,
                         estimator_volume=estimator_volume,
                         env=env,
                         prediction_window=BASE_DAYS) for i in index]

In [71]:
def show_results(results_list, data_in_df, graph=False):
    for values in results_list:
        total_value = values.sum(axis=1)
        print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(total_value))))
        print('-'*100)
        initial_date = total_value.index[0]
        compare_results = data_in_df.loc[initial_date:, 'Close'].copy()
        compare_results.name = SYMBOL
        compare_results_df = pd.DataFrame(compare_results)
        compare_results_df['portfolio'] = total_value
        std_comp_df = compare_results_df / compare_results_df.iloc[0]
        if graph:
            # DataFrame.plot creates its own figure and axes
            std_comp_df.plot()

Let's show the symbol's data, to see how good the recommender has to be.


In [72]:
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(data_in_df['Close'].iloc[STARTING_DAYS_AHEAD:]))))


Sharpe ratio: 1.3647675162985047
Cum. Ret.: 0.656912535268037
AVG_DRET: 0.0006759463733835631
STD_DRET: 0.007862376480527653
Final value: 205.54

In [79]:
# Simulate (with new envs, each time)
n_epochs = 4

for i in range(n_epochs):
    tic = time()
    results_list = sim.simulate_period(total_data_in_df, 
                                       SYMBOL,
                                       agents[0],
                                       other_env=env,
                                       starting_days_ahead=STARTING_DAYS_AHEAD,
                                       possible_fractions=POSSIBLE_FRACTIONS,
                                       verbose=False)
    toc = time()
    print('Epoch: {}'.format(i))
    print('Elapsed time: {} seconds.'.format((toc-tic)))
    print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
    show_results([results_list], data_in_df)


Starting simulation for agent: Agent_0
Date: 2011-12-08 00:00:00, Value: 10450.980000000001
Date: 2011-12-09 00:00:00, Value: 10450.980000000001
Date: 2011-12-12 00:00:00, Value: 10450.980000000001
Date: 2011-12-13 00:00:00, Value: 10356.060000000001
Date: 2011-12-14 00:00:00, Value: 10356.060000000001
Date: 2011-12-15 00:00:00, Value: 10356.060000000001
Date: 2011-12-16 00:00:00, Value: 10356.060000000001
Date: 2011-12-19 00:00:00, Value: 10249.810000000001
Date: 2011-12-20 00:00:00, Value: 10552.410000000002
Date: 2011-12-21 00:00:00, Value: 10552.410000000002
Date: 2011-12-22 00:00:00, Value: 10552.410000000002
Date: 2011-12-23 00:00:00, Value: 10641.450000000003
Date: 2011-12-27 00:00:00, Value: 10649.850000000002
Date: 2011-12-28 00:00:00, Value: 10649.850000000002
Date: 2011-12-29 00:00:00, Value: 10750.150000000001
Date: 2011-12-30 00:00:00, Value: 10750.150000000001
Date: 2012-01-03 00:00:00, Value: 10919.300000000001
Date: 2012-01-04 00:00:00, Value: 10931.2
Date: 2012-01-05 00:00:00, Value: 10931.2
Date: 2012-01-06 00:00:00, Value: 10907.4
Date: 2012-01-09 00:00:00, Value: 10921.85
Date: 2012-01-10 00:00:00, Value: 10921.85
Date: 2012-01-11 00:00:00, Value: 10921.85
Date: 2012-01-12 00:00:00, Value: 10921.85
Date: 2012-01-13 00:00:00, Value: 10921.85
Date: 2012-01-17 00:00:00, Value: 10947.050000000001
Date: 2012-01-18 00:00:00, Value: 10947.050000000001
Date: 2012-01-19 00:00:00, Value: 11003.489999999998
Date: 2012-01-20 00:00:00, Value: 11042.499999999998
Date: 2012-01-23 00:00:00, Value: 11042.499999999998
Date: 2012-01-24 00:00:00, Value: 11032.539999999997
Date: 2012-01-25 00:00:00, Value: 11124.669999999996
Date: 2012-01-26 00:00:00, Value: 11069.059999999998
Date: 2012-01-27 00:00:00, Value: 11069.059999999998
Date: 2012-01-30 00:00:00, Value: 11069.059999999998
Date: 2012-01-31 00:00:00, Value: 11069.059999999998
Date: 2012-02-01 00:00:00, Value: 11174.899999999996
Date: 2012-02-03 00:00:00, Value: 11351.299999999996
Date: 2012-02-06 00:00:00, Value: 11342.059999999998
Date: 2012-02-07 00:00:00, Value: 11369.779999999995
Date: 2012-02-08 00:00:00, Value: 11369.779999999995
Date: 2012-02-09 00:00:00, Value: 11382.379999999996
Date: 2012-02-10 00:00:00, Value: 11305.939999999997
Date: 2012-02-13 00:00:00, Value: 11305.939999999997
Date: 2012-02-14 00:00:00, Value: 11305.939999999997
Date: 2012-02-15 00:00:00, Value: 11251.989999999996
Date: 2012-02-16 00:00:00, Value: 11251.989999999996
Date: 2012-02-17 00:00:00, Value: 11251.989999999996
Date: 2012-02-21 00:00:00, Value: 11251.989999999996
Date: 2012-02-22 00:00:00, Value: 11215.909999999996
Date: 2012-02-23 00:00:00, Value: 11215.909999999996
Date: 2012-02-24 00:00:00, Value: 11215.909999999996
Date: 2012-02-27 00:00:00, Value: 11234.539999999995
Date: 2012-02-28 00:00:00, Value: 11234.539999999995
Date: 2012-02-29 00:00:00, Value: 11234.539999999995
Date: 2012-03-01 00:00:00, Value: 11234.539999999995
Date: 2012-03-02 00:00:00, Value: 11234.539999999995
Date: 2012-03-05 00:00:00, Value: 11234.539999999995
Date: 2012-03-06 00:00:00, Value: 11063.979999999994
Date: 2012-03-07 00:00:00, Value: 11063.979999999994
Date: 2012-03-08 00:00:00, Value: 11063.979999999994
Date: 2012-03-09 00:00:00, Value: 11063.979999999994
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-79-91386291b150> in <module>()
     10                                        starting_days_ahead=STARTING_DAYS_AHEAD,
     11                                        possible_fractions=POSSIBLE_FRACTIONS,
---> 12                                        verbose=False)
     13     toc = time()
     14     print('Epoch: {}'.format(i))

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/recommender/simulator.py in simulate_period(data_df, symbol, agent, other_env, verbose, learn, starting_days_ahead, possible_fractions)
    127         recorded_cash_value[env.portfolio.current_date] = pos.loc['CASH', 'value']
    128         if learn:
--> 129             fraction_index = agent.play(reward, new_state)
    130         else:
    131             fraction_index = agent.play_learned_response(new_state)

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/recommender/agent_predictor.py in play(self, reward, new_state)
    190         self.set_history()
    191         # Hallucinate!
--> 192         self.hallucinate(new_state)
    193         # End of Update Q -----------------------------------
    194 

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/recommender/agent_predictor.py in hallucinate(self, s)
    118         if self.history_df is not None:
    119             h_history_df = self.history_df.copy()  # Initially, it is filled with the real values
--> 120             h_history_df = h_history_df.append(self.predict_steps(h_history_df, self.dyna_iterations))
    121             stacked_h_history_df = pd.DataFrame(h_history_df.stack(), columns=[self.env.symbol])
    122             internal_env = self.env.clone_with_new_data(stacked_h_history_df)

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/recommender/agent_predictor.py in predict_steps(self, h_history_df, n_steps)
    233 
    234         for i in range(n_steps):
--> 235             h_history_df = self.predict_one_step(h_history_df.copy())
    236             predicted_df = predicted_df.append(h_history_df.iloc[-1])
    237         return predicted_df

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/recommender/agent_predictor.py in predict_one_step(self, h_history_df)
    224         estimated_volume = self.estimator_volume.predict(volume_sample).iloc[0, 0] * \
    225             h_history_df['Volume'].mean()
--> 226         predicted_date = fe.add_market_days(h_history_df.index[-1], 1)
    227         h_history_df = h_history_df.drop(h_history_df.index[0])
    228         h_history_df.loc[predicted_date, :] = {'Close': estimated_close, 'Volume': estimated_volume}

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/predictor/feature_extraction.py in add_market_days(base, delta)
     88 
     89 def add_market_days(base, delta):
---> 90     return add_index_days(base, delta, SPY_DF)
     91 
     92 

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/predictor/feature_extraction.py in add_index_days(base, delta, data_df)
     79     if base not in market_days:
     80         raise Exception('The base date is not in the market days list.')
---> 81     base_index = market_days.tolist().index(base)
     82     if base_index + delta >= len(market_days):
     83         return market_days[-1]

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in tolist(self)
    469         return a list of the underlying data
    470         """
--> 471         return list(self.asobject)
    472 
    473     def min(self, axis=None, *args, **kwargs):

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in asobject(self)
    430         """
    431         from pandas.core.index import Index
--> 432         return Index(self._box_values(self.asi8), name=self.name, dtype=object)
    433 
    434     def _convert_tolerance(self, tolerance):

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in _box_values(self, values)
    242         apply box func to passed values
    243         """
--> 244         return lib.map_infer(values, self._box_func)
    245 
    246     def _format_with_header(self, header, **kwargs):

pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer (pandas/_libs/lib.c:66440)()

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/datetimes.py in <lambda>(x)
    543     @property
    544     def _box_func(self):
--> 545         return lambda x: Timestamp(x, freq=self.offset, tz=self.tz)
    546 
    547     def _convert_for_op(self, value):

KeyboardInterrupt: 

In [ ]:
results_list = sim.simulate_period(total_data_in_df, 
                                   SYMBOL, 
                                   agents[0], 
                                   learn=False, 
                                   starting_days_ahead=STARTING_DAYS_AHEAD,
                                   possible_fractions=POSSIBLE_FRACTIONS)
show_results([results_list], data_in_df, graph=True)

Let's run the trained agent on the test set.

First, a non-learning test. This scenario is worse than what is actually achievable: the Q-learner could keep learning from past samples within the test set without compromising causality.
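Schematically, the two test modes differ only in how the agent is queried on each simulated day (a simplified sketch of the branch in recommender/simulator.py visible in the traceback above; the behavior of play_learned_response is an assumption based on its name):

if learn:
    # The agent updates its Q table (and hallucinates Dyna steps)
    # before choosing the next action.
    fraction_index = agent.play(reward, new_state)
else:
    # Assumed: the agent only exploits what it has already learned,
    # with no Q update and no random exploration.
    fraction_index = agent.play_learned_response(new_state)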


In [ ]:
env, num_states, num_actions = sim.initialize_env(total_data_test_df, 
                                                  SYMBOL,
                                                  starting_days_ahead=STARTING_DAYS_AHEAD,
                                                  possible_fractions=POSSIBLE_FRACTIONS)
tic = time()
results_list = sim.simulate_period(total_data_test_df, 
                                    SYMBOL,
                                    agents[0],
                                    learn=False,
                                    starting_days_ahead=STARTING_DAYS_AHEAD,
                                    possible_fractions=POSSIBLE_FRACTIONS,
                                    verbose=False)
toc = time()
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_test_df, graph=True)

And now a "realistic" test, in which the learner continues to learn from past samples in the test set (it even makes some random moves, though very few).


In [ ]:
env, num_states, num_actions = sim.initialize_env(total_data_test_df, 
                                                  SYMBOL,
                                                  starting_days_ahead=STARTING_DAYS_AHEAD,
                                                  possible_fractions=POSSIBLE_FRACTIONS)
tic = time()
results_list = sim.simulate_period(total_data_test_df, 
                                    SYMBOL,
                                    agents[0],
                                    learn=True,
                                    starting_days_ahead=STARTING_DAYS_AHEAD,
                                    possible_fractions=POSSIBLE_FRACTIONS,
                                    verbose=False)
toc = time()
print('Elapsed time: {} seconds.'.format((toc-tic)))
print('Random Actions Rate: {}'.format(agents[0].random_actions_rate))
show_results([results_list], data_test_df, graph=True)

What are the metrics for "holding the position"?


In [ ]:
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(data_test_df['Close'].iloc[STARTING_DAYS_AHEAD:]))))

Conclusion:


In [ ]:
import pickle
with open('../../data/simple_q_learner_fast_learner_full_training.pkl', 'wb') as best_agent:
    pickle.dump(agents[0], best_agent)

In [ ]: