In this notebook a complete end-to-end simulation is run: technical indicators are built from price data, an Environment and a learning Agent are created, and the agent trades AAPL over the full data period.


In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

plt.rcParams['figure.figsize'] = (20.0, 10.0)

sys.path.append('../../')

In [2]:
from recommender.indicator import Indicator
import recommender.indicator_functions as indf

total_data_df = pd.read_pickle('../../data/data_df.pkl')
SYMBOL = 'AAPL'
data_df = total_data_df[SYMBOL].unstack()
data_df.head()


Out[2]:
feature     Close  High  Low   Open  Volume
date
1993-01-29   2.12  2.19  2.11   0.0  66525200.0
1993-02-01   2.19  2.19  2.12   0.0  60138400.0
1993-02-02   2.15  2.20  2.15   0.0  45584000.0
1993-02-03   2.14  2.18  2.09   0.0  66046400.0
1993-02-04   2.12  2.15  2.11   0.0  52038000.0

In [3]:
close_df = total_data_df.xs('Close', level='feature')
close_df.head()


Out[3]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 43.94 24.50 6.88 NaN NaN NaN NaN 2.59 18.75 NaN ... 22.00 14.28 2.50 NaN NaN NaN NaN NaN 10.94 NaN
1993-02-01 44.25 24.69 6.88 NaN NaN NaN NaN 2.72 19.12 NaN ... 22.19 14.09 2.62 NaN NaN NaN NaN NaN 11.06 NaN
1993-02-02 44.34 24.72 6.53 NaN NaN NaN NaN 2.84 20.25 NaN ... 22.06 14.09 2.64 NaN NaN NaN NaN NaN 11.12 NaN
1993-02-03 44.81 25.19 6.91 NaN NaN NaN NaN 2.70 20.50 NaN ... 22.38 14.03 2.68 NaN NaN NaN NaN NaN 11.25 NaN
1993-02-04 45.00 26.06 6.84 NaN NaN NaN NaN 2.73 20.12 NaN ... 22.81 14.15 2.67 NaN NaN NaN NaN NaN 11.69 NaN

5 rows × 503 columns


In [4]:
total_data_df.head()


Out[4]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
date feature
1993-01-29 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 43.97 24.62 6.88 NaN NaN NaN NaN 2.64 19.12 NaN ... 22.00 14.32 2.50 NaN NaN NaN NaN NaN 10.94 NaN
Low 43.75 24.47 6.75 NaN NaN NaN NaN 2.56 18.62 NaN ... 21.88 13.84 2.46 NaN NaN NaN NaN NaN 10.62 NaN
Close 43.94 24.50 6.88 NaN NaN NaN NaN 2.59 18.75 NaN ... 22.00 14.28 2.50 NaN NaN NaN NaN NaN 10.94 NaN
Volume 1003200.00 1242800.00 4638400.00 NaN NaN NaN NaN 4990400.00 730600.00 NaN ... 87800.00 7633602.00 1745196.00 NaN NaN NaN NaN NaN 33600.00 NaN

5 rows × 503 columns


In [5]:
ind1 = Indicator(indf.z_score, indf.z_score_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
ind2 = Indicator(indf.rsi, indf.rsi_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
ind3 = Indicator(indf.on_volume_balance, indf.on_volume_balance_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
indicators = {'ind1': ind1,
              'ind2': ind2,
              'ind3': ind3}

A very simple environment with a simple agent


In [6]:
from recommender.environment import Environment
INITIAL_CAP = 100

env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)

In [7]:
from recommender.agent import Agent

num_states = (len(ind1.q_levels)+1) * (len(ind2.q_levels)+1) * (len(ind3.q_levels)+1)
num_actions = 3  # Buy or sell all, or do nothing.

agent = Agent(num_states=num_states, 
              num_actions=num_actions, 
              random_actions_rate=0.98, 
              random_actions_decrease=0.999,
              dyna_iterations=20)
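
With random_actions_rate=0.98 the agent starts out almost fully random, and random_actions_decrease presumably shrinks that rate multiplicatively after every play (the Agent internals are not shown in this notebook). A minimal sketch of that assumed schedule:

In [ ]:
# Hypothetical illustration of the exploration decay: if the rate is multiplied
# by 0.999 after each play, 5000 steps shrink it from 0.98 to roughly 0.0066,
# i.e. the agent ends up almost fully greedy.
rate = 0.98
for _ in range(5000):
    rate *= 0.999
print(rate)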

In [8]:
from recommender.order import Order

action_0 = [Order(['AAPL', Order.BUY, 100])]
action_1 = [Order(['AAPL', Order.SELL, 100])]
action_2 = [Order(['AAPL', Order.NOTHING, 0])]

actions = {0: action_0,
           1: action_1,
           2: action_2}

In [9]:
data_df.index[252]


Out[9]:
Timestamp('1994-01-27 00:00:00')

In [10]:
# Initialization: start the portfolio one trading year (252 sessions) into the
# data, then get the initial state and the agent's (so far untrained) response.
env.portfolio.set_current_date(data_df.index[252])
initial_state = env.get_state()
action = agent.play_learned_response(initial_state)

In [11]:
actions[action]


Out[11]:
[symbol       AAPL
 order     NOTHING
 shares          0
 dtype: object]

In [12]:
# TODO: Change N_iters to the whole period of data_df
# TODO: copy the code to a function that can be called from an outside loop.
N_iters = 5000

tic = time.time()
for i in range(N_iters):
    reward, new_state = env.get_consequences(actions[action])
    action = agent.play(reward, new_state)

    # Uncomment to trace each step:
    # print('date={}\n'.format(env.portfolio.current_date))
    # print('reward={} \n\nnew_state={} \n\naction=\n{}'.format(reward, new_state, actions[action]))
    # print('-'*70 + '\n\n')
    # time.sleep(0.5)
toc = time.time()
print('Elapsed time: {}s'.format(toc - tic))


Elapsed time: 259.70663475990295s

In [13]:
env.portfolio.get_positions()


Out[13]:
        shares      value
AAPL   -8300.0  -671636.0
CASH  298215.0   298215.0

In [14]:
env.portfolio.get_positions()['value'].sum()


Out[14]:
-373421.0

In [15]:
env.portfolio.current_date


Out[15]:
Timestamp('2013-12-09 00:00:00')

In [16]:
(data_df.loc[env.portfolio.current_date] / data_df.iloc[0])['Close']


Out[16]:
38.169811320754718

In [17]:
env.portfolio.get_positions()['value'].sum() / INITIAL_CAP


Out[17]:
-3734.21

After 5000 steps the agent ends up heavily short AAPL and the portfolio value is deeply negative, while the stock itself rose about 38x over the same period. Let's wrap that single learning step in a function.


In [18]:
def simulate_one_step(action):
    reward, new_state = env.get_consequences(actions[action])
    return agent.play(reward, new_state)

In [19]:
from recommender.indicator import Indicator
import recommender.indicator_functions as indf


def create_indicators(data_df):
    """
    Create the set of indicators used by the environment.
    To drop an indicator, just comment out its line.
    """
    indicators = {}
    indicators['z_score'] = Indicator(indf.z_score, indf.z_score_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    indicators['rsi'] = Indicator(indf.rsi, indf.rsi_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    indicators['on_volume_balance'] = Indicator(indf.on_volume_balance, indf.on_volume_balance_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    return indicators

In [20]:
def get_num_states(indicators):
    """Each indicator contributes len(q_levels) + 1 buckets; the counts multiply."""
    num_states = 1
    for ind in indicators.values():
        num_states *= len(ind.q_levels) + 1
    return num_states

In [21]:
print(num_states)
get_num_states(indicators)


9261
Out[21]:
9261
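
The count checks out: the indicators from cell In [5] use a 0.2 step over [-2.0, 2.0), i.e. 20 levels and hence 21 buckets each, giving 21**3 = 9261 joint states. Note that create_indicators above uses a 0.4 step instead, giving 11**3 = 1331 states, which matches agent.Q.shape == (1331, 11) further down.

In [ ]:
# Sanity check on both state-space sizes
print((20 + 1)**3)  # 9261, for the 0.2-step indicators
print((10 + 1)**3)  # 1331, for the 0.4-step indicators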

In [22]:
current_price = data_df['Close'].iloc[-1]
current_price


Out[22]:
115.81999999999999

In [23]:
capital = 10000

In [24]:
import recommender.portfolio as pf

positions = env.portfolio.get_positions()
positions.loc[SYMBOL, pf.SHARES]


Out[24]:
-8300.0

In [25]:
self = env.portfolio  # bind 'self' by hand to prototype Portfolio methods below
current_price = self.close_df.loc[self.current_date][SYMBOL]
current_price


Out[25]:
80.920000000000002

In [26]:
self.get_total_value()


Out[26]:
-373421.0

In [27]:
np.round(-1.2)  # check how negative share counts will round


Out[27]:
-1.0

In [28]:
target_fraction = 0.3

In [29]:
np.round(self.get_total_value()*target_fraction/current_price)


Out[29]:
-1384.0

In [30]:
# Prototype for a Portfolio instance method ('self' was bound above): number of
# shares to hold so the position is target_fraction of total value. Note that a
# negative total value yields a negative share target.
def order_target(target_fraction):
    current_price = self.close_df.loc[self.current_date][SYMBOL]
    return np.round(self.get_total_value() * target_fraction / current_price)

In [31]:
order_target(0.3)


Out[31]:
-1384.0

In [32]:
order_target(0.7)


Out[32]:
-3230.0

In [33]:
self = env  # re-bind 'self' to prototype Environment methods below

In [34]:
from recommender.quantizer import Quantizer

actions_fractions = Quantizer(np.arange(-1.0, 1.0, 0.1).round(decimals=3).tolist())
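
The Quantizer presumably snaps an arbitrary fraction onto the nearest of its q_levels, which would explain why act_to_target(0.61) below lands at a ~0.60 allocation. A minimal sketch of that assumed behavior:

In [ ]:
# Sketch (assumption about Quantizer internals): quantizing = nearest level
levels = np.arange(-1.0, 1.0, 0.1).round(decimals=3).tolist()
def nearest_level(x):
    return min(levels, key=lambda q: abs(q - x))
print(nearest_level(0.61))  # 0.6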

In [35]:
from recommender.portfolio import SHARES

previous_shares = self.portfolio.get_positions().loc[SYMBOL, SHARES]
previous_shares


Out[35]:
-8300.0

In [36]:
# Prototype for an Environment instance method: quantize the target fraction,
# compute the share delta needed to reach it, and submit one BUY order for the
# difference (a negative share count presumably acts as a sell).
def act_to_target(target_fraction):
    current_price = self.portfolio.close_df.loc[self.portfolio.current_date][self.symbol]
    wanted_shares = np.fix(self.portfolio.get_total_value() *
                           actions_fractions.get_quantized_value(target_fraction) / current_price)
    previous_shares = self.portfolio.get_positions().loc[self.symbol, pf.SHARES]
    shares_increase = wanted_shares - previous_shares
    action = [Order([self.symbol, Order.BUY, shares_increase])]
    return self.get_consequences(action)

In [37]:
print(act_to_target(0.61))
print(env.portfolio.get_positions())
print(env.portfolio.get_positions().loc[self.symbol, pf.VALUE] / env.portfolio.get_total_value())


(359.8399999999674, 6543)
         shares      value
AAPL   -2768.00 -223626.72
CASH -149434.44 -149434.44
0.599437153951

In [38]:
print(env.actions_fractions.q_levels)
len(env.actions_fractions.q_levels)


[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
Out[38]:
11


In [63]:
from recommender.environment import Environment
from recommender.agent import Agent
import logging

INITIAL_CAP = 10000
symbol = 'AAPL'
log = logging.getLogger('simulate_period')
log.setLevel(logging.DEBUG)

# Initialization
total_data_df = pd.read_pickle('../../data/data_df.pkl')
data_df = total_data_df[symbol].unstack()
indicators = create_indicators(data_df)
env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
env.portfolio.set_current_date(data_df.index[252])

num_states = get_num_states(indicators)
num_actions = len(env.actions_fractions.q_levels)  # All the possible fractions of total value
agent = Agent(num_states=num_states, 
              num_actions=num_actions, 
              random_actions_rate=0.98, 
              random_actions_decrease=0.999,
              dyna_iterations=20)

In [64]:
agent.Q.shape


Out[64]:
(1331, 11)

In [65]:
# Loop and play
N_iters = data_df.shape[0]
fraction_index = 0

In [93]:
print('date={}\n'.format(env.portfolio.current_date))
print(env.portfolio.get_positions())
print('-'*70 + '\n\n')
reward, new_state = env.get_consequences_from_fraction_index(fraction_index)

print('date={}\n'.format(env.portfolio.current_date))
print('reward = {} \n\nnew_state = {} \n\naction = {} ({})'.format(reward, 
                                                                   new_state,
                                                                   fraction_index,
                                                                   env.actions_fractions.interval_to_value(fraction_index)))
pos = env.portfolio.positions_df
print(env.portfolio.get_positions())
print(pos.loc[symbol, 'value'] / pos['value'].sum())
print('-'*70 + '\n\n')


fraction_index = agent.play(reward, new_state)


date=1994-03-08 00:00:00

       shares    value
AAPL  3080.00  4065.60
CASH  6239.02  6239.02
----------------------------------------------------------------------


date=1994-03-09 00:00:00

reward = 62.44000000000051 

new_state = 1039 

action = 5 (0.4)
       shares    value
AAPL  3122.00  4183.48
CASH  6183.58  6183.58
0.403535814397
----------------------------------------------------------------------
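
The numbers above are consistent with the reward being the one-day change in total portfolio value (the Environment's reward definition is not shown here, so this is a check, not a proof):

In [ ]:
# Total value before (1994-03-08) and after (1994-03-09) the step
before = 4065.60 + 6239.02  # AAPL + CASH
after = 4183.48 + 6183.58
print(after - before)       # 62.44, matching the reward printed above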



Let's test the act_to_target function, now available as an Environment method.


In [55]:
env.actions_fractions.q_levels


Out[55]:
[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

In [62]:
env.act_to_target(0.8)
pos = env.portfolio.get_positions()
print(pos)
print(pos.loc[symbol, 'value'] / pos['value'].sum())
print('-'*70 + '\n\n')


       shares    value
AAPL  6565.00  8665.80
CASH  2134.64  2134.64
0.802356200303
----------------------------------------------------------------------



In [116]:
def initialize_env(data_df, symbol):
    # Initialization. Note: data_df is re-derived from the pickle below, so the
    # passed-in argument is effectively ignored.
    total_data_df = pd.read_pickle('../../data/data_df.pkl')
    data_df = total_data_df[symbol].unstack()
    indicators = create_indicators(data_df)
    env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
    env.portfolio.set_current_date(data_df.index[252])
    num_states = get_num_states(indicators)
    num_actions = len(env.actions_fractions.q_levels)  # all possible fractions of total value
    return env, num_states, num_actions

In [203]:
from recommender.environment import Environment
from recommender.agent import Agent
import sys

INITIAL_CAP = 10000


def simulate_period(data_df, symbol, agent, other_env=None, verbose=False):
    """
    Simulate the market and one Agent over the entire period.
    data_df is indexed by date, with one column per feature:
    feature     Close   High    Low     Open    Volume
    date
    1993-01-29  2.12    2.19    2.11    0.0     66525200.0
    1993-02-01  2.19    2.19    2.12    0.0     60138400.0
    1993-02-02  2.15    2.20    2.15    0.0     45584000.0
    """
    if other_env is None:
        env, num_states, num_actions = initialize_env(data_df, symbol)
    else:
        env = other_env

    # Loop and play
    N_iters = data_df.shape[0]
    fraction_index = 0
    recorded_stock_value = {}
    recorded_cash_value = {}
    for i in range(N_iters):
        reward, new_state = env.get_consequences_from_fraction_index(fraction_index)

        if verbose:
            print('date={}\n'.format(env.portfolio.current_date))
            print('reward = {} \n\nnew_state = {} \n\naction = {} ({})'.format(
                reward,
                new_state,
                fraction_index,
                env.actions_fractions.interval_to_value(fraction_index)))
            pos = env.portfolio.positions_df
            print(env.portfolio.get_positions())
            print(pos.loc[symbol, 'value'] / pos['value'].sum())
            print('-'*70 + '\n\n')

        # Record positions before the agent chooses its next action
        pos = env.portfolio.positions_df
        recorded_stock_value[env.portfolio.current_date] = pos.loc[symbol, 'value']
        recorded_cash_value[env.portfolio.current_date] = pos.loc['CASH', 'value']
        fraction_index = agent.play(reward, new_state)
        if i % 100 == 0:
            sys.stdout.write('\rIteration {}/{}'.format(i, N_iters))
            sys.stdout.flush()

    return pd.DataFrame({'stock_value': recorded_stock_value, 'cash': recorded_cash_value})

In [204]:
env, num_states, num_actions = initialize_env(data_df, symbol)
agent = Agent(num_states=num_states, 
              num_actions=num_actions, 
              random_actions_rate=0.98, 
              random_actions_decrease=0.999,
              dyna_iterations=20)

In [205]:
values = simulate_period(data_df, symbol, agent, other_env=env)


Iteration 6000/6024

In [156]:
from utils.analysis import value_eval

print(type(value_eval(pd.DataFrame(values))))
len(value_eval(pd.DataFrame(values)))


<class 'tuple'>
Out[156]:
5

In [227]:
print(values.shape)
values.head()


(5771, 2)
Out[227]:
                cash  stock_value
1994-01-28  10000.00         0.00
1994-01-31  10000.00         0.00
1994-02-01   3001.06      7118.58
1994-02-02   6072.45      4013.18
1994-02-03   5043.49      5127.60

In [208]:
from utils.analysis import value_eval
total_value = values.sum(axis=1)
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(total_value))))


Sharpe ratio: 0.21875070056678433
Cum. Ret.: 1.3737920000000057
AVG_DRET: 0.0016708708021690355
STD_DRET: 0.12125333369916133
Final value: 23737.920000000056
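
These figures are internally consistent: the final value equals INITIAL_CAP times one plus the cumulative return, and the Sharpe ratio matches the usual annualization of the daily-return statistics, assuming value_eval annualizes over 252 trading days:

In [ ]:
avg_dret, std_dret = 0.0016708708021690355, 0.12125333369916133
print(np.sqrt(252) * avg_dret / std_dret)  # ~0.2188, the Sharpe ratio above
print(10000 * (1 + 1.3737920000000057))    # 23737.92, the final value above

The rerun below reuses the already-trained agent (its exploration rate now mostly decayed) on a freshly initialized environment, which presumably explains the much better results.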

In [352]:
values_df = simulate_period(data_df, symbol, agent)
total_value = values_df.sum(axis=1)
value_eval(pd.DataFrame(total_value))


Iteration 6000/6024
Out[352]:
(1.6986094239704916,
 47.609569,
 0.00069403003155876579,
 0.0064861203758870807,
 486095.69)

In [353]:
total_value.plot()


Out[353]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa0ef4d45f8>

In [354]:
total_value.iloc[-1]


Out[354]:
486095.69

In [355]:
print(total_value.shape)
total_value.head()


(5771,)
Out[355]:
1994-01-28    10000.00
1994-01-31    10000.00
1994-02-01    10017.08
1994-02-02    10017.08
1994-02-03    10017.08
dtype: float64

In [356]:
total_value.index[0]


Out[356]:
Timestamp('1994-01-28 00:00:00')

In [357]:
initial_date = total_value.index[0]
compare_results = data_df.loc[initial_date:, 'Close'].copy()

In [358]:
compare_results.shape


Out[358]:
(5771,)

In [359]:
compare_results.name = 'AAPL'
compare_results_df = pd.DataFrame(compare_results)
compare_results_df['portfolio'] = total_value
compare_results_df.head()


Out[359]:
            AAPL  portfolio
date
1994-01-28  1.21   10000.00
1994-01-31  1.17   10000.00
1994-02-01  1.19   10017.08
1994-02-02  1.18   10017.08
1994-02-03  1.20   10017.08

In [360]:
std_comp_df = compare_results_df / compare_results_df.iloc[0]
std_comp_df.plot()


Out[360]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa0efa0ff98>

In [361]:
value_eval(pd.DataFrame(compare_results_df['AAPL']))


Out[361]:
(0.66696407892698417,
 94.719008264462801,
 0.0012196011129682511,
 0.029027901311892094,
 115.81999999999999)
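
This buy-and-hold benchmark agrees with the raw prices: AAPL's close went from 1.21 on 1994-01-28 to 115.82 at the end of the data:

In [ ]:
print(115.82 / 1.21 - 1)  # ~94.719, the cumulative return reported above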


In [362]:
# Scratch check: dict-of-dicts -> DataFrame, as used in simulate_period's return
a_dict = {'col1': {'index1': 89, 'index2': 98}, 'col2': {'index1': 90, 'index2': 80}}

In [363]:
pd.DataFrame(a_dict)


Out[363]:
        col1  col2
index1    89    90
index2    98    80

In [364]:
pd.DataFrame(a_dict).sum(axis=1)


Out[364]:
index1    179
index2    178
dtype: int64
