In this notebook a complete end-to-end simulation is run: technical indicators are built from price data, an Environment and a learning Agent are created, and the agent trades AAPL over the full data period.


In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

plt.rcParams['figure.figsize'] = (20.0, 10.0)

sys.path.append('../../')

In [2]:
from recommender.indicator import Indicator
import recommender.indicator_functions as indf

total_data_df = pd.read_pickle('../../data/data_df.pkl')
SYMBOL = 'AAPL'
data_df = total_data_df[SYMBOL].unstack()
data_df.head()


Out[2]:
feature     Close  High  Low   Open  Volume
date
1993-01-29   2.12  2.19  2.11   0.0  66525200.0
1993-02-01   2.19  2.19  2.12   0.0  60138400.0
1993-02-02   2.15  2.20  2.15   0.0  45584000.0
1993-02-03   2.14  2.18  2.09   0.0  66046400.0
1993-02-04   2.12  2.15  2.11   0.0  52038000.0

In [3]:
close_df = total_data_df.xs('Close', level='feature')
close_df.head()


Out[3]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 43.94 24.50 6.88 NaN NaN NaN NaN 2.59 18.75 NaN ... 22.00 14.28 2.50 NaN NaN NaN NaN NaN 10.94 NaN
1993-02-01 44.25 24.69 6.88 NaN NaN NaN NaN 2.72 19.12 NaN ... 22.19 14.09 2.62 NaN NaN NaN NaN NaN 11.06 NaN
1993-02-02 44.34 24.72 6.53 NaN NaN NaN NaN 2.84 20.25 NaN ... 22.06 14.09 2.64 NaN NaN NaN NaN NaN 11.12 NaN
1993-02-03 44.81 25.19 6.91 NaN NaN NaN NaN 2.70 20.50 NaN ... 22.38 14.03 2.68 NaN NaN NaN NaN NaN 11.25 NaN
1993-02-04 45.00 26.06 6.84 NaN NaN NaN NaN 2.73 20.12 NaN ... 22.81 14.15 2.67 NaN NaN NaN NaN NaN 11.69 NaN

5 rows × 503 columns


In [4]:
total_data_df.head()


Out[4]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
date feature
1993-01-29 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 43.97 24.62 6.88 NaN NaN NaN NaN 2.64 19.12 NaN ... 22.00 14.32 2.50 NaN NaN NaN NaN NaN 10.94 NaN
Low 43.75 24.47 6.75 NaN NaN NaN NaN 2.56 18.62 NaN ... 21.88 13.84 2.46 NaN NaN NaN NaN NaN 10.62 NaN
Close 43.94 24.50 6.88 NaN NaN NaN NaN 2.59 18.75 NaN ... 22.00 14.28 2.50 NaN NaN NaN NaN NaN 10.94 NaN
Volume 1003200.00 1242800.00 4638400.00 NaN NaN NaN NaN 4990400.00 730600.00 NaN ... 87800.00 7633602.00 1745196.00 NaN NaN NaN NaN NaN 33600.00 NaN

5 rows × 503 columns


In [5]:
ind1 = Indicator(indf.z_score, indf.z_score_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
ind2 = Indicator(indf.rsi, indf.rsi_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
ind3 = Indicator(indf.on_volume_balance, indf.on_volume_balance_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
indicators = {'ind1': ind1,
              'ind2': ind2,
              'ind3': ind3}

A very simple environment with a simple agent


In [6]:
from recommender.environment import Environment
INITIAL_CAP = 100

env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)

In [7]:
from recommender.agent import Agent

num_states = (len(ind1.q_levels)+1) * (len(ind2.q_levels)+1) * (len(ind3.q_levels)+1)
num_actions = 3  # Buy or sell all, or do nothing.

agent = Agent(num_states=num_states, 
              num_actions=num_actions, 
              random_actions_rate=0.98, 
              random_actions_decrease=0.999,
              dyna_iterations=20)
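
With random_actions_rate=0.98 the agent starts out almost fully random, and random_actions_decrease presumably shrinks that rate multiplicatively after every play (the Agent internals are not shown in this notebook). A minimal sketch of that assumed schedule:

In [ ]:
# Hypothetical illustration of the exploration decay: if the rate is multiplied
# by 0.999 after each play, 5000 steps shrink it from 0.98 to roughly 0.0066,
# i.e. the agent ends up almost fully greedy.
rate = 0.98
for _ in range(5000):
    rate *= 0.999
print(rate)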

In [8]:
from recommender.order import Order

action_0 = [Order(['AAPL', Order.BUY, 100])]
action_1 = [Order(['AAPL', Order.SELL, 100])]
action_2 = [Order(['AAPL', Order.NOTHING, 0])]

actions = {0: action_0,
           1: action_1,
           2: action_2}

In [9]:
data_df.index[252]


Out[9]:
Timestamp('1994-01-27 00:00:00')

In [10]:
# Initialization: start the portfolio one trading year (252 sessions) into the
# data, then get the initial state and the agent's (so far untrained) response.
env.portfolio.set_current_date(data_df.index[252])
initial_state = env.get_state()
action = agent.play_learned_response(initial_state)

In [11]:
actions[action]


Out[11]:
[symbol       AAPL
 order     NOTHING
 shares          0
 dtype: object]

In [12]:
# TODO: Change N_iters to the whole period of data_df
# TODO: copy the code to a function that can be called from an outside loop.
N_iters = 5000

tic = time.time()
for i in range(N_iters):
    reward, new_state = env.get_consequences(actions[action])
    action = agent.play(reward, new_state)

    # Uncomment to trace each step:
    # print('date={}\n'.format(env.portfolio.current_date))
    # print('reward={} \n\nnew_state={} \n\naction=\n{}'.format(reward, new_state, actions[action]))
    # print('-'*70 + '\n\n')
    # time.sleep(0.5)
toc = time.time()
print('Elapsed time: {}s'.format(toc - tic))


Elapsed time: 259.70663475990295s

In [13]:
env.portfolio.get_positions()


Out[13]:
        shares      value
AAPL   -8300.0  -671636.0
CASH  298215.0   298215.0

In [14]:
env.portfolio.get_positions()['value'].sum()


Out[14]:
-373421.0

In [15]:
env.portfolio.current_date


Out[15]:
Timestamp('2013-12-09 00:00:00')

In [16]:
(data_df.loc[env.portfolio.current_date] / data_df.iloc[0])['Close']


Out[16]:
38.169811320754718

In [17]:
env.portfolio.get_positions()['value'].sum() / INITIAL_CAP


Out[17]:
-3734.21

After 5000 steps the agent ends up heavily short AAPL and the portfolio value is deeply negative, while the stock itself rose about 38x over the same period. Let's wrap that single learning step in a function.


In [18]:
def simulate_one_step(action):
    reward, new_state = env.get_consequences(actions[action])
    return agent.play(reward, new_state)

In [19]:
from recommender.indicator import Indicator
import recommender.indicator_functions as indf


def create_indicators(data_df):
    """
    Create the set of indicators used by the environment.
    To drop an indicator, just comment out its line.
    """
    indicators = {}
    indicators['z_score'] = Indicator(indf.z_score, indf.z_score_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    indicators['rsi'] = Indicator(indf.rsi, indf.rsi_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    indicators['on_volume_balance'] = Indicator(indf.on_volume_balance, indf.on_volume_balance_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    return indicators

In [20]:
def get_num_states(indicators):
    """Each indicator contributes len(q_levels) + 1 buckets; the counts multiply."""
    num_states = 1
    for ind in indicators.values():
        num_states *= len(ind.q_levels) + 1
    return num_states

In [21]:
print(num_states)
get_num_states(indicators)


9261
Out[21]:
9261
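
The count checks out: the indicators from cell In [5] use a 0.2 step over [-2.0, 2.0), i.e. 20 levels and hence 21 buckets each, giving 21**3 = 9261 joint states. Note that create_indicators above uses a 0.4 step instead, giving 11**3 = 1331 states, which matches agent.Q.shape == (1331, 11) further down.

In [ ]:
# Sanity check on both state-space sizes
print((20 + 1)**3)  # 9261, for the 0.2-step indicators
print((10 + 1)**3)  # 1331, for the 0.4-step indicators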

In [22]:
current_price = data_df['Close'].iloc[-1]
current_price


Out[22]:
115.81999999999999

In [23]:
capital = 10000

In [24]:
import recommender.portfolio as pf

positions = env.portfolio.get_positions()
positions.loc[SYMBOL, pf.SHARES]


Out[24]:
-8300.0

In [25]:
self = env.portfolio  # bind 'self' by hand to prototype Portfolio methods below
current_price = self.close_df.loc[self.current_date][SYMBOL]
current_price


Out[25]:
80.920000000000002

In [26]:
self.get_total_value()


Out[26]:
-373421.0

In [27]:
np.round(-1.2)  # check how negative share counts will round


Out[27]:
-1.0

In [28]:
target_fraction = 0.3

In [29]:
np.round(self.get_total_value()*target_fraction/current_price)


Out[29]:
-1384.0

In [30]:
# Prototype for a Portfolio instance method ('self' was bound above): number of
# shares to hold so the position is target_fraction of total value. Note that a
# negative total value yields a negative share target.
def order_target(target_fraction):
    current_price = self.close_df.loc[self.current_date][SYMBOL]
    return np.round(self.get_total_value() * target_fraction / current_price)

In [31]:
order_target(0.3)


Out[31]:
-1384.0

In [32]:
order_target(0.7)


Out[32]:
-3230.0

In [33]:
self = env  # re-bind 'self' to prototype Environment methods below

In [34]:
from recommender.quantizer import Quantizer

actions_fractions = Quantizer(np.arange(-1.0, 1.0, 0.1).round(decimals=3).tolist())
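
The Quantizer presumably snaps an arbitrary fraction onto the nearest of its q_levels, which would explain why act_to_target(0.61) below lands at a ~0.60 allocation. A minimal sketch of that assumed behavior:

In [ ]:
# Sketch (assumption about Quantizer internals): quantizing = nearest level
levels = np.arange(-1.0, 1.0, 0.1).round(decimals=3).tolist()
def nearest_level(x):
    return min(levels, key=lambda q: abs(q - x))
print(nearest_level(0.61))  # 0.6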

In [35]:
from recommender.portfolio import SHARES

previous_shares = self.portfolio.get_positions().loc[SYMBOL, SHARES]
previous_shares


Out[35]:
-8300.0

In [36]:
# Prototype for an Environment instance method: quantize the target fraction,
# compute the share delta needed to reach it, and submit one BUY order for the
# difference (a negative share count presumably acts as a sell).
def act_to_target(target_fraction):
    current_price = self.portfolio.close_df.loc[self.portfolio.current_date][self.symbol]
    wanted_shares = np.fix(self.portfolio.get_total_value() *
                           actions_fractions.get_quantized_value(target_fraction) / current_price)
    previous_shares = self.portfolio.get_positions().loc[self.symbol, pf.SHARES]
    shares_increase = wanted_shares - previous_shares
    action = [Order([self.symbol, Order.BUY, shares_increase])]
    return self.get_consequences(action)

In [37]:
print(act_to_target(0.61))
print(env.portfolio.get_positions())
print(env.portfolio.get_positions().loc[self.symbol, pf.VALUE] / env.portfolio.get_total_value())


(359.8399999999674, 6543)
         shares      value
AAPL   -2768.00 -223626.72
CASH -149434.44 -149434.44
0.599437153951

In [38]:
print(env.actions_fractions.q_levels)
len(env.actions_fractions.q_levels)


[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
Out[38]:
11


In [63]:
from recommender.environment import Environment
from recommender.agent import Agent
import logging

INITIAL_CAP = 10000
symbol = 'AAPL'
log = logging.getLogger('simulate_period')
log.setLevel(logging.DEBUG)

# Initialization
total_data_df = pd.read_pickle('../../data/data_df.pkl')
data_df = total_data_df[symbol].unstack()
indicators = create_indicators(data_df)
env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
env.portfolio.set_current_date(data_df.index[252])

num_states = get_num_states(indicators)
num_actions = len(env.actions_fractions.q_levels)  # All the possible fractions of total value
agent = Agent(num_states=num_states, 
              num_actions=num_actions, 
              random_actions_rate=0.98, 
              random_actions_decrease=0.999,
              dyna_iterations=20)

In [64]:
agent.Q.shape


Out[64]:
(1331, 11)

In [65]:
# Loop and play
N_iters = data_df.shape[0]
fraction_index = 0

In [93]:
print('date={}\n'.format(env.portfolio.current_date))
print(env.portfolio.get_positions())
print('-'*70 + '\n\n')
reward, new_state = env.get_consequences_from_fraction_index(fraction_index)

print('date={}\n'.format(env.portfolio.current_date))
print('reward = {} \n\nnew_state = {} \n\naction = {} ({})'.format(reward, 
                                                                   new_state,
                                                                   fraction_index,
                                                                   env.actions_fractions.interval_to_value(fraction_index)))
pos = env.portfolio.positions_df
print(env.portfolio.get_positions())
print(pos.loc[symbol, 'value'] / pos['value'].sum())
print('-'*70 + '\n\n')


fraction_index = agent.play(reward, new_state)


date=1994-03-08 00:00:00

       shares    value
AAPL  3080.00  4065.60
CASH  6239.02  6239.02
----------------------------------------------------------------------


date=1994-03-09 00:00:00

reward = 62.44000000000051 

new_state = 1039 

action = 5 (0.4)
       shares    value
AAPL  3122.00  4183.48
CASH  6183.58  6183.58
0.403535814397
----------------------------------------------------------------------
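
The numbers above are consistent with the reward being the one-day change in total portfolio value (the Environment's reward definition is not shown here, so this is a check, not a proof):

In [ ]:
# Total value before (1994-03-08) and after (1994-03-09) the step
before = 4065.60 + 6239.02  # AAPL + CASH
after = 4183.48 + 6183.58
print(after - before)       # 62.44, matching the reward printed above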



Let's test the act_to_target function, now available as an Environment method.


In [55]:
env.actions_fractions.q_levels


Out[55]:
[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

In [62]:
env.act_to_target(0.8)
pos = env.portfolio.get_positions()
print(pos)
print(pos.loc[symbol, 'value'] / pos['value'].sum())
print('-'*70 + '\n\n')


       shares    value
AAPL  6565.00  8665.80
CASH  2134.64  2134.64
0.802356200303
----------------------------------------------------------------------



In [116]:
def initialize_env(data_df, symbol):
    # Initialization. Note: data_df is re-derived from the pickle below, so the
    # passed-in argument is effectively ignored.
    total_data_df = pd.read_pickle('../../data/data_df.pkl')
    data_df = total_data_df[symbol].unstack()
    indicators = create_indicators(data_df)
    env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
    env.portfolio.set_current_date(data_df.index[252])
    num_states = get_num_states(indicators)
    num_actions = len(env.actions_fractions.q_levels)  # all possible fractions of total value
    return env, num_states, num_actions

In [203]:
from recommender.environment import Environment
from recommender.agent import Agent
import sys

INITIAL_CAP = 10000


def simulate_period(data_df, symbol, agent, other_env=None, verbose=False):
    """
    Simulate the market and one Agent over the entire period.
    data_df is indexed by date, with one column per feature:
    feature     Close   High    Low     Open    Volume
    date
    1993-01-29  2.12    2.19    2.11    0.0     66525200.0
    1993-02-01  2.19    2.19    2.12    0.0     60138400.0
    1993-02-02  2.15    2.20    2.15    0.0     45584000.0
    """
    if other_env is None:
        env, num_states, num_actions = initialize_env(data_df, symbol)
    else:
        env = other_env

    # Loop and play
    N_iters = data_df.shape[0]
    fraction_index = 0
    recorded_stock_value = {}
    recorded_cash_value = {}
    for i in range(N_iters):
        reward, new_state = env.get_consequences_from_fraction_index(fraction_index)

        if verbose:
            print('date={}\n'.format(env.portfolio.current_date))
            print('reward = {} \n\nnew_state = {} \n\naction = {} ({})'.format(
                reward,
                new_state,
                fraction_index,
                env.actions_fractions.interval_to_value(fraction_index)))
            pos = env.portfolio.positions_df
            print(env.portfolio.get_positions())
            print(pos.loc[symbol, 'value'] / pos['value'].sum())
            print('-'*70 + '\n\n')

        # Record positions before the agent chooses its next action
        pos = env.portfolio.positions_df
        recorded_stock_value[env.portfolio.current_date] = pos.loc[symbol, 'value']
        recorded_cash_value[env.portfolio.current_date] = pos.loc['CASH', 'value']
        fraction_index = agent.play(reward, new_state)
        if i % 100 == 0:
            sys.stdout.write('\rIteration {}/{}'.format(i, N_iters))
            sys.stdout.flush()

    return pd.DataFrame({'stock_value': recorded_stock_value, 'cash': recorded_cash_value})

In [204]:
env, num_states, num_actions = initialize_env(data_df, symbol)
agent = Agent(num_states=num_states, 
              num_actions=num_actions, 
              random_actions_rate=0.98, 
              random_actions_decrease=0.999,
              dyna_iterations=20)

In [205]:
values = simulate_period(data_df, symbol, agent, other_env=env)


Iteration 6000/6024

In [156]:
from utils.analysis import value_eval

print(type(value_eval(pd.DataFrame(values))))
len(value_eval(pd.DataFrame(values)))


<class 'tuple'>
Out[156]:
5

In [227]:
print(values.shape)
values.head()


(5771, 2)
Out[227]:
                cash  stock_value
1994-01-28  10000.00         0.00
1994-01-31  10000.00         0.00
1994-02-01   3001.06      7118.58
1994-02-02   6072.45      4013.18
1994-02-03   5043.49      5127.60

In [208]:
from utils.analysis import value_eval
total_value = values.sum(axis=1)
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(total_value))))


Sharpe ratio: 0.21875070056678433
Cum. Ret.: 1.3737920000000057
AVG_DRET: 0.0016708708021690355
STD_DRET: 0.12125333369916133
Final value: 23737.920000000056
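
These figures are internally consistent: the final value equals INITIAL_CAP times one plus the cumulative return, and the Sharpe ratio matches the usual annualization of the daily-return statistics, assuming value_eval annualizes over 252 trading days:

In [ ]:
avg_dret, std_dret = 0.0016708708021690355, 0.12125333369916133
print(np.sqrt(252) * avg_dret / std_dret)  # ~0.2188, the Sharpe ratio above
print(10000 * (1 + 1.3737920000000057))    # 23737.92, the final value above

The rerun below reuses the already-trained agent (its exploration rate now mostly decayed) on a freshly initialized environment, which presumably explains the much better results.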

In [352]:
values_df = simulate_period(data_df, symbol, agent)
total_value = values_df.sum(axis=1)
value_eval(pd.DataFrame(total_value))


Iteration 6000/6024
Out[352]:
(1.6986094239704916,
 47.609569,
 0.00069403003155876579,
 0.0064861203758870807,
 486095.69)

In [353]:
total_value.plot()


Out[353]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa0ef4d45f8>

In [354]:
total_value.iloc[-1]


Out[354]:
486095.69

In [355]:
print(total_value.shape)
total_value.head()


(5771,)
Out[355]:
1994-01-28    10000.00
1994-01-31    10000.00
1994-02-01    10017.08
1994-02-02    10017.08
1994-02-03    10017.08
dtype: float64

In [356]:
total_value.index[0]


Out[356]:
Timestamp('1994-01-28 00:00:00')

In [357]:
initial_date = total_value.index[0]
compare_results = data_df.loc[initial_date:, 'Close'].copy()

In [358]:
compare_results.shape


Out[358]:
(5771,)

In [359]:
compare_results.name = 'AAPL'
compare_results_df = pd.DataFrame(compare_results)
compare_results_df['portfolio'] = total_value
compare_results_df.head()


Out[359]:
            AAPL  portfolio
date
1994-01-28  1.21   10000.00
1994-01-31  1.17   10000.00
1994-02-01  1.19   10017.08
1994-02-02  1.18   10017.08
1994-02-03  1.20   10017.08

In [360]:
std_comp_df = compare_results_df / compare_results_df.iloc[0]
std_comp_df.plot()


Out[360]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa0efa0ff98>

In [361]:
value_eval(pd.DataFrame(compare_results_df['AAPL']))


Out[361]:
(0.66696407892698417,
 94.719008264462801,
 0.0012196011129682511,
 0.029027901311892094,
 115.81999999999999)
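
This buy-and-hold benchmark agrees with the raw prices: AAPL's close went from 1.21 on 1994-01-28 to 115.82 at the end of the data:

In [ ]:
print(115.82 / 1.21 - 1)  # ~94.719, the cumulative return reported above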


In [362]:
# Scratch check: dict-of-dicts -> DataFrame, as used in simulate_period's return
a_dict = {'col1': {'index1': 89, 'index2': 98}, 'col2': {'index1': 90, 'index2': 80}}

In [363]:
pd.DataFrame(a_dict)


Out[363]:
        col1  col2
index1    89    90
index2    98    80

In [364]:
pd.DataFrame(a_dict).sum(axis=1)


Out[364]:
index1    179
index2    178
dtype: int64
