In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 10.0)
%load_ext autoreload
%autoreload 2
sys.path.append('../../')
In [2]:
from recommender.indicator import Indicator
import recommender.indicator_functions as indf
total_data_df = pd.read_pickle('../../data/data_df.pkl')
SYMBOL = 'AAPL'
data_df = total_data_df[SYMBOL].unstack()
data_df.head()
Out[2]:
In [3]:
close_df = total_data_df.xs('Close', level='feature')
close_df.head()
Out[3]:
In [4]:
total_data_df.head()
Out[4]:
In [5]:
ind1 = Indicator(indf.z_score, indf.z_score_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
ind2 = Indicator(indf.rsi, indf.rsi_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
ind3 = Indicator(indf.on_volume_balance, indf.on_volume_balance_vec, q_levels=np.arange(-2.0, 2.0, 0.2).tolist(), data_df=data_df)
indicators = {'ind1': ind1,
              'ind2': ind2,
              'ind3': ind3}
In [6]:
from recommender.environment import Environment
INITIAL_CAP = 100
env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
In [7]:
from recommender.agent import Agent
num_states = (len(ind1.q_levels)+1) * (len(ind2.q_levels)+1) * (len(ind3.q_levels)+1)
num_actions = 3 # Buy or sell all, or do nothing.
agent = Agent(num_states=num_states,
              num_actions=num_actions,
              random_actions_rate=0.98,
              random_actions_decrease=0.999,
              dyna_iterations=20)
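As a quick sanity check of the state-space size (a side note, not an original cell): each indicator is quantized with np.arange(-2.0, 2.0, 0.2), i.e. 20 levels and therefore 21 bins, so num_states above works out to 21**3 = 9261.
# Side calculation of the tabular state count (illustrative, not part of the notebook run)
bins_per_indicator = len(np.arange(-2.0, 2.0, 0.2)) + 1  # 20 levels -> 21 bins
bins_per_indicator ** 3                                   # 21**3 = 9261 states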
In [8]:
from recommender.order import Order
action_0 = [Order(['AAPL', Order.BUY, 100])]
action_1 = [Order(['AAPL', Order.SELL, 100])]
action_2 = [Order(['AAPL', Order.NOTHING, 0])]
actions = {0: action_0,
           1: action_1,
           2: action_2}
In [9]:
data_df.index[252]
Out[9]:
In [10]:
# Initialization
env.portfolio.set_current_date(data_df.index[252])
initial_state = env.get_state()
action = agent.play_learned_response(initial_state)
In [11]:
actions[action]
Out[11]:
In [12]:
import time
# TODO: Change N_iters to the whole period of data_df
# TODO: copy the code to a function that can be called from an outside loop.
N_iters = 5000
tic = time.time()
for i in range(N_iters):
    # while X
    reward, new_state = env.get_consequences(actions[action])
    action = agent.play(reward, new_state)
    # Show results
    # print('date={}\n'.format(env.portfolio.current_date))
    # print('reward={} \n\nnew_state={} \n\naction=\n{}'.format(reward, new_state, actions[action]))
    # print('-'*70 + '\n\n')
    # time.sleep(0.5)
toc = time.time()
print('Elapsed time: {}s'.format(toc - tic))
In [13]:
env.portfolio.get_positions()
Out[13]:
In [14]:
env.portfolio.get_positions()['value'].sum()
Out[14]:
In [15]:
env.portfolio.current_date
Out[15]:
In [16]:
(data_df.loc[env.portfolio.current_date] / data_df.iloc[0])['Close']
Out[16]:
In [17]:
env.portfolio.get_positions()['value'].sum() / INITIAL_CAP
Out[17]:
In [18]:
def simulate_one_step(action):
    reward, new_state = env.get_consequences(actions[action])
    return agent.play(reward, new_state)
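A hypothetical usage sketch (not an original cell): each call performs one environment step followed by one agent update, so a short loop continues the simulation where the previous cell left off.
# Sketch: advance the simulation a few more steps with the helper above
for _ in range(10):
    action = simulate_one_step(action)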
In [19]:
from recommender.indicator import Indicator
import recommender.indicator_functions as indf
def create_indicators(data_df):
    """
    Create the particular set of indicators used in this notebook.
    To remove one, just comment out its line.
    """
    indicators = {}
    indicators['z_score'] = Indicator(indf.z_score, indf.z_score_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    indicators['rsi'] = Indicator(indf.rsi, indf.rsi_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    indicators['on_volume_balance'] = Indicator(indf.on_volume_balance, indf.on_volume_balance_vec, q_levels=np.arange(-2.0, 2.0, 0.4).tolist(), data_df=data_df)
    return indicators
In [20]:
def get_num_states(indicators):
    """Number of discrete states: the product of (number of q_levels + 1) over all indicators."""
    acum = 1
    for ind in indicators.values():
        acum *= len(ind.q_levels) + 1
    return acum
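For instance, with the coarser quantization used by create_indicators (step 0.4, i.e. 10 levels and 11 bins per indicator), the product above gives 11**3 = 1331 states (a side calculation, not an original cell):
# Side calculation for the 0.4-step quantization used in create_indicators
(len(np.arange(-2.0, 2.0, 0.4)) + 1) ** 3  # (10 + 1)**3 = 1331 states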
In [21]:
print(num_states)
get_num_states(indicators)
Out[21]:
In [22]:
current_price = data_df['Close'].iloc[-1]
current_price
Out[22]:
In [23]:
capital = 10000
In [24]:
import recommender.portfolio as pf
positions = env.portfolio.get_positions()
positions.loc[SYMBOL, pf.SHARES]
Out[24]:
In [25]:
# Prototype as if inside the Portfolio class: bind `self` to the portfolio instance
self = env.portfolio
current_price = self.close_df.loc[self.current_date][SYMBOL]
current_price
Out[25]:
In [26]:
self.get_total_value()
Out[26]:
In [27]:
np.round(-1.2)
Out[27]:
In [28]:
target_fraction = 0.3
In [29]:
np.round(self.get_total_value()*target_fraction/current_price)
Out[29]:
In [30]:
# Prototype of an instance method for the Portfolio class (`self` is the portfolio bound above)
def order_target(target_fraction):
    current_price = self.close_df.loc[self.current_date][SYMBOL]
    return np.round(self.get_total_value() * target_fraction / current_price)
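For intuition, a hypothetical worked example with illustrative numbers (not taken from the notebook run): with a total portfolio value of 10,000, a target fraction of 0.3 and a share price of 150, the target position is round(10000 * 0.3 / 150) = 20 shares.
# Illustrative numbers only (hypothetical portfolio value and price)
np.round(10000 * 0.3 / 150)  # -> 20.0 shares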
In [31]:
order_target(0.3)
Out[31]:
In [32]:
order_target(0.7)
Out[32]:
In [33]:
# Now prototype at the Environment level: bind `self` to the environment instance
self = env
In [34]:
from recommender.quantizer import Quantizer
actions_fractions = Quantizer(np.arange(-1.0, 1.0, 0.1).round(decimals=3).tolist())
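Note that np.arange(-1.0, 1.0, 0.1) excludes the right endpoint, so this prototype quantizer has 20 fraction levels, from -1.0 up to 0.9 (negative fractions presumably correspond to short positions). A side check, not an original cell:
# Side check: number of target-fraction levels in the prototype quantizer
len(np.arange(-1.0, 1.0, 0.1).round(decimals=3))  # -> 20 levels: -1.0, -0.9, ..., 0.9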
In [35]:
from recommender.portfolio import SHARES
previous_shares = self.portfolio.get_positions().loc[SYMBOL, SHARES]
previous_shares
Out[35]:
In [36]:
# Prototype of an instance method for the Environment class (`self` is the environment bound above)
def act_to_target(target_fraction):
    current_price = self.portfolio.close_df.loc[self.portfolio.current_date][self.symbol]
    wanted_shares = np.fix(self.portfolio.get_total_value() *
                           actions_fractions.get_quantized_value(target_fraction) / current_price)
    previous_shares = self.portfolio.get_positions().loc[self.symbol, pf.SHARES]
    shares_increase = wanted_shares - previous_shares
    action = [Order([self.symbol, Order.BUY, shares_increase])]
    return self.get_consequences(action)
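To make the mechanics concrete, a hypothetical trace with illustrative numbers (and assuming the quantizer snaps 0.61 down to the 0.6 level; the actual get_quantized_value behaviour may differ): with a total value of 10,000, a price of 100 and 30 shares already held, the wanted position is fix(10000 * 0.6 / 100) = 60 shares, so the emitted order is a BUY of 30 shares. A negative difference is expressed the same way, as a BUY order with a negative share count.
# Hypothetical trace of act_to_target (illustrative numbers; quantization behaviour assumed)
total_value, price, held = 10000, 100.0, 30
wanted = np.fix(total_value * 0.6 / price)  # 60.0 shares, assuming 0.61 quantizes to 0.6
wanted - held                               # +30.0 -> an order to buy 30 shares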
In [37]:
print(act_to_target(0.61))
print(env.portfolio.get_positions())
print(env.portfolio.get_positions().loc[self.symbol, pf.VALUE] / env.portfolio.get_total_value())
In [38]:
print(env.actions_fractions.q_levels)
len(env.actions_fractions.q_levels)
Out[38]:
In [ ]:
In [63]:
from recommender.environment import Environment
from recommender.agent import Agent
import logging
INITIAL_CAP = 10000
symbol = 'AAPL'
log = logging.getLogger('simulate_period')
log.setLevel(logging.DEBUG)
# Initialization
total_data_df = pd.read_pickle('../../data/data_df.pkl')
data_df = total_data_df[symbol].unstack()
indicators = create_indicators(data_df)
env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
env.portfolio.set_current_date(data_df.index[252])
num_states = get_num_states(indicators)
num_actions = len(env.actions_fractions.q_levels) # All the possible fractions of total value
agent = Agent(num_states=num_states,
              num_actions=num_actions,
              random_actions_rate=0.98,
              random_actions_decrease=0.999,
              dyna_iterations=20)
In [64]:
agent.Q.shape
Out[64]:
In [65]:
# Loop and play
N_iters = data_df.shape[0]
fraction_index = 0
In [93]:
print('date={}\n'.format(env.portfolio.current_date))
print(env.portfolio.get_positions())
print('-'*70 + '\n\n')
reward, new_state = env.get_consequences_from_fraction_index(fraction_index)
print('date={}\n'.format(env.portfolio.current_date))
print('reward = {} \n\nnew_state = {} \n\naction = {} ({})'.format(reward,
                                                                   new_state,
                                                                   fraction_index,
                                                                   env.actions_fractions.interval_to_value(fraction_index)))
pos = env.portfolio.positions_df
print(env.portfolio.get_positions())
print(pos.loc[symbol, 'value'] / pos['value'].sum())
print('-'*70 + '\n\n')
fraction_index = agent.play(reward, new_state)
In [ ]:
In [55]:
env.actions_fractions.q_levels
Out[55]:
In [62]:
env.act_to_target(0.8)
pos = env.portfolio.get_positions()
print(pos)
print(pos.loc[symbol, 'value'] / pos['value'].sum())
print('-'*70 + '\n\n')
In [116]:
def initialize_env(data_df, symbol):
    # Initialization. Note: the data_df argument is overwritten below; the data is re-read from the pickle.
    total_data_df = pd.read_pickle('../../data/data_df.pkl')
    data_df = total_data_df[symbol].unstack()
    indicators = create_indicators(data_df)
    env = Environment(total_data_df, indicators=indicators, initial_cap=INITIAL_CAP)
    env.portfolio.set_current_date(data_df.index[252])
    num_states = get_num_states(indicators)
    num_actions = len(env.actions_fractions.q_levels)  # All the possible fractions of total value
    return env, num_states, num_actions
In [203]:
from recommender.environment import Environment
from recommender.agent import Agent
import sys
INITIAL_CAP = 10000
def simulate_period(data_df, symbol, agent, other_env=None, verbose=False):
    """
    Simulate the market and one Agent for the entire period.
    data_df format is like below:

    feature       Close  High   Low  Open      Volume
    date
    1993-01-29     2.12  2.19  2.11   0.0  66525200.0
    1993-02-01     2.19  2.19  2.12   0.0  60138400.0
    1993-02-02     2.15  2.20  2.15   0.0  45584000.0
    """
    if other_env is None:
        env, num_states, num_actions = initialize_env(data_df, symbol)
    else:
        env = other_env
    # Loop and play
    N_iters = data_df.shape[0]
    fraction_index = 0
    recorded_stock_value = {}
    recorded_cash_value = {}
    for i in range(N_iters):
        reward, new_state = env.get_consequences_from_fraction_index(fraction_index)
        if verbose:
            print('date={}\n'.format(env.portfolio.current_date))
            print('reward = {} \n\nnew_state = {} \n\naction = {} ({})'.format(reward,
                                                                               new_state,
                                                                               fraction_index,
                                                                               env.actions_fractions.interval_to_value(fraction_index)))
            pos = env.portfolio.positions_df
            print(env.portfolio.get_positions())
            print(pos.loc[symbol, 'value'] / pos['value'].sum())
            print('-'*70 + '\n\n')
        pos = env.portfolio.positions_df
        recorded_stock_value[env.portfolio.current_date] = pos.loc[symbol, 'value']
        recorded_cash_value[env.portfolio.current_date] = pos.loc['CASH', 'value']
        fraction_index = agent.play(reward, new_state)
        if i % 100 == 0:
            sys.stdout.write('\rIteration {}/{}'.format(i, N_iters))
    return pd.DataFrame({'stock_value': recorded_stock_value, 'cash': recorded_cash_value})
In [204]:
env, num_states, num_actions = initialize_env(data_df, symbol)
agent = Agent(num_states=num_states,
              num_actions=num_actions,
              random_actions_rate=0.98,
              random_actions_decrease=0.999,
              dyna_iterations=20)
In [205]:
values = simulate_period(data_df, symbol, agent, other_env=env)
In [156]:
print(type(value_eval(pd.DataFrame(values))))
len(value_eval(pd.DataFrame(values)))
Out[156]:
In [227]:
print(values.shape)
values.head()
Out[227]:
In [208]:
from utils.analysis import value_eval
total_value = values.sum(axis=1)
print('Sharpe ratio: {}\nCum. Ret.: {}\nAVG_DRET: {}\nSTD_DRET: {}\nFinal value: {}'.format(*value_eval(pd.DataFrame(total_value))))
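For reference, a minimal sketch of how such summary statistics are conventionally computed from a daily value series. This is an assumption based on the labels printed above, not the actual implementation of utils.analysis.value_eval, which may differ (e.g. in risk-free rate or annualization factor).
# Sketch only: conventional daily metrics (value_eval's actual implementation may differ)
def sketch_value_eval(value_series):
    daily_returns = value_series.pct_change().dropna()
    avg_dret = daily_returns.mean()
    std_dret = daily_returns.std()
    sharpe = np.sqrt(252) * avg_dret / std_dret  # annualized, zero risk-free rate assumed
    cum_ret = value_series.iloc[-1] / value_series.iloc[0] - 1.0
    return sharpe, cum_ret, avg_dret, std_dret, value_series.iloc[-1]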
In [352]:
values_df = simulate_period(data_df, symbol, agent)
total_value = values_df.sum(axis=1)
value_eval(pd.DataFrame(total_value))
Out[352]:
In [353]:
total_value.plot()
Out[353]:
In [354]:
total_value.iloc[-1]
Out[354]:
In [355]:
print(total_value.shape)
total_value.head()
Out[355]:
In [356]:
total_value.index[0]
Out[356]:
In [357]:
initial_date = total_value.index[0]
compare_results = data_df.loc[initial_date:, 'Close'].copy()
In [358]:
compare_results.shape
Out[358]:
In [359]:
compare_results.name = 'AAPL'
compare_results_df = pd.DataFrame(compare_results)
compare_results_df['portfolio'] = total_value
compare_results_df.head()
Out[359]:
In [360]:
std_comp_df = compare_results_df / compare_results_df.iloc[0]
std_comp_df.plot()
Out[360]:
In [361]:
value_eval(pd.DataFrame(compare_results_df['AAPL']))
Out[361]:
In [ ]:
In [362]:
# Sanity check of the dict-of-dicts -> DataFrame construction used in simulate_period
a_dict = {'col1': {'index1': 89, 'index2': 98}, 'col2': {'index1': 90, 'index2': 80}}
In [363]:
pd.DataFrame(a_dict)
Out[363]:
In [364]:
pd.DataFrame(a_dict).sum(axis=1)
Out[364]: