In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 10.0)
%load_ext autoreload
%autoreload 2
sys.path.append('../../')
In [2]:
levels = [-13.5, -10.0, -1.0, 2.0, 3.0]
In [3]:
real_value = -6.7
temp_list = levels + [real_value]
temp_list
Out[3]:
In [4]:
temp_list.sort()
temp_list
Out[4]:
In [5]:
sorted_index = temp_list.index(real_value)
if sorted_index == 0:
    # Below the lowest level: clip to it
    q_value = levels[0]
elif sorted_index == len(temp_list) - 1:
    # Above the highest level: clip to it
    q_value = levels[-1]
else:
    # In between: midpoint of the two surrounding levels
    q_value = (temp_list[sorted_index - 1] + temp_list[sorted_index + 1]) / 2
q_value
Out[5]:
In [6]:
def quantize(real_value, levels):
    """Quantize real_value against a sorted list of levels: clip values outside
    the range, otherwise return the midpoint of the two surrounding levels."""
    temp_list = levels + [real_value]
    temp_list.sort()
    sorted_index = temp_list.index(real_value)
    if sorted_index == 0:
        q_value = levels[0]
    elif sorted_index == len(temp_list) - 1:
        q_value = levels[-1]
    else:
        q_value = (temp_list[sorted_index - 1] + temp_list[sorted_index + 1]) / 2
    return q_value
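The same mapping can be computed without building and sorting a temporary list per value. A minimal vectorized sketch with np.searchsorted, assuming levels is sorted ascending (quantize_vec is a name introduced here, not part of the project):
In [ ]:
def quantize_vec(real_values, levels):
    """Vectorized equivalent of quantize(): binary-search each value into levels."""
    levels = np.asarray(levels, dtype=float)
    # Candidate outputs: lowest level, midpoints of adjacent levels, highest level
    candidates = np.concatenate(([levels[0]], (levels[:-1] + levels[1:]) / 2, [levels[-1]]))
    return candidates[np.searchsorted(levels, real_values)]

quantize_vec(np.array([-15.0, -6.7, 5.0]), levels)  # -> [-13.5, -5.5, 3.0]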
In [7]:
levels
Out[7]:
In [8]:
x = np.arange(-20, 20, 0.2)
x_df = pd.DataFrame(x, columns=['real_value'])
x_df
Out[8]:
In [9]:
len(x_df.values.tolist())
Out[9]:
In [10]:
from functools import partial
# Quantize every real value; partial fixes the levels argument
x_df['q_value'] = x_df['real_value'].apply(partial(quantize, levels=levels))
x_df.head()
Out[10]:
In [11]:
plt.plot(x_df['real_value'], x_df['q_value'])
Out[11]:
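The vectorized sketch from above reproduces the same staircase in a single call, which avoids the row-wise apply:
In [ ]:
# Check that quantize_vec matches the apply-based column
np.allclose(quantize_vec(x_df['real_value'].values, levels), x_df['q_value'].values)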
In [12]:
data_df = pd.read_pickle('../../data/data_df.pkl')
In [13]:
first_date = data_df.index.get_level_values(0)[0]
first_date
Out[13]:
In [14]:
one_input_df = data_df.loc[first_date,:]
one_input_df
Out[14]:
Normally, the data passed to the extractor will be all the data for one symbol over a period of several days.
In [15]:
num_days = 50
end_date = data_df.index.get_level_values(0).unique()[num_days-1]
In [16]:
sym_data = data_df['MSFT'].unstack()
sym_data.head()
Out[16]:
In [17]:
batch_data = sym_data[first_date:end_date]
batch_data.shape
Out[17]:
In [18]:
from recommender.indicator import Indicator
In [19]:
np.arange(0, 1e4, 1)
Out[19]:
In [20]:
batch_data.head()
Out[20]:
In [21]:
# Indicator 1: mean closing price, quantized over [0, 10000) in steps of 100
ind1 = Indicator(lambda x: x['Close'].mean(), np.arange(0, 10000, 100).tolist(), batch_data)
In [ ]:
ind1.extracted_data
In [ ]:
ind1.extract(batch_data)
In [ ]:
ind1.q_levels
Another Indicator
In [ ]:
# Indicator 2: maximum of Volume/Close, quantized over [0, 1e8) in steps of 1e6
ind2 = Indicator(lambda x: (x['Volume']/x['Close']).max(), np.arange(0, 1e8, 1e6).tolist(), batch_data)
In [ ]:
ind2.extract(batch_data)
In [ ]:
(batch_data['Volume']/batch_data['Close']).max()
In [ ]:
# Indicator 3: minimum high price, quantized over [0, 100) in steps of 1
ind3 = Indicator(lambda x: x['High'].min(), np.arange(0, 100, 1).tolist(), batch_data)
In [ ]:
ind3.extract(batch_data)
In [ ]:
indicators = [ind1, ind2, ind3]
In [ ]:
vect_state = tuple(ind.extract(batch_data) for ind in indicators)
vect_state
Let's generate the q_values (the candidate quantized outputs: the boundary levels plus the midpoints of adjacent levels) for the q_levels.
In [ ]:
len(ind1.q_levels)
In [ ]:
# Candidate quantized values: lowest level, midpoints of adjacent levels, highest level
q_values = [ind1.q_levels[0]] + ((np.array(ind1.q_levels[1:]) + np.array(ind1.q_levels[:-1])) / 2).tolist() + [ind1.q_levels[-1]]
q_values[:10]
In [ ]:
len(q_values)
In [ ]:
len(ind1.q_levels)
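A quick sanity check (a sketch; it assumes Indicator keeps the levels it was given in q_levels): every value quantize returns should land on one of these q_values.
In [ ]:
# Random reals spanning well beyond the level range should all map into q_values
for v in np.random.uniform(-1e3, 2e4, 100):
    assert quantize(v, ind1.q_levels) in q_values
print('all quantized values land on q_values')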
In [ ]:
import itertools as it
In [ ]:
states_list = list(it.product(np.arange(len(ind1.q_levels)),
                              np.arange(len(ind2.q_levels)),
                              np.arange(len(ind3.q_levels))))
In [ ]:
len(states_list)
In [ ]:
states_list
In [ ]:
states_list.index((5,1,13))
In [ ]:
indicators = {'ind1': ind1,
'ind2': ind2,
'ind3': ind3}
In [ ]:
states = list(it.product(*[np.arange(len(ind.q_levels)) for ind in indicators.values()]))
In [ ]:
states
In [ ]:
len(states)
In [ ]:
states.index((5,1,13))
In [ ]:
states_list = states.copy()
In [ ]:
state_vectors = dict(enumerate(states_list))
state_vectors
In [ ]:
# Invert the mapping: state vector -> state index
states = dict(zip(state_vectors.values(), state_vectors.keys()))
states
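For reference, NumPy offers the same two-way mapping without materializing the full product; a minimal sketch using np.ravel_multi_index (itertools.product enumerates in the same row-major order):
In [ ]:
dims = tuple(len(ind.q_levels) for ind in indicators.values())
vec = (5, 1, 13)
idx = int(np.ravel_multi_index(vec, dims))
assert idx == states[vec]
assert tuple(np.unravel_index(idx, dims)) == vec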
In [ ]:
import random
In [ ]:
index = random.randrange(len(states))  # randrange excludes the upper bound; randint(0, len(states)) could return an invalid key
index
In [ ]:
states[state_vectors[index]] == index
In [ ]:
state_vectors[states[state_vectors[index]]] == state_vectors[index]
In [ ]:
rand_vec = tuple(np.random.randint(0, 100, 3))  # each indicator here has 100 q_levels
rand_vec
In [ ]:
state_vectors[states[rand_vec]] == rand_vec
In [ ]:
from recommender.environment import Environment
from recommender.order import Order
In [ ]:
env = Environment(data_df, indicators)
In [ ]:
env.state_vectors
In [ ]:
len(env.indicators['ind2'].q_levels)
In [ ]:
env.states[(1,100,1)]
In [ ]:
old_pos_df = env.portfolio.get_positions()
reward, new_state = env.get_consequences([Order(['AAPL', Order.BUY, 100]),
                                          Order(['AAPL', Order.SELL, 45]),
                                          Order(['AAPL', Order.BUY, 10])])
new_pos_df = env.portfolio.get_positions()
In [ ]:
old_pos_df
In [ ]:
new_pos_df
In [ ]:
import recommender.portfolio as port

def reward_value_change(old_pos_df, new_pos_df):
    """Reward: change in total portfolio value between two position snapshots."""
    return new_pos_df[port.VALUE].sum() - old_pos_df[port.VALUE].sum()
In [ ]:
def reward_cash_change(old_pos_df, new_pos_df):
    """Reward: change in the cash position only."""
    return new_pos_df.loc[port.CASH, port.VALUE] - old_pos_df.loc[port.CASH, port.VALUE]
In [ ]:
reward_value_change(old_pos_df, new_pos_df)
In [ ]:
reward_cash_change(old_pos_df, new_pos_df)
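Either function fits the reward_fun parameter of Environment (see the signature further below), assuming the environment calls it as reward_fun(old_pos_df, new_pos_df); a hedged usage sketch:
In [ ]:
# Sketch: an environment that rewards cash changes instead of value changes
env_cash = Environment(data_df, indicators=indicators, reward_fun=reward_cash_change, symbol='AAPL')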
In [ ]:
data_df.shape
In [ ]:
from recommender.agent import Agent
In [ ]:
NUM_ACTIONS = 3  # BUY, SELL, NOTHING
SYMBOL = 'AAPL'
indicators = {'ind1': ind1}
env = Environment(data_df, indicators=indicators, symbol=SYMBOL)
num_states = len(env.states)
agent = Agent(num_states,
              NUM_ACTIONS,
              alpha=0.2,
              gamma=0.9,
              random_actions_rate=0.9,
              random_actions_decrease=0.999,
              dyna_iterations=0,
              verbose=False)
# For reference, the Environment constructor signature:
# Environment(data_df, indicators=None, initial_cap=1000, leverage_limit=3.0,
#             reward_fun=None, symbol='AAPL')
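With the environment and agent constructed, one interaction step could look like this sketch. Only env.get_consequences and the Order API are shown elsewhere in this notebook; action_to_orders and the action encoding are hypothetical.
In [ ]:
def action_to_orders(action, symbol=SYMBOL, shares=100):
    """Map a discrete action index to a list of Orders (hypothetical encoding)."""
    if action == 0:
        return [Order([symbol, Order.BUY, shares])]
    elif action == 1:
        return [Order([symbol, Order.SELL, shares])]
    return []  # NOTHING: no orders

reward, new_state = env.get_consequences(action_to_orders(0))
reward, new_state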