In [55]:
dfMain2 = pd.DataFrame(index=dates)
# dfMain = dfMain.join(dfSPY)
dfMain2 = dfMain2.join(dfJPM)
dfMain2.dropna(inplace=True)
print("Inspect missing values:")
display(dfMain2.isnull().sum())
print(len(dfMain2))
In [56]:
# Adjust Open, High, Low, Volume
dfMain2['Adj Factor'] = dfMain2['Adj Close'] / dfMain2['Close']
dfMain2['Open'] = dfMain2['Open'] * dfMain2['Adj Factor']
dfMain2['High'] = dfMain2['High'] * dfMain2['Adj Factor']
dfMain2['Low'] = dfMain2['Low'] * dfMain2['Adj Factor']
dfMain2['Volume'] = dfMain2['Volume'] / dfMain2['Adj Factor']
dfMain2.drop(['Close', 'Adj Factor'], axis=1, inplace=True)
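The Adj Factor cell above back-adjusts the raw OHLC fields onto the same split- and dividend-adjusted scale as Adj Close, and scales Volume inversely so that price-times-volume turnover is preserved. A minimal sketch on one synthetic row (values are illustrative, not from the dataset):

import pandas as pd

toy = pd.DataFrame({'Open': [100.0], 'High': [102.0], 'Low': [99.0],
                    'Close': [101.0], 'Adj Close': [50.5], 'Volume': [1000.0]})
factor = toy['Adj Close'] / toy['Close']  # 0.5, e.g. after a 2-for-1 split
print((toy['Open'] * factor).iloc[0])     # 50.0 -> open on the adjusted scale
print((toy['Volume'] / factor).iloc[0])   # 2000.0 -> adjusted share count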
In [57]:
display(dfMain2.head())
In [58]:
feature_days = 21 * 6
# Price Engineering
for i in range(feature_days):
# Get opens
dfMain2['-' + str(i + 1) + 'd_Open'] = dfMain2['Open'].shift(i + 1)
# Get adjCloses
dfMain2['-' + str(i + 1) + 'd_adjClose'] = dfMain2['Adj Close'].shift(i + 1)
# Get Highs
dfMain2['-' + str(i + 1) + 'd_High'] = dfMain2['High'].shift(i + 1)
# Get Lows
dfMain2['-' + str(i + 1) + 'd_Low'] = dfMain2['Low'].shift(i + 1)
# TODO: remove -xd_Open, -xd_adjClose, -xd_High, -xd_Low, x = range(1, feature_days + 1)
period_list = [21*x for x in range(1, 13)] # Create relative bases
period_list.extend([5, 10]) # Add 1- and 2-week comparison bases
print(period_list)
for x in period_list:
# Get Max volumes
dfMain2[str(x) + 'd_Max_Vol'] = dfMain2['Volume'].rolling(window=x).max()
# Get Avg volumes
dfMain2[str(x) + 'd_Avg_Vol'] = dfMain2['Volume'].ewm(span=x).mean()
# Get Min volumes
dfMain2[str(x) + 'd_Min_Vol'] = dfMain2['Volume'].rolling(window=x).min()
# TODO: remove xd_Max_Vol, xd_Avg_Vol, xd_Min_Vol, for x in period_list
dfMain2['Abs_Spread'] = np.abs(dfMain2['Adj Close'] - dfMain2['Open'])
# dfMain2['Abs_Spread_Shift1'] = dfMain2['Abs_Spread'].shift()
for x in period_list:
# Get Max spreads
dfMain2[str(x) + 'd_Max_Spread'] = dfMain2['Abs_Spread'].rolling(window=x).max()
# Get Avg spreads
dfMain2[str(x) + 'd_Avg_Spread'] = dfMain2['Abs_Spread'].ewm(span=x).mean()
# TODO: remove xd_Max_Spread, xd_Avg_Spread, for x in period_list
dfMain2.drop(['Abs_Spread'], axis=1, inplace=True)
display(dfMain2.head())
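The lag features and comparison bases above reduce to three pandas primitives: shift() for lags, rolling() for trailing-window extrema, and ewm() for exponentially weighted averages. A toy check on a hypothetical series:

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
print(s.shift(1).tolist())                 # [nan, 1.0, 2.0, 3.0, 4.0] -- the lag-1 column
print(s.rolling(window=3).max().tolist())  # [nan, nan, 3.0, 4.0, 5.0] -- trailing max
print(s.ewm(span=3).mean().round(3).tolist())  # exponentially weighted average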
In [59]:
# Volume Engineering
start_time = time.time()
for i in range(feature_days):
# Get volumes
dfMain2['-' + str(i + 1) + 'd_Vol'] = dfMain2['Volume'].shift(i + 1)
# Get relative volumes
for x in period_list:
dfMain2['-' + str(i + 1) + 'd_Vol_' + str(x) + 'Max'] = dfMain2['-' + str(i + 1) + 'd_Vol'] / dfMain2[str(x) + 'd_Max_Vol']
dfMain2['-' + str(i + 1) + 'd_Vol_' + str(x) + 'Avg'] = dfMain2['-' + str(i + 1) + 'd_Vol'] / dfMain2[str(x) + 'd_Avg_Vol']
dfMain2['-' + str(i + 1) + 'd_Vol_' + str(x) + 'Min'] = dfMain2['-' + str(i + 1) + 'd_Vol'] / dfMain2[str(x) + 'd_Min_Vol']
print("Generating volume features took {} seconds.".format(time.time() - start_time))
In [60]:
# Spread Engineering
start_time = time.time()
for i in range(feature_days):
# Get spread
dfMain2['-' + str(i + 1) + 'd_Spread'] = dfMain2['-' + str(i + 1) + 'd_adjClose'] - dfMain2['-' + str(i + 1) + 'd_Open']
# Get relative spread
for x in period_list:
dfMain2['-' + str(i + 1) + 'd_Spread_' + str(x) + 'Max'] = dfMain2['-' + str(i + 1) + 'd_Spread'] / dfMain2[str(x) + 'd_Max_Spread']
dfMain2['-' + str(i + 1) + 'd_Spread_' + str(x) + 'Avg'] = dfMain2['-' + str(i + 1) + 'd_Spread'] / dfMain2[str(x) + 'd_Avg_Spread']
# dfMain2['-' + str(i + 1) + 'd_Spread_' + str(x) + 'Min'] = dfMain2['-' + str(i + 1) + 'd_Spread'] / dfMain2[str(x) + 'd_Min_Spread']
print("Generating spread features took {} seconds.".format(time.time() - start_time))
In [61]:
# Level Engineering
start_time = time.time()
for x in period_list:
# Get Max adjClose
dfMain2[str(x) + 'd_Max_Price'] = dfMain2['Adj Close'].rolling(window=x).max()
# Get Avg adjClose
dfMain2[str(x) + 'd_Avg_Price'] = dfMain2['Adj Close'].ewm(span=x).mean()
# Get Min adjClose
dfMain2[str(x) + 'd_Min_Price'] = dfMain2['Adj Close'].rolling(window=x).min()
# Get Std adjClose
dfMain2[str(x) + 'd_Std_Price'] = dfMain2['Adj Close'].ewm(span=x).std()
# TODO: remove xd_Max_Price, xd_Avg_Price, xd_Min_Price. Retain xd_Std_Price for x in period_list
for i in range(feature_days):
# Get relative price
for x in period_list:
dfMain2['-' + str(i + 1) + 'd_Price_' + str(x) + 'Max'] = dfMain2['-' + str(i + 1) + 'd_adjClose'] / dfMain2[str(x) + 'd_Max_Price']
dfMain2['-' + str(i + 1) + 'd_Price_' + str(x) + 'Avg'] = dfMain2['-' + str(i + 1) + 'd_adjClose'] / dfMain2[str(x) + 'd_Avg_Price']
dfMain2['-' + str(i + 1) + 'd_Price_' + str(x) + 'Min'] = dfMain2['-' + str(i + 1) + 'd_adjClose'] / dfMain2[str(x) + 'd_Min_Price']
print("Generating level features took {} seconds.".format(time.time() - start_time))
In [62]:
def upperwick(open, adj_close, high):
    return high > open and high > adj_close
def lowerwick(open, adj_close, low):
    return low < open and low < adj_close
# Get wicks - new code has 10X speed!
start_time = time.time()
for i in range(feature_days):
dfMain2['-' + str(i + 1) + 'd_upperwick_bool'] = dfMain2.apply(lambda row: upperwick(row['-' + str(i + 1) + 'd_Open'], row['-' + str(i + 1) + 'd_adjClose'], row['-' + str(i + 1) + 'd_High']), axis=1)
dfMain2['-' + str(i + 1) + 'd_lowerwick_bool'] = dfMain2.apply(lambda row: lowerwick(row['-' + str(i + 1) + 'd_Open'], row['-' + str(i + 1) + 'd_adjClose'], row['-' + str(i + 1) + 'd_Low']), axis=1)
# TODO: remove -xd_upperwick_bool, -xd_lowerwick_bool, x in range(1, feature_days + 1)
print("Getting wicks took {} seconds.".format(time.time() - start_time))
In [63]:
def get_upperwick_length(open, adj_close, high):
return high - max(open, adj_close)
def get_lowerwick_length(open, adj_close, low):
return min(open, adj_close) - low
start_time = time.time()
# Transform upper wicks
for i in range(feature_days):
has_upperwicks = dfMain2['-' + str(i + 1) + 'd_upperwick_bool']
has_lowerwicks = dfMain2['-' + str(i + 1) + 'd_lowerwick_bool']
dfMain2.loc[has_upperwicks, '-' + str(i + 1) + 'd_upperwick'] = dfMain2.loc[has_upperwicks, :].apply(lambda row: get_upperwick_length(row['-' + str(i + 1) + 'd_Open'], row['-' + str(i + 1) + 'd_adjClose'], row['-' + str(i + 1) + 'd_High']), axis=1)
dfMain2.loc[has_lowerwicks, '-' + str(i + 1) + 'd_lowerwick'] = dfMain2.loc[has_lowerwicks, :].apply(lambda row: get_lowerwick_length(row['-' + str(i + 1) + 'd_Open'], row['-' + str(i + 1) + 'd_adjClose'], row['-' + str(i + 1) + 'd_Low']), axis=1)
# Get relative upperwick length (normalized by the 126-day average spread)
dfMain2.loc[has_upperwicks, '-' + str(i + 1) + 'd_upperwick'] = dfMain2.loc[has_upperwicks, '-' + str(i + 1) + 'd_upperwick'] / dfMain2.loc[has_upperwicks, '126d_Avg_Spread']
# Get relative lowerwick length
dfMain2.loc[has_lowerwicks, '-' + str(i + 1) + 'd_lowerwick'] = dfMain2.loc[has_lowerwicks, '-' + str(i + 1) + 'd_lowerwick'] / dfMain2.loc[has_lowerwicks, '126d_Avg_Spread']
# Assign 0 to no-upperwick days
dfMain2.loc[np.logical_not(dfMain2['-' + str(i + 1) + 'd_upperwick_bool']), '-' + str(i + 1) + 'd_upperwick'] = 0
# Assign 0 to no-lowerwick days
dfMain2.loc[np.logical_not(dfMain2['-' + str(i + 1) + 'd_lowerwick_bool']), '-' + str(i + 1) + 'd_lowerwick'] = 0
print("Transforming wicks took {} seconds.".format(time.time() - start_time))
In [64]:
dfMain2['Trade Price'] = dfMain2['Adj Close']
print(dfMain2[['Trade Price', 'Open', 'Adj Close']].head())
In [65]:
print(dfMain2.shape)
In [66]:
# Remove raw features
# raw_features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
raw_features = []
# Remove vol comparison base
# vol_compare_type = ['Max', 'Avg', 'Min']
vol_compare_features = []
# Remove vol meta features
vol_meta = []
# for d in range(1, feature_days + 1):
# vol_meta.append('-' + str(d) + 'd_Vol')
# Remove spread comparison base
# sprd_compare_type = ['Max', 'Avg']
sprd_compare_features = []
# for d in period_list:
# for t in vol_compare_type:
# vol_compare_features.append(str(d) + 'd_' + t + '_Vol')
# for u in sprd_compare_type:
# sprd_compare_features.append(str(d) + 'd_' + u + '_Spread')
# Remove spread meta features
# price_raw = ['Open', 'adjClose', 'High', 'Low']
spread_meta = []
# for d in range(1, feature_days + 1):
# for t in price_raw:
# spread_meta.append('-' + str(d) + 'd_' + t)
# Remove price comparison base
# price_compare_type = vol_compare_type
price_compare_features = []
# for d in period_list:
# for t in price_compare_type:
# price_compare_features.append(str(d) + 'd_' + t + '_Price')
# Remove wick bools
wick_type = ['upperwick', 'lowerwick']
wick_bools = []
for d in range(1, feature_days + 1):
for t in wick_type:
wick_bools.append('-' + str(d) + 'd_' + t + '_bool')
drop_list = []
drop_list = drop_list + raw_features + vol_compare_features + vol_meta + sprd_compare_features + spread_meta + price_compare_features + wick_bools
dfMain2.drop(drop_list, axis=1, inplace=True)
In [67]:
from copy import deepcopy
new_data_full = deepcopy(dfMain2)
new_data_full.dropna(inplace=True)
In [68]:
display(new_data_full.head())
In [69]:
# Add derivatives
# Add 1st derivatives
diff1_temp = new_data_full.ix[:, :-1] - new_data_full.ix[:, :-1].shift()
diff1 = diff1_temp.add_suffix('_diff1')
# Add 2nd derivatives
diff2 = diff1_temp - diff1_temp.shift()
diff2 = diff2.add_suffix('_diff2')
# Concatenate all dataframes
trade_price = new_data_full['Trade Price']
new_data_full = pd.concat([new_data_full, diff1, diff2], axis=1)
new_data_full['Trade Price'] = trade_price
new_data_full.dropna(inplace=True)
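The derivative features are plain discrete differences: diff1 is x_t - x_{t-1}, and diff2 differences diff1 once more. A toy check:

import pandas as pd

x = pd.Series([1.0, 3.0, 6.0, 10.0])
d1 = x - x.shift()    # [nan, 2.0, 3.0, 4.0] -- first difference
d2 = d1 - d1.shift()  # [nan, nan, 1.0, 1.0] -- second difference
print(d1.tolist(), d2.tolist())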
In [70]:
trade_price_idx = new_data_full.columns.get_loc("Trade Price")
cols = new_data_full.columns.tolist()
cols = cols[:trade_price_idx] + cols[trade_price_idx + 1:] + [cols[trade_price_idx]]
new_data_full = new_data_full.reindex(columns=cols)
In [71]:
def split_data(df):
df_features = df.ix[:, :-1]
df_labels = df.ix[:, -1]
return df_features, df_labels
df_features, df_labels = split_data(new_data_full)
In [72]:
# Normalization
def normalization(X_train, X_test):
X_test_norm = (X_test - X_train.mean()) / (X_train.max() - X_train.min())
return X_test_norm
df_features_norm = normalization(df_features, df_features)
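normalization() centers on the reference frame's mean and scales by its range. In the call above the features are normalized against themselves; the same function also supports scaling a held-out frame with training statistics only, which is what avoids lookahead. A toy sketch:

import pandas as pd

X_train = pd.DataFrame({'f': [1.0, 2.0, 3.0, 4.0]})
X_test = pd.DataFrame({'f': [5.0]})
X_test_norm = (X_test - X_train.mean()) / (X_train.max() - X_train.min())
print(X_test_norm['f'].iloc[0])  # (5 - 2.5) / (4 - 1) = 0.8333...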
In [73]:
from sklearn.decomposition import PCA
start_time = time.time()
# Choose n in PCA
pca_dim = 1000
pca = PCA(n_components=pca_dim)
pca.fit(df_features_norm) # all data
cp_imp = pca.explained_variance_ratio_
cp_imp = pd.Series(cp_imp)
cp_imp_cum = cp_imp.cumsum()
cp_imp_cum.plot()
print("Running PCA took {} seconds".format(time.time() - start_time))
In [75]:
# PCA Fit and Transformation and DF Reconstruction
def pca_transform_reconstruct(fit_data, trans_data, pca_dim):
pca = PCA(n_components=pca_dim)
pca.fit(fit_data)
data_pca = pca.transform(trans_data)
data_pca = pd.DataFrame(data=data_pca)
data_pca['Date'] = trans_data.index
data_pca.set_index(data_pca['Date'], inplace=True)
del data_pca.index.name
del data_pca['Date']
return data_pca
df_features_pca = pca_transform_reconstruct(df_features_norm, df_features_norm, 300)
In [76]:
df_features = normalization(df_features_pca, df_features_pca)
In [77]:
new_data_full_pca = df_features
new_data_full_pca['Trade Price'] = df_labels
In [78]:
validation_start_date = datetime(2006, 9, 25)
validation_end_date = datetime(2011, 9, 27)
test_start_date = datetime(2011, 9, 26)
test_end_date = datetime(2016, 9, 27)
print("Validation phase")
print("{0} Trade Price: {1}".format(validation_start_date, new_data_full_pca.ix[validation_start_date, 'Trade Price']))
print("{0} Trade Price: {1}".format(validation_end_date, new_data_full_pca.ix[validation_end_date, 'Trade Price']))
validation_phase_data = new_data_full_pca.ix[validation_start_date:validation_end_date, :]
print("Number of dates in validation dataset: {}\n".format(len(validation_phase_data)))
print("Test phase")
print("{0} Trade Price: {1}".format(test_start_date, new_data_full_pca.ix[test_start_date, 'Trade Price']))
print("{0} Trade Price: {1}".format(test_end_date, new_data_full_pca.ix[test_end_date, 'Trade Price']))
test_phase_data = new_data_full_pca.ix[test_start_date:test_end_date, :]
print("Number of dates in test dataset: {}".format(len(test_phase_data)))
In [80]:
class MonkeyBot(object):
def __init__(self, dfEnv, cash=1000, share=0, pv=0, random_state=0):
random.seed(random_state)
np.random.seed(random_state)
self.cash = cash
self.share = share
self.pv = pv
self.asset_history_list = []
self.action_list = []
self.env = deepcopy(dfEnv)
def buy(self, stock_price, cost, fold=1):
if self.cash < stock_price:
self.hold(stock_price)
else:
num_affordable = int(self.cash // stock_price)
buy_amount = int(num_affordable // fold)
self.cash = self.cash - stock_price * buy_amount
self.share = self.share + buy_amount
self.pv = stock_price * self.share
# Adding transaction cost
self.trading_cost(buy_amount, cost)
# Append action to action list
self.action_list.append('Buy')
def sell(self, stock_price, cost, fold=1):
if self.share == 0:
self.hold(stock_price)
else:
sell_amount = int(self.share // fold)
self.cash = self.cash + stock_price * sell_amount
self.pv = 0
self.share = 0
# Adding transaction cost
self.trading_cost(sell_amount, cost)
self.action_list.append('Sell')
def hold(self, stock_price):
self.pv = stock_price * self.share
def trading_cost(self, trading_amount, cost):
if cost is None:
pass
elif cost == 'low':
if trading_amount * 0.01 < 1.99:
self.cash = self.cash - 1.99
else:
self.cash = self.cash - trading_amount * 0.01
elif cost == 'medium':
if trading_amount * 0.01 < 5:
self.cash = self.cash - 5
else:
self.cash = self.cash - trading_amount * 0.01
elif cost == 'high':
if trading_amount * 0.01 < 7:
self.cash = self.cash - 7
else:
self.cash = self.cash - trading_amount * 0.01
else:
raise ValueError("Invalid cost parameter!")
def reset(self):
self.cash = 1000
self.share = 0
self.pv = 0
def make_decision(self, x, cost):
random_choice = random.choice([1, 2])  # Hold (0) is deliberately excluded, so the monkey always trades
if random_choice == 0:
self.hold(x)
elif random_choice == 1:
self.buy(x, cost)
elif random_choice == 2:
self.sell(x, cost)
else:
raise ValueError("Invalid choice!")
return self.pv # for frame-wise operation
def simulate(self, iters, cost=None):
start_time = time.time()
for i in range(iters):
for index, row in self.env.iterrows():
self.make_decision(row['Trade Price'], cost)
self.asset_history_list.append(self.pv + self.cash)
self.reset()
print("{0} iterations took {1} seconds".format(iters, time.time() - start_time))
return self.asset_history_list, self.action_list
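The next cell inspects monkey_full, whose construction is not shown in this section. It was presumably created and run along these lines (a hedged reconstruction; the iteration count and cost level are assumptions):

monkey_full = MonkeyBot(new_data_full_pca, random_state=0)
monkey_full.simulate(20000, cost='high')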
In [81]:
pd.Series(monkey_full.asset_history_list).describe()
In [82]:
import sys
class ChimpBot(MonkeyBot):
"""An agent that learns to drive in the smartcab world."""
def __init__(self, dfEnv, iter_random_rounds, gamma, random_state=0, test_mode=False, cash=1000, share=0, pv=0):
super(ChimpBot, self).__init__(dfEnv, cash, share, pv, random_state)
# From MonkeyBot:
# sets self.cash = 1000
# sets self.share = 0
# sets self.pv = 0
# sets self.asset_history_list = []
# sets self.env = dfEnv
# implements buy(self, stock_price)
# implements sell(self, stock_price)
# implements hold(self)
# Set random state
self.random_state = random_state
random.seed(self.random_state)
np.random.seed(self.random_state)
# Chimp parameters
self.valid_actions = ['Buy', 'Sell']
self.gamma = gamma # Discount factor
self.epsilon = 1 # Exploration-exploitation
self.test_mode = test_mode
self.random_rounds = iter_random_rounds # Number of rounds where the bot chooses to go monkey
self.num_features = len(dfEnv.columns) # Use every columns from the input data
# Turn input data into index, row
self.iter_env = self.env.iterrows()
self.now_env_index, self.now_row = self.iter_env.next()
# Numpy alternative
# self.env_arr = self.env.values
# self.now_row = 0
# May need to put back later
# self.prev_cash = self.cash
# self.prev_share = self.share
# self.prev_pv = self.pv
# Q-table and q_df
self.q_df_columns = list(self.env.columns)
self.q_df_columns.extend(['Action', 'Q Value'])
self.q_df = pd.DataFrame(columns=self.q_df_columns)
self.q_dict = defaultdict(lambda: (0, 0)) # element of q_dict is (state, act): (q_value, t)
self.q_dict_analysis = defaultdict(lambda: (0, 0))
# Misc
self.reset_counter = 0
def make_q_df(self):
"""Make a q_df out of the q_dict."""
print("Making q_df...")
result_dict = defaultdict(list)
for index, row in self.q_dict.iteritems():
for i in range(len(self.q_dict.keys()[0])):
column_name = 'col' + str(i + 1)
result_dict[column_name].append(index[i])
result_dict['Q'].append(self.q_dict[index][0])
self.q_df = pd.DataFrame(result_dict)
q_df_column_list = ['col' + str(x) for x in range(1, self.num_features - 1 + 1 + 1)] # features + action
q_df_column_list.append('Q')
self.q_df = self.q_df[q_df_column_list]
def transfer_action(x):
if x == 'Buy':
return 1
elif x == 'Sell':
return 2
elif x == 'Hold':
return 0
else:
raise ValueError("Wrong action!")
def str_float_int(x):
return int(float(x))
arr_int = np.vectorize(str_float_int)
print(self.q_df.head())
self.q_df.ix[:, -2] = self.q_df.ix[:, -2].apply(transfer_action)
self.q_df.ix[:, :-1] = self.q_df.ix[:, :-1].apply(arr_int) # Maybe useless
def split_q_df(self):
"""Splitting q_df into features and labels."""
self.q_df_X = self.q_df.ix[:, :-1]
self.q_df_y = self.q_df.ix[:, -1]
def train_on_q_df(self):
"""Model the q_df."""
print("Training on q_df...")
self.q_reg = RandomForestRegressor(n_estimators=2000, max_features='sqrt', n_jobs=-1, random_state=self.random_state)
self.q_reg = self.q_reg.fit(self.q_df_X, self.q_df_y)
def update_q_model(self):
"""1. Make q_df
2. Split q_df
3. Train on q_df
"""
# print("Updating Q model...")
# start_time = time.time()
self.make_q_df()
self.split_q_df()
self.train_on_q_df()
# print("Update took {} seconds".format(time.time() - start_time))
def from_state_action_predict_q(self, state_action):
"""Make prediction using self.reg"""
state_action = [state_action]
pred_q = self.q_reg.predict(state_action)
return pred_q
def max_q(self):
# print("Calculating Max Q")
def transfer_action(x):
if x == 'Buy':
return 1
elif x == 'Sell':
return 2
elif x == 'Hold':
return 0
else:
raise ValueError("Invalid action!")
# def str_float_int(x):
# return int(float(x))
max_q = None
q_compare_dict = {}
if len(self.now_states) != self.num_features - 1:
raise ValueError("Got ya bastard! @ MaxQ")
# Populate the q_dict
for act in set(self.valid_actions):
# Append the action as one more feature to form the state-action key
self.now_states.append(act)
now_row_key = tuple(self.now_states)
_ = self.q_dict[now_row_key]
try:
self.q_reg
except AttributeError:
pass
# print('No q_reg yet...going with default.')
else:
if _[1] == 0:
# print("Dreaming mode...")
single_X = np.array(now_row_key)
# print(single_X)
# arr_int = np.vectorize(str_float_int)
single_X[-1] = transfer_action(single_X[-1])
# single_X = arr_int(single_X)
single_X = single_X.reshape(1, -1)
pred_q = self.q_reg.predict(single_X)
dreamed_q = (1 - (1 / (self.q_dict[now_row_key][1] + 1))) * self.q_dict[now_row_key][0] + (1 / (self.q_dict[now_row_key][1] + 1)) * pred_q[0]
self.q_dict[now_row_key] = (dreamed_q, self.q_dict[now_row_key][1] + 1)
q_compare_dict[now_row_key] = self.q_dict[now_row_key]
self.now_states.pop()
try:
key, qAndT = max(q_compare_dict.iteritems(), key=lambda x:x[1])
except ValueError:
print("Wrong Q Value in Q Compare Dict!")
sys.exit(1)
else:
return key[-1], qAndT[0], qAndT[1]
def q_update(self):
# print("Updating Q table...")
# prev_states.append(self.prev_yes_share)
self.prev_states.append(self.prev_action)
prev_states_key = tuple(self.prev_states)
if len(prev_states_key) != self.num_features - 1 + 1:
raise ValueError("Got ya bastard! @ Q_Update")
q_temp = self.q_dict[prev_states_key]
q_temp0 = (1 - (1 / (q_temp[1] + 1))) * q_temp[0] + (1 / (q_temp[1] + 1)) * (self.reward + self.gamma * self.max_q()[1])
self.q_dict[prev_states_key] = (q_temp0, q_temp[1] + 1)
# For analysis purpose
self.q_dict_analysis[prev_states_key] = (q_temp0, self.prev_env_index)
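# Note: the update above is tabular Q-learning with a visit-count learning rate
# alpha = 1 / (t + 1), i.e. a running average of bootstrapped targets:
#   Q_new(s, a) = (1 - alpha) * Q_old(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
# max_q() applies the same blend in its dreaming mode, with the random forest's
# prediction standing in for the target on not-yet-visited state-action keys.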
def reset(self):
# print("Resetting...")
# Portfolio change over iterations
self.asset_history_list.append(self.pv + self.cash)
self.iter_env = self.env.iterrows()
self.now_env_index, self.now_row = self.iter_env.next()
# self.now_row = 0 # Numpy option
self.cash = 1000
self.share = 0
self.pv = 0
# Delete all prevs
del self.prev_states
del self.prev_env_index
del self.prev_cash
del self.prev_share
del self.prev_pv
del self.prev_action
if self.test_mode is True:
    self.epsilon = 0
else:
    if self.epsilon - 1.0 / self.random_rounds > 0.00001:
        self.epsilon = self.epsilon - 1.0 / self.random_rounds  # linear decay over random_rounds
    else:
        self.epsilon = 0.00001  # epsilon floor
self.reset_counter += 1
if self.reset_counter % self.random_rounds == 0:
self.update_q_model()
if np.abs(self.epsilon - 0.00001) > 0.000001:
self.action_list = []
def make_decision(self):
return self.max_q()[0]
def update(self, cost):
# Update state
self.now_states = list(self.now_row)
self.now_states.pop() # Remove Trade Price
### Numpy option
# try:
# self.now_states = list(self.env_arr[self.now_row])
# except IndexError:
# print("End of data.")
# sys.exit(1)
# self.now_states.pop() # Remove Trade Price
if len(self.now_states) != self.num_features - 1:
raise ValueError("Got ya bastard! @ Q_Update...something wrong with the self.now_row!!!")
# Update Q-table using prevs
try:
self.prev_states
except AttributeError:
pass
# print("Running the first time...no prevs exist.")
else:
self.hold(self.now_row[-1])
self.reward = ((self.cash - self.prev_cash) + (self.pv - self.prev_pv)) / (self.prev_cash + self.prev_pv)
self.q_update()
# All the prev stuff!
self.prev_states = copy(self.now_states)
self.prev_env_index = deepcopy(self.now_env_index)
# self.prev_env_index = self.env.index[self.now_row] # Numpy option
self.prev_cash = self.cash
self.prev_share = self.share
self.prev_pv = self.pv
# Exploitation-exploration decisioning
self.decision = np.random.choice(2, p = [self.epsilon, 1 - self.epsilon]) # decide to go random or with the policy
# self.decision = 0 # Force random mode
# print("Random decision: {0}, Epislon: {1}".format(self.decision, self.epsilon))
if self.decision == 0: # if zero, go random
action = random.choice(self.valid_actions)
else: # else go with the policy
action = self.make_decision()
# Execute action and get reward
if action == 'Buy':
# print(self.now_row)
self.buy(self.now_row[-1], cost)
# self.buy(self.env_arr[self.now_row][-1], cost) # Numpy option
elif action == 'Sell':
# print(self.now_row)
self.sell(self.now_row[-1], cost)
# self.sell(self.env_arr[self.now_row][-1], cost) # Numpy option
elif action == 'Hold':
# print(self.now_row)
self.hold(self.now_row[-1])
# self.hold(self.env_arr[self.now_row][-1]) # Numpy option
else:
raise ValueError("Invalid action man!")
self.prev_action = action
# self.now_row += 1 # Numpy option
try:
self.now_env_index, self.now_row = self.iter_env.next()
except StopIteration:
pass
def simulate(self, cost=None):
start_time = time.time()
for i in range(self.random_rounds):
for l in range(len(self.env)):
# for l in range(len(self.env_arr)): # Numpy option
self.update(cost)
self.reset()
if (i + 1) % 500 == 0:
print(self.asset_history_list[-1])
print("Round {} finished".format(i + 1))
# print(self.asset_history_list[-1])
# print("Round {} finished".format(i + 1))
print("{0} rounds of simulation with cost = {1}, took {2} seconds".format(self.random_rounds, cost, time.time() - start_time))
return self.asset_history_list, self.action_list
In [83]:
god_chimp = ChimpBot(new_data_full_pca, iter_random_rounds=20000, gamma=0.9, random_state=0)
asset_history_list, action_list = god_chimp.simulate(cost='high')
print(pd.Series(action_list).describe())
print(asset_history_list[-1])
In [84]:
pd.Series(asset_history_list).plot()
In [85]:
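# Signed log scale: sign(x) * log(|x| + 1) compresses large magnitudes
# symmetrically while preserving sign, so losses below zero stay legible.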
(np.sign(pd.Series(asset_history_list)) * np.log(np.abs(pd.Series(asset_history_list)) + 1)).plot()
In [86]:
# Convert Q-Table to Dataframe from the God Chimp (full dataset)
iter_random_rounds=20000
result_dict = defaultdict(list)
for index, row in god_chimp.q_dict_analysis.iteritems():
for i in range(len(god_chimp.q_dict_analysis.keys()[0])):
column_name = 'col' + str(i + 1)
result_dict[column_name].append(index[i])
result_dict['Q'].append(god_chimp.q_dict_analysis[index][0])
result_dict['Date'].append(god_chimp.q_dict_analysis[index][1])
god_chimp_q_df = pd.DataFrame(result_dict)
# Yes share column removed
column_list = ['col' + str(x) for x in range(1, 301 + 1)]
column_list.extend(['Date', 'Q'])
god_chimp_q_df = god_chimp_q_df[column_list]
god_chimp_q_df.sort_values('Date', inplace=True)
god_chimp_q_df.reset_index(drop=True, inplace=True)
god_chimp_q_df.set_index(god_chimp_q_df['Date'], inplace=True)
del god_chimp_q_df.index.name
del god_chimp_q_df['Date']
print(len(god_chimp_q_df))
display(god_chimp_q_df.head())
In [87]:
god_chimp = ChimpBot(new_data_full_pca, iter_random_rounds=5000, gamma=0.75, random_state=0)
asset_history_list, action_list = god_chimp.simulate(cost='high')
print(pd.Series(action_list).describe())
print(asset_history_list[-1])
In [88]:
# Convert Q-Table to Dataframe from the God Chimp (full dataset)
iter_random_rounds=5000
result_dict = defaultdict(list)
for index, row in god_chimp.q_dict_analysis.iteritems():
for i in range(len(god_chimp.q_dict_analysis.keys()[0])):
column_name = 'col' + str(i + 1)
result_dict[column_name].append(index[i])
result_dict['Q'].append(god_chimp.q_dict_analysis[index][0])
result_dict['Date'].append(god_chimp.q_dict_analysis[index][1])
god_chimp_q_df = pd.DataFrame(result_dict)
# Yes share column removed
column_list = ['col' + str(x) for x in range(1, 301 + 1)]
column_list.extend(['Date', 'Q'])
god_chimp_q_df = god_chimp_q_df[column_list]
god_chimp_q_df.sort_values('Date', inplace=True)
god_chimp_q_df.reset_index(drop=True, inplace=True)
god_chimp_q_df.set_index(god_chimp_q_df['Date'], inplace=True)
del god_chimp_q_df.index.name
del god_chimp_q_df['Date']
print(len(god_chimp_q_df))
display(god_chimp_q_df.head())
In [89]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(god_chimp_q_df.ix[:, -2])
print(le.classes_)
god_chimp_q_df.ix[:, -2] = le.transform(god_chimp_q_df.ix[:, -2])
# = god_chimp_q_df.ix[:, -2].apply(action_to_int)
In [90]:
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
fs_data = god_chimp_q_df
fs_X = fs_data.ix[:, :-1]
fs_y = fs_data.ix[:, -1]
names = list(fs_X.columns)
In [91]:
estimator = SVR(kernel="linear")
rfe = RFE(estimator, 37, step=1)
rfe = rfe.fit(fs_X, fs_y)
In [92]:
rfe_ranking = sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names))
print("Features sorted by their rank:")
print(rfe_ranking)
In [93]:
rfe_cols = [x[1] for i, x in enumerate(rfe_ranking)]
rfe_cols = rfe_cols[:37]
In [94]:
from sklearn.linear_model import RandomizedLasso
rlasso = RandomizedLasso(alpha='aic')
rlasso.fit(fs_X, fs_y)
rlasso_ranking = sorted(zip(map(lambda x: round(x, 4), rlasso.scores_), names), reverse=True)
print "Features sorted by their score:"
print(rlasso_ranking)
In [95]:
# 24 features before the drop
plt.plot([x[0] for i, x in enumerate(rlasso_ranking)])
In [96]:
rlasso_cols = [x[1] for i, x in enumerate(rlasso_ranking) if x[0] >= 0.95]
print(len(rlasso_cols))
rlasso_cols = rlasso_cols[:96]
In [97]:
# reduced_columns = ['col' + str(x) for x in range(1, 11)]
reduced_columns = []
# reduced_columns.extend(rlasso_cols)
reduced_columns.extend(rfe_cols)
reduced_columns.extend(['col301', 'Q'])
reduced_columns = list(set(reduced_columns))
In [98]:
print(len(reduced_columns))
In [99]:
god_chimp_q_df = god_chimp_q_df[reduced_columns]
action_idx = god_chimp_q_df.columns.get_loc('col301')
cols = god_chimp_q_df.columns.tolist()
cols = cols[:action_idx] + cols[action_idx + 1:] + [cols[action_idx]]
god_chimp_q_df = god_chimp_q_df.reindex(columns=cols)
q_idx = god_chimp_q_df.columns.get_loc('Q')
cols = god_chimp_q_df.columns.tolist()
cols = cols[:q_idx] + cols[q_idx + 1:] + [cols[q_idx]]
god_chimp_q_df = god_chimp_q_df.reindex(columns=cols)
# god_chimp_q_df.ix[:, -2] = god_chimp_q_df.ix[:, -2].apply(action_to_int)
In [100]:
from sklearn.metrics import accuracy_score
# from sklearn.linear_model import LinearRegression
# import xgboost as xgb
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
def find_best_training_size(data_full, full_q_df, training_sizes, testing_size, target_data, random_state=0):
start_time = time.time()
accs = []
d_counter = 0
# Loop through all batches in validation dataset
(u, ) = data_full.index.get_indexer_for([target_data.index[0]])
for d in range(u, u + testing_size * (len(target_data) // testing_size), testing_size):
acc_num_train_months = []
d_counter += 1
# Dates in the batch
date_range = data_full.iloc[d:d + testing_size].index
# Loop through all sizes of training sets
for num_train_month in range(1, training_sizes + 1):
# Prepare Training/Testing Datasets
X_train = full_q_df.iloc[d - (int(21 * num_train_month)):d, :-1]
y_train = full_q_df.iloc[d - (int(21 * num_train_month)):d, -1]
X_test = full_q_df.ix[date_range, :-1]
y_test = full_q_df.ix[date_range, -1]
# Fit data and make predictions
# reg = GradientBoostingRegressor()  # alternative regressor; overridden by the RandomForest below
# reg = KNeighborsRegressor(n_neighbors=5, weights='distance', n_jobs=-1)
# reg = LinearRegression(n_jobs=-1)
# reg = SVR(kernel='rbf')
# reg = XGBRegressor()
reg = RandomForestRegressor(n_estimators=1500, max_features='auto', oob_score=True, n_jobs=-1, random_state=random_state)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
y_fit = reg.predict(X_train)
pred_q = y_pred
actions = X_test.ix[:, -1]
data = {'Action': actions, 'Q': pred_q}
df_pred = pd.DataFrame(data=data, index=y_test.index)
pred_actions = []
for date in date_range:
max_q = [0, -1]
for i, r in df_pred.ix[date].iterrows():
if r['Q'] > max_q[1]:
max_q = [r['Action'], r['Q']]
pred_actions.append(max_q[0])
best_actions = []
for date in date_range:
max_q = [0, -1]
for i, r in full_q_df.ix[date].iterrows():
if r['Q'] > max_q[1]:
max_q = [r[-2], r['Q']]
best_actions.append(max_q[0])
acc_num_train_months.append(accuracy_score(best_actions, pred_actions))
accs.append(np.array(acc_num_train_months))
print("Batch {0} completed....{1:.2f}%".format(d_counter, 100 * (d_counter / len(range(u, u + testing_size * (len(target_data) // testing_size), testing_size)))))
geo_means = np.power(reduce(lambda x,y: x*y, accs), (1/len(accs)))
arithmetic_means = reduce(lambda x,y: x+y, accs) / len(accs)
print("Geometric Means Max: {}".format((np.argmax(geo_means) + 1, np.max(geo_means))))
print("Arithemtic Means Max: {}".format((np.argmax(arithmetic_means) + 1, np.max(arithmetic_means))))
print("Grid search best num_train_year took {} seconds:".format(time.time() - start_time))
return (geo_means, arithmetic_means)
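The two aggregates returned above behave differently: the geometric mean punishes any batch with near-zero accuracy, while the arithmetic mean averages it away. A small sketch with illustrative per-batch accuracy arrays (one entry per candidate training size), equivalent to the reduce-based lines in the function:

import numpy as np

accs = [np.array([0.8, 0.6]), np.array([0.9, 0.3])]  # two batches, two training sizes
geo = np.prod(accs, axis=0) ** (1.0 / len(accs))     # [0.8485..., 0.4243...]
ari = np.sum(accs, axis=0) / float(len(accs))        # [0.85, 0.45]
print(geo, ari)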
In [101]:
means = find_best_training_size(data_full=new_data_full_pca, full_q_df=god_chimp_q_df, training_sizes=120, testing_size=5, target_data=validation_phase_data, random_state=0)
geo_means = means[0]
arithmetic_means = means[1]
In [102]:
print(geo_means)
print(sorted(range(len(geo_means)), key=lambda k: geo_means[k], reverse=True))
print(arithmetic_means)
print(sorted(range(len(arithmetic_means)), key=lambda k: arithmetic_means[k], reverse=True))
validation_phase_data['Trade Price'].plot()
plt.figure()
plt.plot(geo_means)
plt.figure()
plt.plot(arithmetic_means)
In [103]:
from sklearn.model_selection import GridSearchCV
def grid_search(data_full, full_q_df, training_size, testing_size, target_data, random_state=0):
start_time = time.time()
accs = []
d_counter = 0
# feature_importance_list = []
best_param_list = []
# Loop through all batches in validation dataset
(u, ) = data_full.index.get_indexer_for([target_data.index[0]])
for d in range(u, u + testing_size * (len(target_data) // testing_size), testing_size):
acc_num_train_months = []
d_counter += 1
# Dates in the batch
date_range = data_full.iloc[d:d + testing_size].index
# Loop through all sizes of training sets
num_train_month = training_size
# Prepare Training/Testing Datasets
X_train = full_q_df.iloc[d - (int(21 * num_train_month)):d, :-1]
y_train = full_q_df.iloc[d - (int(21 * num_train_month)):d, -1]
X_test = full_q_df.ix[date_range, :-1]
y_test = full_q_df.ix[date_range, -1]
# Fit data and make predictions
# reg = GradientBoostingRegressor()
# param_grid = {'n_estimators': [100, 200, 300, 400, 500], 'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3], 'min_samples_leaf': [1, 5, 10, 20, 50]}
# reg = KNeighborsRegressor(n_jobs=-1)
# param_grid = {'n_neighbors': [5, 10, 15, 20, 40, 80], 'weights': ['uniform', 'distance']}
reg = RandomForestRegressor(n_estimators=128, max_features='sqrt', n_jobs=-1, random_state=random_state)
# param_grid = {'n_estimators': [128, 1500], 'max_features': ['auto'], 'min_samples_leaf': [1, 10, 50]}
# reg_gs = GridSearchCV(reg, param_grid, scoring='neg_mean_squared_error')
reg.fit(X_train, y_train)
# best_param_list.append(reg_gs.best_params_)
# # Create feature importance histogram
# feature_importance_list.append(reg.feature_importances_)
# vif = [int(np.argmax(x)) for x in feature_importance_list]
# vif = pd.DataFrame(vif)
y_pred = reg.predict(X_test)
y_fit = reg.predict(X_train)
pred_q = y_pred
actions = X_test.ix[:, -1]
data = {'Action': actions, 'Q': pred_q}
df_pred = pd.DataFrame(data=data, index=y_test.index)
pred_actions = []
for date in date_range:
max_q = [0, -1]
for i, r in df_pred.ix[date].iterrows():
if r['Q'] > max_q[1]:
max_q = [r['Action'], r['Q']]
pred_actions.append(max_q[0])
best_actions = []
for date in date_range:
max_q = [0, -1]
for i, r in full_q_df.ix[date].iterrows():
if r['Q'] > max_q[1]:
max_q = [r[-2], r['Q']]
best_actions.append(max_q[0])
acc_num_train_months.append(accuracy_score(best_actions, pred_actions))
accs.append(np.array(acc_num_train_months))
print("Batch {0} completed....{1:.2f}%".format(d_counter, 100 * (d_counter / len(range(u, u + testing_size * (len(target_data) // testing_size), testing_size)))))
geo_means = np.power(reduce(lambda x,y: x*y, accs), (1/len(accs)))
arithmetic_means = reduce(lambda x,y: x+y, accs) / len(accs)
print("Geometric Means Max: {}".format((np.argmax(geo_means) + 1, np.max(geo_means))))
print("Arithemtic Means Max: {}".format((np.argmax(arithmetic_means) + 1, np.max(arithmetic_means))))
print("Grid search best num_train_year took {} seconds:".format(time.time() - start_time))
return (geo_means, arithmetic_means, best_param_list)
In [104]:
results = grid_search(data_full=new_data_full_pca, full_q_df=god_chimp_q_df, training_size=35, testing_size=7, target_data=validation_phase_data, random_state=0)
geo_means = results[0]
arithmetic_means = results[1]
In [105]:
# Start simulation for the chimp ------ get new full q_df
start_time = time.time()
num_iter = 1500
day_count = 0
pv_history_list = []
new_data_features = new_data_full.ix[:, :-1]
new_data_features_norm = normalization(new_data_features, new_data_features)
new_data_features_pca = pca_transform_reconstruct(new_data_features_norm, new_data_features_norm, 246)  # 246 components, matching the 247-column q_df below
new_data_full_norm2 = normalization(new_data_features_pca, new_data_features_pca)
new_data_full_norm2['Trade Price'] = new_data_full.ix[:, 'Trade Price']
new_data_full = new_data_full_norm2
# display(new_data_full.isnull().sum())
# display(new_data_full.describe())
display(new_data_full.head())
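# EnhancedChimpBot is not defined in this section; it is presumably the revised
# 246-feature ChimpBot defined in cell In [108] below.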
chimp_analytics = EnhancedChimpBot(new_data_full)
start_time = time.time()
for i in range(num_iter):
for l in range(len(chimp_analytics.env)):
chimp_analytics.update()
pv_history_list.append(chimp_analytics.cash + chimp_analytics.pv)
chimp_analytics.reset()
print("{0} rounds of training took {1} seconds".format(num_iter, time.time() - start_time))
print(pv_history_list[-1])
# Convert Q-Table to Dataframe from trained chimp (full)
result_dict = defaultdict(list)
for index, row in chimp_analytics.q_dict_analysis.iteritems():
for i in range(len(chimp_analytics.q_dict_analysis.keys()[0])):
column_name = 'col' + str(i + 1)
result_dict[column_name].append(index[i])
result_dict['Q'].append(chimp_analytics.q_dict_analysis[index][0])
result_dict['Date'].append(chimp_analytics.q_dict_analysis[index][1])
new_full_q_df = pd.DataFrame(result_dict)
In [106]:
new_column_list = ['col' + str(x) for x in range(1, 247 + 1)]
new_column_list.extend(['Date', 'Q'])
new_full_q_df = new_full_q_df[new_column_list]
new_full_q_df.sort_values('Date', inplace=True)
new_full_q_df.set_index(new_full_q_df['Date'], inplace=True)
del new_full_q_df.index.name
del new_full_q_df['Date']
display(new_full_q_df.head())
print(type(new_full_q_df.index[0]))
new_full_q_df['col247'] = new_full_q_df['col247'].apply(action_to_int)  # action_to_int (Buy/Sell/Hold -> int) is presumably defined earlier in the notebook, outside this section
In [107]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
start_time = time.time()
accs = []
training_sizes = 48
testing_size = 7
# Loop through all batches in validation dataset
(u, ) = new_data_full.index.get_indexer_for([validation_phase_data.index[0]])
for d in range(u, u + testing_size * (252 // testing_size), testing_size):
acc_num_train_months = []
# Dates in the batch
date_range = new_data_full.iloc[d:d + testing_size].index
# Loop through all sizes of training sets
for num_train_month in range(1, training_sizes + 1):
# for num_train_month in range(1, 240 + 1):
# Prepare Training/Testing Datasets
X_train = new_full_q_df.iloc[d - (int(21 * num_train_month)):d, :-1]
y_train = new_full_q_df.iloc[d - (int(21 * num_train_month)):d, -1]
X_test = new_full_q_df.ix[date_range, :-1]
y_test = new_full_q_df.ix[date_range, -1]
# Fit data and make predictions
reg = RandomForestRegressor(n_estimators=1500, max_features='sqrt', oob_score=True, n_jobs=-1, random_state=0)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
y_fit = reg.predict(X_train)
pred_q = y_pred
actions = X_test['col247']
data = {'Action': actions, 'Q': pred_q}
df_pred = pd.DataFrame(data=data, index=y_test.index)
pred_actions = []
for date in date_range:
max_q = [0, -1]
for i, r in df_pred.ix[date].iterrows():
if r['Q'] > max_q[1]:
max_q = [r['Action'], r['Q']]
pred_actions.append(max_q[0])
best_actions = []
for date in date_range:
max_q = [0, -1]
for i, r in new_full_q_df.ix[date].iterrows():
if r['Q'] > max_q[1]:
max_q = [r['col247'], r['Q']]
best_actions.append(max_q[0])
acc_num_train_months.append(accuracy_score(best_actions, pred_actions))
accs.append(np.array(acc_num_train_months))
print("Batch {0} completed. Total progress {1}%".format(d + 1 - u, d / (u + testing_size * (252 // testing_size))))
harmonic_means = np.power(reduce(lambda x,y: x*y, accs), (1/len(accs)))
arithmetic_means = reduce(lambda x,y: x+y, accs) / len(accs)
print("Harmonic Means Max: {}".format((np.argmax(harmonic_means) + 1, np.max(harmonic_means))))
print("Arithemtic Means Max: {}".format((np.argmax(arithmetic_means) + 1, np.max(arithmetic_means))))
print("Grid search best num_train_year took {} seconds:".format(time.time() - start_time))
In [108]:
from collections import defaultdict
from datetime import datetime, timedelta
from copy import deepcopy
from sklearn import cross_validation
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn import grid_search
class ChimpBot(MonkeyBot):
"""An agent that learns to drive in the smartcab world."""
num_features = 246
valid_actions = ['Buy', 'Sell']
num_trial = 500
trial_counter = 0 # For getting the trial number
random_rounds = 1500 # Number of rounds where the bot chooses to go monkey
trial_meta_info = {} # For monitoring what happens in each trial
epsilon = 1
gamma = 0.75
random_reward = [0]
random_counter = 0
policy_counter = 0
track_key1 = {'Sell': 0, 'Buy': 0, 'Hold': 0}
track_key2 = {'Sell': 0, 'Buy': 0, 'Hold': 0}
track_random_decision = {'Sell': 0, 'Buy': 0, 'Hold': 0}
reset_counter = 0
def __init__(self, dfEnv, cash=1000, share=0, pv=0):
super(ChimpBot, self).__init__(dfEnv, cash, share, pv)
# sets self.cash = 1000
# sets self.share = 0
# sets self.pv = 0
# sets self.asset_history_list = []
# sets self.env = dfEnv
# implements buy(self, stock_price)
# implements sell(self, stock_price)
# implements hold(self)
self.iter_env = self.env.iterrows()
self.now_env_index, self.now_row = self.iter_env.next()
# self.now_yes_share = 0
self.now_action = ''
# self.now_q = 0
self.prev_cash = self.cash
self.prev_share = self.share
self.prev_pv = self.pv
self.q_df_columns = list(self.env.columns)
self.q_df_columns.pop()
self.q_df_columns.extend(['Action', 'Q Value'])
self.q_df = pd.DataFrame(columns=self.q_df_columns)
self.q_dict = defaultdict(lambda: (0, 0)) # element of q_dict is (state, act): (q_value, t)
self.q_dict_analysis = defaultdict(lambda: (0, 0))
self.negative_reward = 0
self.n_reward_history = []
self.net_reward = 0
self.reset_counter = 0
# Smartcab use only
# self.penalty = False
# self.num_step = 0 # Number of steps for each trial; get reset each time a new trial begins
def make_q_df(self):
result_dict = defaultdict(list)
for index, row in self.q_dict.iteritems():
for i in range(len(self.q_dict.keys()[0])):
column_name = 'col' + str(i + 1)
result_dict[column_name].append(index[i])
result_dict['Q'].append(self.q_dict[index][0])
self.q_df = pd.DataFrame(result_dict)
q_df_column_list = ['col' + str(x) for x in range(1, self.num_features + 1 + 1)]
q_df_column_list.append('Q')
# q_df_column_list = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8', 'col9', 'col10', 'col11', 'col12', 'col13', 'col14', 'col15', 'col16', 'col17', 'col18', 'col19', 'col20', 'col21', 'col22', 'col23', 'col24', 'col25', 'col26', 'col27', 'col28', 'col29', 'col30', 'col31', 'col32', 'col33', 'col34', 'col35', 'col36', 'col37', 'col38', 'col39', 'Q']
self.q_df = self.q_df[q_df_column_list]
def transfer_action(x):
if x == 'Buy':
return 1
elif x == 'Sell':
return 2
elif x == 'Hold':
return 0
else:
raise ValueError("Wrong action!")
def str_float_int(x):
return int(float(x))
arr_int = np.vectorize(str_float_int)
self.q_df['col' + str(self.num_features + 1)] = self.q_df['col' + str(self.num_features + 1)].apply(transfer_action)
self.q_df.ix[:, :-1] = self.q_df.ix[:, :-1].apply(arr_int)
def split_q_df(self):
self.q_df_X = self.q_df.ix[:, :-1]
self.q_df_y = self.q_df.ix[:, -1]
# self.X_train, self.X_test, self.y_train, self.y_test = cross_validation.train_test_split(self.q_df_X, self.q_df_y, test_size=0.1, random_state=0)
def train_on_q_df(self):
reg = AdaBoostRegressor(DecisionTreeRegressor(max_depth=25), n_estimators=50, random_state=0)
self.q_reg = reg
self.q_reg = self.q_reg.fit(self.q_df_X, self.q_df_y)
def update_q_model(self):
# print("Updating Q model...")
start_time = time.time()
self.make_q_df()
self.split_q_df()
self.train_on_q_df()
# print("Update took {} seconds".format(time.time() - start_time))
def from_state_action_predict_q(self, state_action):
state_action = [state_action]
pred_q = self.q_reg.predict(state_action)
return pred_q
# def yes_share(self):
# # Represent chimp asset in state_action
# if self.share > 0:
# return 1
# else:
# return 0
def max_q(self, now_row):
def transfer_action(x):
if x == 'Buy':
return 1
elif x == 'Sell':
return 2
elif x == 'Hold':
return 0
else:
raise ValueError("Wrong action!")
def str_float_int(x):
return int(float(x))
now_row2 = list(now_row)
# now_row2.append(self.now_yes_share)
max_q = ''
q_compare_dict = {}
if len(now_row2) > self.num_features:
raise ValueError("Got ya bastard! @ MaxQ")
# Populate the q_dict
for act in set(self.valid_actions):
now_row2.append(act)
now_row_key = tuple(now_row2)
_ = self.q_dict[now_row_key]
# # K-Q Algorithm
# if np.random.choice(2, p = [0.9, 0.1]) == 1 and len(self.q_dict) > 30000:
# if _[1] == 0 and np.random.choice(2, p = [0.7, 0.3]) == 1 and len(self.q_dict) > 30000:
try:
self.q_reg
except AttributeError:
pass
# print('No q_reg yet...going with default.')
else:
if _[1] == 0:
# print("Dreaming mode...")
# start_time = time.time()
# self.update_q_model()
single_X = np.array(now_row_key)
# print(single_X)
arr_int = np.vectorize(str_float_int)
single_X[-1] = transfer_action(single_X[-1])
single_X = arr_int(single_X)
single_X = single_X.reshape(1, -1)
pred_q = self.q_reg.predict(single_X)
dreamed_q = (1 - (1 / (self.q_dict[now_row_key][1] + 1))) * self.q_dict[now_row_key][0] + (1 / (self.q_dict[now_row_key][1] + 1)) * pred_q[0]
self.q_dict[now_row_key] = (dreamed_q, self.q_dict[now_row_key][1] + 1)
# print("Q-dreamed: {0} for Act: {1}, taking {2} seconds.".format(self.q_dict[now_row_key], act, time.time() - start_time))
# print(act, self.q_dict[now_row_key])
q_compare_dict[now_row_key] = self.q_dict[now_row_key]
now_row2.pop()
try:
max(q_compare_dict.iteritems(), key=lambda x:x[1])
except ValueError:
print("Wrong Q Value in Q Compare Dict!")
else:
key, qAndT = max(q_compare_dict.iteritems(), key=lambda x:x[1])
# print("Action: {0}, with Q-value: {1}".format(key[-1], qAndT[0]))
return key[-1], qAndT[0], qAndT[1]
def q_update(self):
# print("Data Index: {}".format(self.now_env_index))
now_states = list(self.now_row)
# now_states = list(now_states)
now_states.pop() # disregard the Trade Price
prev_states = list(self.prev_states)
if len(prev_states) > self.num_features:
raise ValueError("Got ya bastard! @ Q_Update...something wrong with the self.prev_states!!!")
# prev_states.append(self.prev_yes_share)
prev_states.append(self.prev_action)
prev_states_key = tuple(prev_states)
if len(prev_states_key) > self.num_features + 2:
raise ValueError("Got ya bastard! @ Q_Update")
q_temp = self.q_dict[prev_states_key]
q_temp0 = (1 - (1 / (q_temp[1] + 1))) * q_temp[0] + (1 / (q_temp[1] + 1)) * (self.reward + self.gamma * self.max_q(now_states)[1])
if prev_states_key[:-1] == ('Low', 'Low', 'Average', 'Average', 'Low', 'Average', 'Average', 'Average', 'Low', 'Low', 'Low', 'Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'N-Very Low', 'Low', 'Average', 'N-Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'High', 'Yes'):
self.track_key1[prev_states_key[-1]] += 1
elif prev_states_key[:-1] == ('Low', 'Low', 'Average', 'Average', 'Low', 'Average', 'Average', 'Average', 'Low', 'Low', 'Low', 'Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'N-Very Low', 'Low', 'Average', 'N-Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'High', 'No'):
self.track_key2[prev_states_key[-1]] += 1
# elif prev_states_key[:-1] == ('Very High', 'Very High', 'Very High', 'Very High', 'Very High', 'Very High', 'Average', 'High', 'Average', 'Average', 'Average', 'Low', 'Average', 'Very Low', 'Low', 'N-Very Low', 'N-Very Low', 'N-Very Low', 'N-Very Low', 'Very Low', 'Very Low', 'Average', 'Very Low', 'Low', 'Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Low', 'Very Low', 'Low', 'Very Low', 'Average', 'No'):
# self.track_key2[prev_states_key[-1]] += 1
self.q_dict[prev_states_key] = (q_temp0, q_temp[1] + 1)
# For analysis purpose
self.q_dict_analysis[prev_states_key] = (q_temp0, self.prev_env_index)
# print("Now Action: {}".format())
# print(prev_states_key)
return (self.q_dict[prev_states_key])
def policy(self, now_row):
return self.max_q(now_row)[0]
def reset(self):
# Portfolio change over iterations
self.asset_history_list.append(self.pv + self.cash)
self.iter_env = self.env.iterrows()
self.now_env_index, self.now_row = self.iter_env.next()
self.cash = 1000
self.share = 0
self.pv = 0
self.prev_cash = self.cash
self.prev_share = self.share
self.prev_pv = self.pv
if self.epsilon - 1.0 / self.random_rounds > 0.001:
    self.random_counter += 1
    self.epsilon = self.epsilon - 1.0 / self.random_rounds  # linear decay over random_rounds
else:
    self.epsilon = 0.001  # epsilon floor
    self.policy_counter += 1
self.net_reward = 0
self.reset_counter += 1
if self.reset_counter % self.random_rounds == 0:
self.update_q_model()
# self.num_step = 0 # Recalculate the steps for the new trial
# self.penalty = False
# self.fail = False
def make_decision(self, now_row):
return self.policy(now_row)
def update(self):
# Update state
now_states = list(self.now_row)
if len(now_states) > self.num_features + 1:
print(len(now_states))
print(self.num_features)
raise ValueError("Got ya bastard! @ Q_Update...something wrong with the self.now_row!!!")
# now_states = list(now_states)
# print(type(self.now_row))
now_states.pop() # disregard the Trade Price
if len(now_states) > self.num_features:
print(now_states)
raise ValueError("Got ya bastard! @ Q_Update...something wrong with now_states after pop!!!")
# Exploitation-exploration decisioning
random.seed(datetime.now())
self.decision = np.random.choice(2, p = [self.epsilon, 1 - self.epsilon]) # decide to go random or with the policy
# self.decision = 0 # Force random mode
# print("Random decision: {0}, Epislon: {1}".format(self.decision, self.epsilon))
# print("What the FUCK?!")
if self.decision == 0: # if zero, go random
random.seed(datetime.now())
action = random.choice(self.valid_actions)
# if tuple(now_states) == ('Low', 'Low', 'Average', 'Average', 'Low', 'Average', 'Average', 'Average', 'Low', 'Low', 'Low', 'Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'N-Very Low', 'Low', 'Average', 'N-Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'Very Low', 'High'):
# self.track_random_decision[action] += 1
else: # else go with the policy
# print("now_states: {}".format(now_states))
# self.now_yes_share = self.yes_share()
action = self.make_decision(now_states)
if len(now_states) > self.num_features:
print(now_states)
raise ValueError("Got ya bastard! @ Q_Update...something wrong with now_states after make_decision!!!")
# print("Now Action Real: {}".format(action))
# Execute action and get reward
if action == 'Buy':
# print(self.now_row)
self.buy(self.now_row[-1])
elif action == 'Sell':
# print(self.now_row)
self.sell(self.now_row[-1])
elif action == 'Hold':
# print(self.now_row)
self.hold(self.now_row[-1])
else:
raise ValueError("Wrong action man!")
try:
self.prev_states
except AttributeError:
print("Running the first time...no prevs exist.")
else:
self.reward = ((self.cash - self.prev_cash) + (self.pv - self.prev_pv)) / (self.prev_cash + self.prev_pv)
self.q_update()
self.prev_states = now_states
if len(now_states) > self.num_features:
raise ValueError("Got ya bastard! @ Q_Update...something wrong with the now_states!!!")
self.now_action = action
self.prev_action = action
# self.prev_yes_share = self.now_yes_share
self.prev_env_index = deepcopy(self.now_env_index)
self.prev_cash = self.cash
self.prev_share = self.share
self.prev_pv = self.pv
# if len(self.q_dict) > 20000:
# self.update_q_model()
try:
self.now_env_index, self.now_row = self.iter_env.next()
except StopIteration:
pass
# print("End of data.")
else:
pass
# if reward < 0:
# self.penalty = True
try:
_ = self.reward
except AttributeError:
print("No reward yet...0 assigned.")
self.reward = 0
# print "ChimpBot.update(): Action: {0} at Price: {1}, Cash: {2}, Num_Share: {3}, Cash + PV = {4}, Reward = {5}".format(action, self.now_row[-1], self.cash, self.share, self.cash + self.pv, self.reward) # [debug]
# print('Portfolio + Cash: {}'.format(self.cash + self.pv))
# print("================================")
In [109]:
def main_simulate():
# Initiating data and the chimp
start_date = test_phase_data.index[0]
end_date = test_phase_data.index[-1]
global data_full
dfFull = data_full
train_size = 21 * 8  # 8 months of training data per batch
batch_size = 7  # predict 7 trading days at a time
date_range = test_phase_data.index[:]
print(date_range)
batch_count = 0
cash = 1000
share = 0
pv = 0
now_yes_share = 0
for batch in range(len(test_phase_data) // batch_size):
# for date in date_range:
batch_count += 1
print("Batch {}".format(batch_count))
try:
dfTest = dfFull.ix[test_phase_data.index[batch * batch_size]:test_phase_data.index[batch * batch_size + batch_size - 1]]
except IndexError:
dfTest = dfFull.ix[test_phase_data.index[batch * batch_size]:test_phase_data.index[-1]]
(u,) = dfFull.index.get_indexer_for([test_phase_data.index[batch * batch_size]])
dfTrain = dfFull.iloc[u - (train_size):u]
# Normalization
train_mean = dfTrain.ix[:, :-1].mean()
train_range = dfTrain.ix[:, :-1].max() - dfTrain.ix[:, :-1].min()
dfTrain.ix[:, :-1] = (dfTrain.ix[:, :-1] - train_mean) / train_range
dfTest.ix[:, :-1] = (dfTest.ix[:, :-1] - train_mean) / train_range
pca_dim = 246
pca = PCA(n_components=pca_dim)
pca.fit(dfTrain.ix[:, :-1])
chimp_train = ChimpBot(dfTrain)
for i in range(1500):
for l in range(len(chimp_train.env)):
# print("Train Round {0}-{1}".format(i + 1, l + 1))
chimp_train.update()
chimp_train.reset()
# Test the Chimp!
q_df = deepcopy(chimp_train.q_df)
q_dict = deepcopy(chimp_train.q_dict)
q_reg = deepcopy(chimp_train.q_reg)
try:
_ = chimp_test
except NameError:
print("First time running...")
else:
cash = chimp_test.cash
share = chimp_test.share
pv = chimp_test.pv
now_yes_share = chimp_test.now_yes_share
chimp_test = ChimpBot(dfTest, cash=cash, share=share, pv=pv)
chimp_test.now_yes_share = now_yes_share  # vestigial; not a constructor argument
chimp_test.q_df = deepcopy(q_df)
chimp_test.q_dict = deepcopy(q_dict)
chimp_test.q_reg = deepcopy(q_reg)
chimp_test.epsilon = 0
# Pass the cheatsheet to the next chimp
try:
chimp_test.prev_states = prev_states
chimp_test.now_action = now_action
chimp_test.prev_action = prev_action
chimp_test.prev_yes_share = prev_yes_share
chimp_test.reward = reward
chimp_test.prev_cash = prev_cash
chimp_test.prev_share = prev_share
chimp_test.prev_pv = prev_pv
chimp_test.prev_env_index = prev_env_index
except UnboundLocalError:
print("No cheatsheet to pass over yet...no worries!")
for l in range(len(chimp_test.env)):
# print("Train Round {0}-{1}".format(i + 1, l + 1))
chimp_test.update()
# Create cheatsheet for the next chimp
prev_states = chimp_test.prev_states
now_action = chimp_test.now_action
prev_action = chimp_test.prev_action
prev_yes_share = chimp_test.prev_yes_share
prev_env_index = chimp_test.prev_env_index
reward = chimp_test.reward
prev_cash = chimp_test.prev_cash
prev_share = chimp_test.prev_share
prev_pv = chimp_test.prev_pv
global action_lists
action_lists.append(chimp_test.action_list)
global pv_history_list
pv_history_list.append(chimp_test.cash + chimp_test.pv)
if (batch + 1) % 3 == 0:
print(pv_history_list)
print(pv_history_list)
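main_simulate() reads and mutates module-level state, so its globals must exist before the call. A hedged usage sketch; binding data_full to new_data_full_pca is an assumption:

data_full = new_data_full_pca  # assumption: the PCA-reduced frame used elsewhere
action_lists = []
pv_history_list = []
main_simulate()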
In [110]:
# PCA Definition
# feature_days = 21 * 6 = 126
# n = 271: 0.96 <-- goldilocks 1
# n = 130: 0.895 <-- goldilocks 2
# Going with n = 246
pca_dim = 246
pca = PCA(n_components=pca_dim)
start_time = time.time()
pca.fit(df_features[:last_training_day])  # fit on the training span only
print(np.sum(pca.explained_variance_ratio_))
# print(pca.explained_variance_ratio_)
print("Running PCA took {} seconds".format(time.time() - start_time))
# PCA transformation
cp_imp = pca.explained_variance_ratio_
cp_imp = pd.Series(cp_imp)
cp_imp_cum = cp_imp.cumsum()
cp_imp_cum.plot()
df_features_pca = pca.transform(df_features)
df_features_pca = pd.DataFrame(data=df_features_pca)
df_features_pca['Date'] = df_features.index
df_features_pca.set_index(df_features_pca['Date'], inplace=True)
del df_features_pca.index.name
del df_features_pca['Date']
# Normalization post PCA
df_features_pca = (df_features_pca - df_features_pca[:last_training_day].mean()) / (df_features_pca[:last_training_day].max() - df_features_pca[:last_training_day].min())
# Reconstruct dataset
df_features_pca['Trade Price'] = df_labels
df_full = df_features_pca
display(df_full.head())
# Start simulation for the chimp
num_iter = 1500
day_count = 0
pv_history_list = []
chimp = ChimpBot(df_full)
start_time = time.time()
for i in range(num_iter):
for l in range(len(chimp.env)):
chimp.update()
pv_history_list.append(chimp.cash + chimp.pv)
print(pv_history_list[-1])
chimp.reset()
print("{0} rounds of training took {1} seconds".format(num_iter, time.time() - start_time))
# Convert Q-Table to Dataframe from trained chimp (full)
result_dict = defaultdict(list)
for index, row in chimp.q_dict_analysis.iteritems():
for i in range(len(chimp.q_dict_analysis.keys()[0])):
column_name = 'col' + str(i + 1)
result_dict[column_name].append(index[i])
result_dict['Q'].append(chimp.q_dict_analysis[index][0])
result_dict['Date'].append(chimp.q_dict_analysis[index][1])
q_df = pd.DataFrame(result_dict)
In [ ]: