In [36]:
# Logger
import logging
logging.basicConfig(level=logging.INFO)
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['pdf.use14corefonts'] = True
# Import local paths
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))
# Enable automatic module reload
%load_ext autoreload
%autoreload 2
# Load PyALCS module
from lcs.representations.RealValueEncoder import RealValueEncoder
from lcs.agents.racs import RACS, Configuration
# Load OpenAI environments
import gym
import gym_multiplexer
In the 3-bit multiplexer, the first bit is responsible for addressing the correct answer.
For testing purposes, the last bit is used as a flag indicating whether the action chosen by the algorithm was correct. The environment always initializes it to 0. Its value is changed to 1 if the action was chosen correctly.
In [2]:
# Environment initialization: build the 3-bit real-valued multiplexer by its
# registered id (presumably registered by the `gym_multiplexer` import - confirm)
rmpx = gym.make('real-multiplexer-3bit-v0')
rMPX acts like a normal MPX, with the difference that floating-point numbers within the range [0,1] are used instead of boolean values. The environment uses an internal threshold to transform them into boolean values (threshold=0.5 is used by default).
In [3]:
# Reset the environment and keep what it returns as `state`;
# the bare name on the last line displays it as the cell output.
state = rmpx.reset()
state
Out[3]:
Which corresponds to the following binary MPX:
In [4]:
threshold = 0.5 # secret knowledge
# Build a concrete list rather than a one-shot `map` iterator: the iterator
# would be exhausted by displaying it, leaving `binary_state` empty for any
# later use or re-inspection.
binary_state = [1 if x > threshold else 0 for x in state]
binary_state
Out[4]:
Depending on the MPX size, a different number of actions is available. In the 3-bit variant, the first bit (the address bit) points to the correct answer (two possible values).
In [5]:
# Size of the action space (the `n` attribute of the environment's action space)
rmpx.action_space.n
Out[5]:
We can also investigate the properties of a single observation (like vector dimension, lower and upper bound of each attribute).
In [6]:
# Summarise a single observation: vector shape plus per-attribute bounds.
# `low` carries the lower bounds and `high` the upper bounds, so each label
# must be paired with the matching attribute.
print(f"Observation shape: {rmpx.observation_space.shape}")
print(f"Lower bounds: {rmpx.observation_space.low}")
print(f"Upper bounds: {rmpx.observation_space.high}")
In [7]:
def encode(bits, val):
    """Encode `val` with a `RealValueEncoder` built from `bits`; return an int.

    Args:
        bits: argument passed to the `RealValueEncoder` constructor
            (presumably the encoder resolution in bits - confirm).
        val: the value handed to the encoder's `encode` method.

    Returns:
        The encoder output converted with `int(...)`.
    """
    encoder = RealValueEncoder(bits)
    encoded = encoder.encode(val)
    return int(encoded)
# Perception values from 0 up to (about) 1.0 in steps of 0.1
r = np.arange(0, 1.1, .1)
df = pd.DataFrame({'Perception': r})

# One column per encoder resolution (1..7 bits), each holding the encoded
# integer for the perception value in that row.
for bit in range(1, 8):
    df[f'{bit}-bit'] = df['Perception'].map(lambda val, bits=bit: encode(bits, val))

# Index the table by the raw perception value and display it
df = df.set_index('Perception')
df
Out[7]:
In [8]:
# Define function for fetching interesting metrics
def rmpx_metrics(population, environment):
    """Collect summary statistics about a classifier population.

    Args:
        population: sized iterable of classifiers exposing `num` and
            `is_reliable()`.
        environment: accepted but not used by this collector.

    Returns:
        dict with the number of classifiers (`population`), the sum of
        their `num` attributes (`numerosity`) and the count of classifiers
        whose `is_reliable()` is truthy (`reliable`).
    """
    size = len(population)
    total_numerosity = sum(cl.num for cl in population)
    reliable_count = sum(1 for cl in population if cl.is_reliable())
    return dict(
        population=size,
        numerosity=total_numerosity,
        reliable=reliable_count,
    )
Define a function for executing experiments
In [9]:
def perform_experiment(env, encoder_bits, trials):
    """Run a fresh RACS agent on `env` and return its population and metrics.

    Args:
        env: environment exposing `observation_space.shape` and
            `action_space.n`.
        encoder_bits: argument passed to `RealValueEncoder`.
        trials: trial count forwarded to `agent.explore_exploit`.

    Returns:
        The `(population, metrics)` pair produced by `explore_exploit`.
    """
    # Problem dimensions are read straight from the environment
    n_attributes = env.observation_space.shape[0]
    n_actions = env.action_space.n

    # Agent settings, forwarded unchanged to the configuration
    settings = dict(
        encoder=RealValueEncoder(encoder_bits),
        metrics_trial_frequency=5,
        user_metrics_collector_fcn=rmpx_metrics,
        epsilon=1.0,  # no biased exploration
        do_ga=True,
        theta_r=0.9,  # reliability threshold
        theta_i=0.2,  # inadequacy threshold
        theta_ga=100,
        cover_noise=0,
        mutation_noise=0.25,
        chi=1.0,  # cross-over probability
        mu=0.1,  # mutation probability
    )
    configuration = Configuration(n_attributes, n_actions, **settings)

    # Build the agent and let it run on the environment
    racs_agent = RACS(configuration)
    population, metrics = racs_agent.explore_exploit(env, trials)
    return population, metrics
In [10]:
def parse_metrics(metrics):
    """Convert a list of metric records into a trial-indexed DataFrame.

    Args:
        metrics: iterable of dicts, each with at least the keys `trial`,
            `reward`, `population` and `reliable`.

    Returns:
        DataFrame indexed by `trial` with columns `reward`, `population`,
        `reliable` and `mode`; `mode` is "explore" for even trial numbers
        and "exploit" for odd ones.
    """
    fields = ['trial', 'reward', 'population', 'reliable']
    rows = [[record[field] for field in fields] for record in metrics]
    frame = pd.DataFrame(rows, columns=fields).set_index('trial')

    def mode_of(trial):
        # odd trial numbers are tagged as exploit, even ones as explore
        return "exploit" if trial % 2 else "explore"

    frame['mode'] = frame.index.map(mode_of)
    return frame
In [41]:
def plot_results(metrics, env, encoder_bits, major_ticker_freq=2000, out_path=None):
    """Draw a two-panel summary of one experiment and optionally save it.

    The left panel shows the reward (50-sample rolling mean) separately for
    the explore and exploit records; the right panel shows the population
    size and the number of reliable classifiers over the trials.

    Args:
        metrics: raw metric records, converted with `parse_metrics`.
        env: environment; `env.env.spec.id` is used in the figure title.
        encoder_bits: value shown in the figure title.
        major_ticker_freq: spacing of the major x-axis ticks on both panels.
        out_path: when truthy, the figure is saved to this path.
    """
    # parse metrics into data frame and split the reward series by mode
    frame = parse_metrics(metrics)
    rewards_by_mode = {
        mode: frame.loc[frame['mode'] == mode, 'reward']
        for mode in ('explore', 'exploit')
    }

    fig, (reward_ax, classifiers_ax) = plt.subplots(1, 2, figsize=(10, 5))
    fig.suptitle(f"{env.env.spec.id}, ubr encoder bits: {encoder_bits}", fontsize=14)

    # plot 1 - average reward
    for mode, rewards in rewards_by_mode.items():
        rewards.rolling(window=50).mean().plot(label=mode, ax=reward_ax)

    # dashed reference lines at rewards 500 and 1000
    for level in (500, 1000):
        reward_ax.axhline(y=level, color='black', linestyle='--', linewidth=1)

    # reward_ax.annotate('maximum reward', xy=(500, 950), xycoords='data')
    reward_ax.set(
        title='Average reward',
        xlabel='Trial',
        ylabel='Average reward',
        ylim=(400, 1100),
    )
    reward_ax.xaxis.set_major_locator(ticker.MultipleLocator(major_ticker_freq))
    reward_ax.legend()

    # plot 2 - number of classifiers
    for column in ('population', 'reliable'):
        frame[column].plot(label=column, ax=classifiers_ax)

    classifiers_ax.set(
        title="Classifiers evolution",
        xlabel='Trial',
        ylabel='# Classifiers',
    )
    classifiers_ax.xaxis.set_major_locator(ticker.MultipleLocator(major_ticker_freq))
    classifiers_ax.legend()

    if out_path:
        fig.savefig(out_path, bbox_inches='tight')
In [12]:
def evaluate(rmpx, encoder_bits, trials=10_000):
    """Run one experiment and return its population ordered by action.

    Args:
        rmpx: environment handed to `perform_experiment`.
        encoder_bits: encoder setting handed to `perform_experiment`.
        trials: number of trials to run (default 10 000).

    Returns:
        `(population, metrics)`, where `population` is a list of the
        classifiers sorted by their `action` attribute.
    """
    raw_population, metrics = perform_experiment(
        rmpx, encoder_bits=encoder_bits, trials=trials)

    def action_of(classifier):
        return classifier.action

    # sort classifiers in population according to action
    ordered_population = sorted(raw_population, key=action_of)
    return ordered_population, metrics
In [13]:
# Environment instance shared by the evaluate/plot_results cells that follow
rmpx3 = gym.make('real-multiplexer-3bit-v0')
In [40]:
%%time
# 1-bit encoder, 10 000 trials; the returned population is discarded
_, metrics1 = evaluate(rmpx3, encoder_bits=1, trials=10_000)
In [43]:
# Plot the 1-bit run and save the figure to 'rmpx3bit_ubr1bit.pdf'
plot_results(metrics1, rmpx3, encoder_bits=1, out_path = 'rmpx3bit_ubr1bit.pdf')
In [31]:
%%time
# 2-bit encoder, 15 000 trials; the population is kept as `pop2` for inspection
pop2, metrics2 = evaluate(rmpx3, encoder_bits=2, trials=15_000)
In [44]:
# Plot the 2-bit run (major x ticks every 3000 trials) and save it as a PDF
plot_results(metrics2, rmpx3, encoder_bits=2, major_ticker_freq=3000, out_path = 'rmpx3bit_ubr2bit.pdf')
In [33]:
from itertools import groupby


def _action_of(classifier):
    return classifier.action


# Keep only the reliable classifiers of the 2-bit run. `groupby` merges only
# consecutive items, so this relies on `pop2` already being ordered by action
# (which `evaluate` does).
rel = list(filter(lambda classifier: classifier.is_reliable(), pop2))

for action, members in groupby(rel, key=_action_of):
    niche = list(members)
    print(f'\nAction: {action}, classifiers: {len(niche)}')
    # strongest classifiers first (ascending by negated fitness)
    ranked = sorted(niche, key=lambda cl: -cl.fitness)
    for cl in ranked:
        print(f'{cl}')
In [34]:
%%time
# 3-bit encoder, 20 000 trials; the returned population is discarded.
# NOTE(review): the result is stored as `metrics`, not `metrics3`, unlike the
# `metrics1`/`metrics2`/`metrics4` naming of the sibling runs; the 3-bit plot
# cell reads this same `metrics` name.
_, metrics = evaluate(rmpx3, encoder_bits=3, trials=20_000)
In [50]:
# Plot the 3-bit run (major x ticks every 4000 trials) and save it as a PDF
plot_results(metrics, rmpx3, encoder_bits=3, major_ticker_freq=4000, out_path = 'rmpx3bit_ubr3bit.pdf')
In [48]:
#%%time
# NOTE(review): the %%time magic is commented out here, unlike the other run cells.
# 4-bit encoder, 40 000 trials; the returned population is discarded
_, metrics4 = evaluate(rmpx3, encoder_bits=4, trials=40_000)
In [51]:
# Plot the 4-bit run (major x ticks every 10000 trials) and save it as a PDF
plot_results(metrics4, rmpx3, encoder_bits=4, major_ticker_freq=10000, out_path = 'rmpx3bit_ubr4bit.pdf')