In [10]:
# Configure the root logger at INFO level so INFO-and-above records are shown
import logging
logging.basicConfig(level=logging.INFO)
# Extend the module search path. The relative paths are resolved against the
# current working directory, so they depend on where the kernel was started.
# NOTE(review): presumably these point at the PyALCS checkout and a sibling
# openai-envs checkout - confirm. Must run before the lcs / gym_corridor imports below.
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))
# Enable automatic module reload (mode 2), so edits to imported modules are
# picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
# Load PyALCS module: the adapter base class and the ACS2 agent with its configuration
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList
# Load environments
# NOTE(review): gym_corridor is not referenced by name in the visible cells; it is
# presumably imported so the corridor environment ids get registered with gym - confirm
import gym
import gym_corridor
In [11]:
class CorridorAdapter(EnvironmentAdapter):
    """Environment adapter used with the corridor environment in this notebook."""

    @staticmethod
    def to_genotype(phenotype):
        """Return ``phenotype`` wrapped in a one-element tuple."""
        genotype = (phenotype,)
        return genotype
In [12]:
# ACS2 configuration shared by the explore and exploit runs below.
# NOTE(review): the grouping is by parameter name only; confirm the exact meaning
# of each value against the PyALCS Configuration documentation.
cfg = Configuration(
    # Perception / action shape and the adapter that produces the perception
    classifier_length=1,
    number_of_possible_actions=2,
    environment_adapter=CorridorAdapter,
    # Numeric hyper-parameters
    epsilon=1.0,
    beta=0.05,
    gamma=0.95,
    theta_exp=50,
    # Genetic-algorithm related settings (do_ga switches it on)
    do_ga=True,
    theta_ga=50,
    mu=0.03,
    u_max=1,
    # Metrics collection
    metrics_trial_frequency=20,
)
In [13]:
def print_simple_stats(population, metrics):
    """Print the population size and the mean ``steps_in_trial`` over ``metrics``.

    Args:
        population: sized collection; only ``len(population)`` is used.
        metrics: iterable of mappings, each providing a ``'steps_in_trial'`` entry.

    When ``metrics`` is empty the average is reported as ``nan`` instead of
    raising ``ZeroDivisionError``.
    """
    pop_size = len(population)
    steps = [m['steps_in_trial'] for m in metrics]
    # Guard the division: an empty metrics collection has no meaningful average.
    avg_steps = sum(steps) / len(steps) if steps else float('nan')
    print(f"Population of {pop_size}, avg steps {avg_steps}")
In [14]:
# Instantiate the corridor environment through gym by its id
# NOTE(review): the "20" in the id presumably is the corridor length - confirm
corridor = gym.make('corridor-20-v0')
In [15]:
%%time
# Exploration run: build a fresh ACS2 agent from cfg and call explore() on the
# corridor environment with 1000 as its second argument
# (presumably the number of trials - confirm). Keeps the returned population and metrics.
agent = ACS2(cfg)
population, metrics = agent.explore(corridor, 1000)
In [16]:
# Print population size and average steps per recorded trial for the exploration run
print_simple_stats(population, metrics)
In [17]:
# Five classifiers with the highest fitness, best first
sorted(population, key=lambda classifier: classifier.fitness, reverse=True)[:5]
Out[17]:
In [19]:
%%time
# Exploitation run: rebind `agent` to a new ACS2 instance that receives the
# population returned by explore() as its second argument, then call exploit()
# on the corridor with 100 as its second argument (presumably the number of trials - confirm).
agent = ACS2(cfg, population)
pop_exploit, metric_exploit = agent.exploit(corridor, 100)
In [20]:
# Print population size and average steps per recorded trial for the exploitation run
print_simple_stats(pop_exploit, metric_exploit)
In [21]:
# Five classifiers with the highest fitness after exploitation, best first
sorted(pop_exploit, key=lambda classifier: classifier.fitness, reverse=True)[:5]
Out[21]: