In [10]:

    
# Logger
import logging
logging.basicConfig(level=logging.INFO)

# Import local paths
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList

# Load environments
import gym
import gym_corridor









    



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Corridor

Actions:

MOVE_LEFT = 0
MOVE_RIGHT = 1



In [11]:

    
class CorridorAdapter(EnvironmentAdapter):
    """Environment adapter handed to the ACS2 configuration for Corridor runs.

    Only ``to_genotype`` is overridden; everything else is inherited from
    ``EnvironmentAdapter``.
    """

    @staticmethod
    def to_genotype(phenotype):
        """Wrap the raw environment observation in a one-element tuple.

        NOTE(review): presumably the Corridor observation is a single
        position value - confirm against the gym_corridor environment.
        """
        genotype = (phenotype,)
        return genotype



In [12]:

    
# ACS2 configuration shared by the exploration and exploitation runs below.
cfg = Configuration(
    # CorridorAdapter.to_genotype yields a one-element tuple, hence length 1.
    classifier_length=1,
    # Two actions are available: MOVE_LEFT (0) and MOVE_RIGHT (1).
    number_of_possible_actions=2,
    # Algorithm hyper-parameters. NOTE(review): their exact semantics are
    # defined by PyALCS's Configuration - confirm there before tuning.
    epsilon=1.0,
    beta=0.05,
    gamma=0.95,
    theta_exp=50,
    theta_ga=50,
    do_ga=True,
    mu=0.03,
    u_max=1,
    # Presumably metrics are recorded every 20th trial - TODO confirm.
    metrics_trial_frequency=20,
    environment_adapter=CorridorAdapter,
)



In [13]:

    
def print_simple_stats(population, metrics):
    """Print the population size and the average number of steps per trial.

    Args:
        population: Sized collection of classifiers; only ``len()`` is used.
        metrics: Iterable of mappings, each holding a ``'steps_in_trial'``
            entry (one mapping per recorded trial).

    The average is printed as ``nan`` when ``metrics`` is empty, instead of
    failing with ``ZeroDivisionError``.
    """
    pop_size = len(population)

    # Steps taken in each recorded trial.
    steps = [m['steps_in_trial'] for m in metrics]

    # No recorded trials means there is nothing to average; report nan
    # rather than dividing by zero.
    avg_steps = sum(steps) / len(steps) if steps else float('nan')

    print(f"Population of {pop_size}, avg steps {avg_steps}")

Corridor 20

Exploration



In [14]:

    
# Create the environment registered under the id 'corridor-20-v0'.
# NOTE(review): the id is presumably registered by importing gym_corridor - confirm.
corridor = gym.make('corridor-20-v0')



In [15]:

    
%%time
# Build a fresh ACS2 agent from cfg and run explore() on the corridor with
# 1000 as the second argument (the trial count, judging by the 'trial'
# entries in the log output). The call returns the population and the metrics.
agent = ACS2(cfg)
population, metrics = agent.explore(corridor, 1000)









    



INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 101, 'reward': 1000}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 200, 'reward': 0}






    



CPU times: user 31.8 s, sys: 157 ms, total: 32 s
Wall time: 32.5 s



In [16]:

    
# Summarise the exploration run: population size and average steps per recorded trial.
print_simple_stats(population, metrics)









    



Population of 38, avg steps 160.28



In [17]:

    
# Show the five classifiers with the highest fitness, best first.
sorted(population, key=lambda cl: cl.fitness, reverse=True)[:5]









    Out[17]:





[19 1 20               (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 262 tga: 157928 talp: 157928 tav: 1.14e+03 num: 20,
 18 1 19               (empty)               q: 1.0   r: 950.0  ir: 0.0    f: 950.0  exp: 475 tga: 157915 talp: 157928 tav: 6.32e+02 num: 20,
 17 1 18               (empty)               q: 1.0   r: 902.5  ir: 0.0    f: 902.5  exp: 640 tga: 157914 talp: 157927 tav: 4.45e+02 num: 20,
 19 0 18               (empty)               q: 1.0   r: 902.4  ir: 0.0    f: 902.4  exp: 264 tga: 157916 talp: 157917 tav: 1.1e+03 num: 20,
 16 1 17               (empty)               q: 1.0   r: 857.4  ir: 0.0    f: 857.4  exp: 752 tga: 157925 talp: 157926 tav: 4.57e+02 num: 20]

Exploitation



In [19]:

    
%%time
# Build a new agent seeded with the population obtained from the exploration
# run, then call exploit() on the same corridor with 100 as the second
# argument (presumably the trial count - confirm against PyALCS).
agent = ACS2(cfg, population)
pop_exploit, metric_exploit = agent.exploit(corridor, 100)









    



INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 4, 'reward': 1000}






    



CPU times: user 106 ms, sys: 2.86 ms, total: 108 ms
Wall time: 114 ms



In [20]:

    
# Summarise the exploitation run: population size and average steps per recorded trial.
print_simple_stats(pop_exploit, metric_exploit)









    



Population of 38, avg steps 11.8



In [21]:

    
# Show the five classifiers with the highest fitness after exploitation, best first.
sorted(pop_exploit, key=lambda cl: cl.fitness, reverse=True)[:5]









    Out[21]:





[19 1 20               (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 262 tga: 157928 talp: 157928 tav: 1.14e+03 num: 20,
 18 1 19               (empty)               q: 1.0   r: 950.0  ir: 0.0    f: 950.0  exp: 475 tga: 157915 talp: 157928 tav: 6.32e+02 num: 20,
 17 1 18               (empty)               q: 1.0   r: 902.5  ir: 0.0    f: 902.5  exp: 640 tga: 157914 talp: 157927 tav: 4.45e+02 num: 20,
 19 0 18               (empty)               q: 1.0   r: 902.4  ir: 0.0    f: 902.4  exp: 264 tga: 157916 talp: 157917 tav: 1.1e+03 num: 20,
 16 1 17               (empty)               q: 1.0   r: 857.4  ir: 0.0    f: 857.4  exp: 752 tga: 157925 talp: 157926 tav: 4.57e+02 num: 20]