In [ ]:
import rl.callbacks
import warnings
import timeit
import json
from tempfile import mkdtemp
from action import Action

import numpy as np

In [ ]:
# https://github.com/matthiasplappert/keras-rl/blob/3cfe1f16b3d4911f3c8270880a8e2ac75180a136/rl/callbacks.py#L104
class EpisodeLogger(rl.callbacks.TrainEpisodeLogger):
    def __init__(self, logger):
        self._logger = logger
        super().__init__()

    def on_train_begin(self, logs):
        self.train_start = timeit.default_timer()
        self.metrics_names = self.model.metrics_names
        self._logger.critical('Training for {} steps ...'.format(self.params['nb_steps'])) # ここのみ変更

    def on_train_end(self, logs):
        duration = timeit.default_timer() - self.train_start
        self._logger.critical('done, took {:.3f} seconds'.format(duration)) # ここのみ変更
        
    def on_episode_end(self, episode, logs):
        duration = timeit.default_timer() - self.episode_start[episode]
        episode_steps = len(self.observations[episode])

        # Format all metrics.
        metrics = np.array(self.metrics[episode])
        metrics_template = ''
        metrics_variables = []
        with warnings.catch_warnings():
            warnings.filterwarnings('error')
            for idx, name in enumerate(self.metrics_names):
                if idx > 0:
                    metrics_template += ', '
                try:
                    value = np.nanmean(metrics[:, idx])
                    metrics_template += '{}: {:f}'
                except Warning:
                    value = '--'
                    metrics_template += '{}: {}'
                metrics_variables += [name, value]          
        metrics_text = metrics_template.format(*metrics_variables)

        nb_step_digits = str(int(np.ceil(np.log10(self.params['nb_steps']))) + 1)
        template = '{step: ' + nb_step_digits + 'd}/{nb_steps}: episode: {episode}, duration: {duration:.3f}s, episode steps: {episode_steps}, steps per second: {sps:.0f}, episode reward: {episode_reward:.3f}, mean reward: {reward_mean:.3f} [{reward_min:.3f}, {reward_max:.3f}], mean action: {action_mean:.3f} [{action_min:.3f}, {action_max:.3f}], mean observation: {obs_mean:.3f} [{obs_min:.3f}, {obs_max:.3f}], {metrics}'
        variables = {
            'step': self.step,
            'nb_steps': self.params['nb_steps'],
            'episode': episode + 1,
            'duration': duration,
            'episode_steps': episode_steps,
            'sps': float(episode_steps) / duration,
            'episode_reward': np.sum(self.rewards[episode]),
            'reward_mean': np.mean(self.rewards[episode]),
            'reward_min': np.min(self.rewards[episode]),
            'reward_max': np.max(self.rewards[episode]),
            'action_mean': np.mean(self.actions[episode]),
            'action_min': np.min(self.actions[episode]),
            'action_max': np.max(self.actions[episode]),
            'obs_mean': np.mean(self.observations[episode]),
            'obs_min': np.min(self.observations[episode]),
            'obs_max': np.max(self.observations[episode]),
            'metrics': metrics_text,
        }
        self._logger.error(template.format(**variables)) # ここのみ変更

        # Free up resources.
        del self.episode_start[episode]
        del self.observations[episode]
        del self.rewards[episode]
        del self.actions[episode]
        del self.metrics[episode]

    #def on_step_end(self, step, logs):
    #    episode = logs['episode']
    #    self.observations[episode].append(logs['observation'])
    #    self.rewards[episode].append(logs['reward'])
    #    self.actions[episode].append(logs['action'])
    #    self.metrics[episode].append(logs['metrics'])
    #    self.step += 1
    #    print('[%6d] %3.5e %s' % (step, logs['reward'], Action(logs['action'])))