In [ ]:
import gc
from itertools import count
import time

import numpy as np
import gym
from agent.agent import Agent
from visdom import Visdom
from config import ACCESS_SIZE  # warm-up length: transitions to collect before training (also passed to Agent below)

viz = Visdom()
assert viz.check_connection()  # requires a running Visdom server (python -m visdom.server)


MAX_EPISODES = 1000

env = gym.make('BipedalWalker-v2')
# Inspect the action/observation bounds; actions lie in [-1, 1],
# which the np.clip calls below rely on.
print(env.action_space.high)
print(env.action_space.low)
print(env.observation_space.high)
print(env.observation_space.low)

state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
agent = Agent(state_size, action_size, ACCESS_SIZE)
agent.restore_models(1000)  # resume from the checkpoint saved at episode 1000

# Warm-up: pre-fill the agent's replay memory with ACCESS_SIZE transitions
# collected by the noisy policy, so the first optimize() calls have data to sample.
state = env.reset()
for _ in range(ACCESS_SIZE):
    action = np.clip(agent(state) + agent.get_noise(), -1, 1)
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    state = next_state
    if done:
        state = env.reset()


def to_np(scale):
    """Wrap a scalar in a 1-element array, as Visdom's line API expects."""
    return np.array([scale])


# Create one Visdom line plot for episode reward and one for episode length;
# the sleep spaces out the two window-creation requests.
viz_reward = viz.line(X=to_np(0), Y=to_np(0))
time.sleep(1)
viz_length = viz.line(X=to_np(0), Y=to_np(0))

for _ep in range(MAX_EPISODES):
    episode_reward = 0
    state = env.reset()
    agent.noise.reset()  # restart the exploration noise process each episode
    for step in count(1):
        # env.render()
        action = np.clip(agent(state) + agent.get_noise(), -1, 1)
        next_state, reward, done, info = env.step(action)
        agent.append(state, action, reward, done, next_state)
        state = next_state
        agent.optimize()  # one learning step per environment step

        episode_reward += reward
        # End the episode when the env signals done, or truncate it at 1000 steps;
        # either way, log the episode reward and length to Visdom.
        if done or step >= 1000:
            viz.line(X=to_np(_ep + 1), Y=to_np(episode_reward), win=viz_reward, update="append")
            time.sleep(0.01)
            viz.line(X=to_np(_ep + 1), Y=to_np(step), win=viz_length, update="append")
            break
    print(_ep, done, step, episode_reward)
    gc.collect()

    # Checkpoint every 1000 episodes; the final episode (_ep == 999) saves as
    # checkpoint 1000, matching the restore_models(1000) call above.
    if (_ep + 1) % 1000 == 0:
        print("{} - score: {}".format(_ep, episode_reward))
        agent.save_models(_ep + 1)
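
The Agent class from agent.agent is not shown here, but append(...) and optimize() imply a replay buffer that stores transitions and serves random minibatches. Below is a minimal sketch of such a buffer; the class name and the sample interface are assumptions for illustration, not the actual agent.agent code.

In [ ]:
import random
from collections import deque

import numpy as np


class ReplayBuffer:
    """Fixed-size FIFO buffer of (state, action, reward, done, next_state) tuples."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are evicted automatically

    def append(self, state, action, reward, done, next_state):
        self.buffer.append((state, action, reward, done, next_state))

    def sample(self, batch_size):
        # Uniform random minibatch, returned as one stacked array per field.
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, dones, next_states = map(np.array, zip(*batch))
        return states, actions, rewards, dones, next_states

    def __len__(self):
        return len(self.buffer)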

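Likewise, agent.get_noise() and agent.noise.reset() suggest the temporally correlated Ornstein-Uhlenbeck exploration noise commonly used with DDPG-style agents. A minimal sketch of such a process follows; the class name and the default coefficients (theta=0.15, sigma=0.2, the values from the DDPG paper) are assumptions. At action time the noise sample is added to the deterministic policy output and clipped back into [-1, 1], as in the loops above.

In [ ]:
import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process: correlated noise that decays toward the mean mu."""

    def __init__(self, action_size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(action_size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Start each episode from the long-run mean.
        self.state = self.mu.copy()

    def __call__(self):
        # dx = theta * (mu - x) + sigma * N(0, 1), Euler step with dt folded in.
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state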