In [1]:
import gym
import pylab
import random
import numpy as np
from collections import deque

In [2]:
import os    
os.environ['THEANO_FLAGS'] = "device=cuda*"  
import theano


ERROR (theano.gpuarray): pygpu was configured but could not be imported or is too old (version 0.7 or higher required)
NoneType

In [3]:
#Importing keras Dense (fully connected) layer and Sequential model
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import RMSprop


Using Theano backend.

Task: fill empty spaces in the following agent code


In [4]:
class DeepQAgent:
    def __init__(self, state_size, action_size, render=True):
        # Tip: if you are training this on AWS the best way is to turn off rendering
        # and load it later with the serialized model
        self.render = render
        self.state_size = state_size
        self.action_size = action_size

        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.005
        self.epsilon_decay = (self.epsilon - self.epsilon_min) / 50000
        self.batch_size = 64
        self.train_start = 1000
        # replay memory
        self.memory = deque(maxlen=10000)

        self.model = self.build_model()
        self.target_model = self.build_model()
        self.update_target_model()

    def build_model(self):
        # Use tflearn to get simple NN for deep q-learning
        # Spoler alert: a couple of fully connected hidden layers should be enough
        # Output layer should have the same dimensionality as the action space
        
        model = Sequential()
        model.add(Dense(128, input_dim = self.state_size, activation='linear', init='lecun_uniform'))
        model.add(Dense(64, activation='relu', init='lecun_uniform'))
        model.add(Dense(32, activation='relu', init='lecun_uniform'))
        model.add(Dense(self.action_size, activation='linear', init='lecun_uniform'))
        model.compile(optimizer=RMSprop(lr=self.learning_rate), loss='mean_squared_error')
        return model


    def update_target_model(self):
        """Update your target model to the model you are currently learning at regular time intervals"""
        self.target_model.set_weights(self.model.get_weights())

    def get_action(self, state):
        """The choice of action uses the epsilon-greedy policy for the current network."""
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model.predict(state)
            return np.argmax(q_value[0])

    def replay_memory(self, state, action, reward, next_state, done):
        """Save <s, a, r, s'> to replay_memory"""
        if action == 2:
            action = 1
        self.memory.append((state, action, reward, next_state, done))
        if self.epsilon > self.epsilon_min:
            self.epsilon -= self.epsilon_decay
            # print(len(self.memory))

    def train_replay(self):
        """Random sampling of batch_size samples from replay memory"""
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        update_input = np.zeros((batch_size, self.state_size))
        update_target = np.zeros((batch_size, self.action_size))

        for i in range(batch_size):
            state, action, reward, next_state, done = mini_batch[i]
            target = self.model.predict(state)[0]

            # As in queuing, it gets the maximum Q Value at s'. However, it is imported from the target model.
            if done:
                target[action] = reward
            else:
                target[action] = reward + self.discount_factor * \
                                          np.amax(self.target_model.predict(next_state)[0])
            update_input[i] = state
            update_target[i] = target

        # You can create a minibatch of the correct target answer and the current value of your own,
        self.model.fit(update_input, update_target, batch_size=batch_size, epochs=1, verbose=0)

    def load_model(self, name):
        self.model.load_model(name)

    def save_model(self, name):
        self.model.save(name)

In [5]:
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0] # should be equal 2
ACTION_SIZE = 2
agent = DeepQAgent(state_size, ACTION_SIZE)
# agent.load_model("./save_model/<your_saved_model_name>")
scores, episodes = [], []
N_EPISODES = 500


/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:29: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(128, activation="linear", kernel_initializer="lecun_uniform", input_dim=2)`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:30: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(64, activation="relu", kernel_initializer="lecun_uniform")`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:31: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(32, activation="relu", kernel_initializer="lecun_uniform")`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:32: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(2, activation="linear", kernel_initializer="lecun_uniform")`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:29: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(128, activation="linear", kernel_initializer="lecun_uniform", input_dim=2)`

In [6]:
for e in range(N_EPISODES):
    done = False
    score = 0
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    print(state)

    # Action 0 (left), 1 (do nothing), 3 (declare fake_action to avoid doing nothing
    fake_action = 0

    # Counter for the same action 4 times
    action_count = 0

    while not done:
        #if agent.render:
        #    env.render()

        # Select an action in the current state and proceed to a step
        action_count = action_count + 1

        if action_count == 4:
            action = agent.get_action(state)
            action_count = 0

            if action == 0:
                fake_action = 0
            elif action == 1:
                fake_action = 2

        # Take 1 step with the selected action
        next_state, reward, done, info = env.step(fake_action)
        next_state = np.reshape(next_state, [1, state_size])
        # Give a penalty of -100 for actions that end an episode
        # reward = reward if not done else -100

        # Save <s, a, r, s'> to replay memory
        agent.replay_memory(state, fake_action, reward, next_state, done)
        # Continue to learn every time step
        agent.train_replay()
        score += reward
        state = next_state

        if done:
            env.reset()
            # Copy the learning model for each episode to the target model
            agent.update_target_model()

            # For each episode, the time step where cartpole stood is plot
            scores.append(score)
            episodes.append(e)
            print("episode:", e, "  score:", score, "  memory length:", len(agent.memory),
                  "  epsilon:", agent.epsilon)

    # Save model for every 50 episodes
    if e % 50 == 0:
        agent.save_model("save_model/my_model")


[[-0.53583677  0.        ]]
episode: 0   score: -200.0   memory length: 200   epsilon: 0.9960200000000077
[[-0.51427713  0.        ]]
episode: 1   score: -200.0   memory length: 400   epsilon: 0.9920400000000154
[[-0.49273763  0.        ]]
episode: 2   score: -200.0   memory length: 600   epsilon: 0.988060000000023
[[-0.51529317  0.        ]]
episode: 3   score: -200.0   memory length: 800   epsilon: 0.9840800000000307
[[-0.40901322  0.        ]]
episode: 4   score: -200.0   memory length: 1000   epsilon: 0.9801000000000384
[[-0.54091158  0.        ]]
episode: 5   score: -200.0   memory length: 1200   epsilon: 0.9761200000000461
[[-0.54000392  0.        ]]
episode: 6   score: -200.0   memory length: 1400   epsilon: 0.9721400000000537
[[-0.57237521  0.        ]]
episode: 7   score: -200.0   memory length: 1600   epsilon: 0.9681600000000614
[[-0.41698564  0.        ]]
episode: 8   score: -200.0   memory length: 1800   epsilon: 0.9641800000000691
[[-0.47841907  0.        ]]
episode: 9   score: -200.0   memory length: 2000   epsilon: 0.9602000000000768
[[-0.48101037  0.        ]]
episode: 10   score: -200.0   memory length: 2200   epsilon: 0.9562200000000844
[[-0.4009593  0.       ]]
episode: 11   score: -200.0   memory length: 2400   epsilon: 0.9522400000000921
[[-0.54627004  0.        ]]
episode: 12   score: -200.0   memory length: 2600   epsilon: 0.9482600000000998
[[-0.57592189  0.        ]]
episode: 13   score: -200.0   memory length: 2800   epsilon: 0.9442800000001075
[[-0.55303378  0.        ]]
episode: 14   score: -200.0   memory length: 3000   epsilon: 0.9403000000001152
[[-0.51921346  0.        ]]
episode: 15   score: -200.0   memory length: 3200   epsilon: 0.9363200000001228
[[-0.58209287  0.        ]]
episode: 16   score: -200.0   memory length: 3400   epsilon: 0.9323400000001305
[[-0.51444356  0.        ]]
episode: 17   score: -200.0   memory length: 3600   epsilon: 0.9283600000001382
[[-0.48856965  0.        ]]
episode: 18   score: -200.0   memory length: 3800   epsilon: 0.9243800000001459
[[-0.52592939  0.        ]]
episode: 19   score: -200.0   memory length: 4000   epsilon: 0.9204000000001535
[[-0.40226442  0.        ]]
episode: 20   score: -200.0   memory length: 4200   epsilon: 0.9164200000001612
[[-0.56113105  0.        ]]
episode: 21   score: -200.0   memory length: 4400   epsilon: 0.9124400000001689
[[-0.41590624  0.        ]]
episode: 22   score: -200.0   memory length: 4600   epsilon: 0.9084600000001766
[[-0.58668832  0.        ]]
episode: 23   score: -200.0   memory length: 4800   epsilon: 0.9044800000001842
[[-0.57197046  0.        ]]
episode: 24   score: -200.0   memory length: 5000   epsilon: 0.9005000000001919
[[-0.58796145  0.        ]]
episode: 25   score: -200.0   memory length: 5200   epsilon: 0.8965200000001996
[[-0.54007079  0.        ]]
episode: 26   score: -200.0   memory length: 5400   epsilon: 0.8925400000002073
[[-0.55405352  0.        ]]
episode: 27   score: -200.0   memory length: 5600   epsilon: 0.888560000000215
[[-0.45043119  0.        ]]
episode: 28   score: -200.0   memory length: 5800   epsilon: 0.8845800000002226
[[-0.41990999  0.        ]]
episode: 29   score: -200.0   memory length: 6000   epsilon: 0.8806000000002303
[[-0.43730989  0.        ]]
episode: 30   score: -200.0   memory length: 6200   epsilon: 0.876620000000238
[[-0.50100287  0.        ]]
episode: 31   score: -200.0   memory length: 6400   epsilon: 0.8726400000002457
[[-0.49338285  0.        ]]
episode: 32   score: -200.0   memory length: 6600   epsilon: 0.8686600000002533
[[-0.4032447  0.       ]]
episode: 33   score: -200.0   memory length: 6800   epsilon: 0.864680000000261
[[-0.44586296  0.        ]]
episode: 34   score: -200.0   memory length: 7000   epsilon: 0.8607000000002687
[[-0.59018498  0.        ]]
episode: 35   score: -200.0   memory length: 7200   epsilon: 0.8567200000002764
[[-0.49603244  0.        ]]
episode: 36   score: -200.0   memory length: 7400   epsilon: 0.852740000000284
[[-0.54446517  0.        ]]
episode: 37   score: -200.0   memory length: 7600   epsilon: 0.8487600000002917
[[-0.52823535  0.        ]]
episode: 38   score: -200.0   memory length: 7800   epsilon: 0.8447800000002994
[[-0.51298533  0.        ]]
episode: 39   score: -200.0   memory length: 8000   epsilon: 0.8408000000003071
[[-0.41050116  0.        ]]
episode: 40   score: -200.0   memory length: 8200   epsilon: 0.8368200000003148
[[-0.42597386  0.        ]]
episode: 41   score: -200.0   memory length: 8400   epsilon: 0.8328400000003224
[[-0.55355716  0.        ]]
episode: 42   score: -200.0   memory length: 8600   epsilon: 0.8288600000003301
[[-0.56754626  0.        ]]
episode: 43   score: -200.0   memory length: 8800   epsilon: 0.8248800000003378
[[-0.50842845  0.        ]]
episode: 44   score: -200.0   memory length: 9000   epsilon: 0.8209000000003455
[[-0.55137717  0.        ]]
episode: 45   score: -200.0   memory length: 9200   epsilon: 0.8169200000003531
[[-0.53398167  0.        ]]
episode: 46   score: -200.0   memory length: 9400   epsilon: 0.8129400000003608
[[-0.40767622  0.        ]]
episode: 47   score: -200.0   memory length: 9600   epsilon: 0.8089600000003685
[[-0.48439841  0.        ]]
episode: 48   score: -200.0   memory length: 9800   epsilon: 0.8049800000003762
[[-0.47099825  0.        ]]
episode: 49   score: -200.0   memory length: 10000   epsilon: 0.8010000000003838
[[-0.57954626  0.        ]]
episode: 50   score: -200.0   memory length: 10000   epsilon: 0.7970200000003915
[[-0.49926608  0.        ]]
episode: 51   score: -200.0   memory length: 10000   epsilon: 0.7930400000003992
[[-0.49412279  0.        ]]
episode: 52   score: -200.0   memory length: 10000   epsilon: 0.7890600000004069
[[-0.53672628  0.        ]]
episode: 53   score: -200.0   memory length: 10000   epsilon: 0.7850800000004146
[[-0.43159426  0.        ]]
episode: 54   score: -200.0   memory length: 10000   epsilon: 0.7811000000004222
[[-0.54554478  0.        ]]
episode: 55   score: -200.0   memory length: 10000   epsilon: 0.7771200000004299
[[-0.45349228  0.        ]]
episode: 56   score: -200.0   memory length: 10000   epsilon: 0.7731400000004376
[[-0.49099491  0.        ]]
episode: 57   score: -200.0   memory length: 10000   epsilon: 0.7691600000004453
[[-0.57888741  0.        ]]
episode: 58   score: -200.0   memory length: 10000   epsilon: 0.7651800000004529
[[-0.44762946  0.        ]]
episode: 59   score: -200.0   memory length: 10000   epsilon: 0.7612000000004606
[[-0.48877814  0.        ]]
episode: 60   score: -200.0   memory length: 10000   epsilon: 0.7572200000004683
[[-0.43510342  0.        ]]
episode: 61   score: -200.0   memory length: 10000   epsilon: 0.753240000000476
[[-0.52402513  0.        ]]
episode: 62   score: -200.0   memory length: 10000   epsilon: 0.7492600000004837
[[-0.47118868  0.        ]]
episode: 63   score: -200.0   memory length: 10000   epsilon: 0.7452800000004913
[[-0.56399676  0.        ]]
episode: 64   score: -200.0   memory length: 10000   epsilon: 0.741300000000499
[[-0.42158939  0.        ]]
episode: 65   score: -200.0   memory length: 10000   epsilon: 0.7373200000005067
[[-0.47666474  0.        ]]
episode: 66   score: -200.0   memory length: 10000   epsilon: 0.7333400000005144
[[-0.4025322  0.       ]]
episode: 67   score: -200.0   memory length: 10000   epsilon: 0.729360000000522
[[-0.40186572  0.        ]]
episode: 68   score: -200.0   memory length: 10000   epsilon: 0.7253800000005297
[[-0.44114639  0.        ]]
episode: 69   score: -200.0   memory length: 10000   epsilon: 0.7214000000005374
[[-0.49575583  0.        ]]
episode: 70   score: -200.0   memory length: 10000   epsilon: 0.7174200000005451
[[-0.55105772  0.        ]]
episode: 71   score: -200.0   memory length: 10000   epsilon: 0.7134400000005527
[[-0.55503067  0.        ]]
episode: 72   score: -200.0   memory length: 10000   epsilon: 0.7094600000005604
[[-0.52340394  0.        ]]
episode: 73   score: -200.0   memory length: 10000   epsilon: 0.7054800000005681
[[-0.40468782  0.        ]]
episode: 74   score: -200.0   memory length: 10000   epsilon: 0.7015000000005758
[[-0.51053962  0.        ]]
episode: 75   score: -200.0   memory length: 10000   epsilon: 0.6975200000005835
[[-0.48131336  0.        ]]
episode: 76   score: -200.0   memory length: 10000   epsilon: 0.6935400000005911
[[-0.48214496  0.        ]]
episode: 77   score: -200.0   memory length: 10000   epsilon: 0.6895600000005988
[[-0.49975231  0.        ]]
episode: 78   score: -200.0   memory length: 10000   epsilon: 0.6855800000006065
[[-0.59734485  0.        ]]
episode: 79   score: -200.0   memory length: 10000   epsilon: 0.6816000000006142
[[-0.40969727  0.        ]]
episode: 80   score: -200.0   memory length: 10000   epsilon: 0.6776200000006218
[[-0.54055586  0.        ]]
episode: 81   score: -200.0   memory length: 10000   epsilon: 0.6736400000006295
[[-0.41712782  0.        ]]
episode: 82   score: -200.0   memory length: 10000   epsilon: 0.6696600000006372
[[-0.56951423  0.        ]]
episode: 83   score: -200.0   memory length: 10000   epsilon: 0.6656800000006449
[[-0.4025934  0.       ]]
episode: 84   score: -200.0   memory length: 10000   epsilon: 0.6617000000006525
[[-0.51069655  0.        ]]
episode: 85   score: -200.0   memory length: 10000   epsilon: 0.6577200000006602
[[-0.52388745  0.        ]]
episode: 86   score: -200.0   memory length: 10000   epsilon: 0.6537400000006679
[[-0.44822788  0.        ]]
episode: 87   score: -200.0   memory length: 10000   epsilon: 0.6497600000006756
[[-0.55029183  0.        ]]
episode: 88   score: -200.0   memory length: 10000   epsilon: 0.6457800000006833
[[-0.44705405  0.        ]]
episode: 89   score: -200.0   memory length: 10000   epsilon: 0.6418000000006909
[[-0.47693751  0.        ]]
episode: 90   score: -200.0   memory length: 10000   epsilon: 0.6378200000006986
[[-0.49278408  0.        ]]
episode: 91   score: -200.0   memory length: 10000   epsilon: 0.6338400000007063
[[-0.56078766  0.        ]]
episode: 92   score: -200.0   memory length: 10000   epsilon: 0.629860000000714
[[-0.57611755  0.        ]]
episode: 93   score: -200.0   memory length: 10000   epsilon: 0.6258800000007216
[[-0.54468901  0.        ]]
episode: 94   score: -200.0   memory length: 10000   epsilon: 0.6219000000007293
[[-0.52738988  0.        ]]
episode: 95   score: -200.0   memory length: 10000   epsilon: 0.617920000000737
[[-0.46019646  0.        ]]
episode: 96   score: -200.0   memory length: 10000   epsilon: 0.6139400000007447
[[-0.54592069  0.        ]]
episode: 97   score: -200.0   memory length: 10000   epsilon: 0.6099600000007523
[[-0.49624867  0.        ]]
episode: 98   score: -200.0   memory length: 10000   epsilon: 0.60598000000076
[[-0.44538826  0.        ]]
episode: 99   score: -200.0   memory length: 10000   epsilon: 0.6020000000007677
[[-0.57243476  0.        ]]
episode: 100   score: -200.0   memory length: 10000   epsilon: 0.5980200000007754
[[-0.47713665  0.        ]]
episode: 101   score: -200.0   memory length: 10000   epsilon: 0.594040000000783
[[-0.43696973  0.        ]]
episode: 102   score: -200.0   memory length: 10000   epsilon: 0.5900600000007907
[[-0.51822516  0.        ]]
episode: 103   score: -200.0   memory length: 10000   epsilon: 0.5860800000007984
[[-0.57757928  0.        ]]
episode: 104   score: -200.0   memory length: 10000   epsilon: 0.5821000000008061
[[-0.50640183  0.        ]]
episode: 105   score: -200.0   memory length: 10000   epsilon: 0.5781200000008138
[[-0.55401746  0.        ]]
episode: 106   score: -200.0   memory length: 10000   epsilon: 0.5741400000008214
[[-0.56007237  0.        ]]
episode: 107   score: -200.0   memory length: 10000   epsilon: 0.5701600000008291
[[-0.46892224  0.        ]]
episode: 108   score: -200.0   memory length: 10000   epsilon: 0.5661800000008368
[[-0.52506473  0.        ]]
episode: 109   score: -200.0   memory length: 10000   epsilon: 0.5622000000008445
[[-0.5427446  0.       ]]
episode: 110   score: -200.0   memory length: 10000   epsilon: 0.5582200000008521
[[-0.48940847  0.        ]]
episode: 111   score: -200.0   memory length: 10000   epsilon: 0.5542400000008598
[[-0.59065694  0.        ]]
episode: 112   score: -200.0   memory length: 10000   epsilon: 0.5502600000008675
[[-0.47420754  0.        ]]
episode: 113   score: -200.0   memory length: 10000   epsilon: 0.5462800000008752
[[-0.44506599  0.        ]]
episode: 114   score: -200.0   memory length: 10000   epsilon: 0.5423000000008829
[[-0.49423618  0.        ]]
episode: 115   score: -200.0   memory length: 10000   epsilon: 0.5383200000008905
[[-0.58124739  0.        ]]
episode: 116   score: -200.0   memory length: 10000   epsilon: 0.5343400000008982
[[-0.50255614  0.        ]]
episode: 117   score: -200.0   memory length: 10000   epsilon: 0.5303600000009059
[[-0.56708248  0.        ]]
episode: 118   score: -200.0   memory length: 10000   epsilon: 0.5263800000009136
[[-0.52079585  0.        ]]
episode: 119   score: -200.0   memory length: 10000   epsilon: 0.5224000000009212
[[-0.55922569  0.        ]]
episode: 120   score: -200.0   memory length: 10000   epsilon: 0.5184200000009289
[[-0.46360373  0.        ]]
episode: 121   score: -200.0   memory length: 10000   epsilon: 0.5144400000009366
[[-0.52980339  0.        ]]
episode: 122   score: -200.0   memory length: 10000   epsilon: 0.5104600000009443
[[-0.41984305  0.        ]]
episode: 123   score: -163.0   memory length: 10000   epsilon: 0.5072163000009505
[[-0.48018849  0.        ]]
episode: 124   score: -200.0   memory length: 10000   epsilon: 0.5032363000009582
[[-0.4248494  0.       ]]
episode: 125   score: -200.0   memory length: 10000   epsilon: 0.49925630000096377
[[-0.59341968  0.        ]]
episode: 126   score: -200.0   memory length: 10000   epsilon: 0.49527630000096035
[[-0.4542485  0.       ]]
episode: 127   score: -200.0   memory length: 10000   epsilon: 0.4912963000009569
[[-0.59104737  0.        ]]
episode: 128   score: -200.0   memory length: 10000   epsilon: 0.4873163000009535
[[-0.54778854  0.        ]]
episode: 129   score: -200.0   memory length: 10000   epsilon: 0.48333630000095007
[[-0.43671273  0.        ]]
episode: 130   score: -200.0   memory length: 10000   epsilon: 0.47935630000094664
[[-0.46647181  0.        ]]
episode: 131   score: -200.0   memory length: 10000   epsilon: 0.4753763000009432
[[-0.42261082  0.        ]]
episode: 132   score: -200.0   memory length: 10000   epsilon: 0.4713963000009398
[[-0.50903525  0.        ]]
episode: 133   score: -200.0   memory length: 10000   epsilon: 0.46741630000093637
[[-0.51191011  0.        ]]
episode: 134   score: -200.0   memory length: 10000   epsilon: 0.46343630000093294
[[-0.49379035  0.        ]]
episode: 135   score: -200.0   memory length: 10000   epsilon: 0.4594563000009295
[[-0.49663071  0.        ]]
episode: 136   score: -200.0   memory length: 10000   epsilon: 0.4554763000009261
[[-0.47029481  0.        ]]
episode: 137   score: -200.0   memory length: 10000   epsilon: 0.45149630000092267
[[-0.54383056  0.        ]]
episode: 138   score: -200.0   memory length: 10000   epsilon: 0.44751630000091924
[[-0.41537694  0.        ]]
episode: 139   score: -200.0   memory length: 10000   epsilon: 0.4435363000009158
[[-0.51539076  0.        ]]
episode: 140   score: -200.0   memory length: 10000   epsilon: 0.4395563000009124
[[-0.44947841  0.        ]]
episode: 141   score: -200.0   memory length: 10000   epsilon: 0.43557630000090897
[[-0.43729748  0.        ]]
episode: 142   score: -200.0   memory length: 10000   epsilon: 0.43159630000090554
[[-0.55028236  0.        ]]
episode: 143   score: -200.0   memory length: 10000   epsilon: 0.4276163000009021
[[-0.58004906  0.        ]]
episode: 144   score: -200.0   memory length: 10000   epsilon: 0.4236363000008987
[[-0.43187533  0.        ]]
episode: 145   score: -200.0   memory length: 10000   epsilon: 0.41965630000089527
[[-0.5219291  0.       ]]
episode: 146   score: -200.0   memory length: 10000   epsilon: 0.41567630000089184
[[-0.48659875  0.        ]]
episode: 147   score: -200.0   memory length: 10000   epsilon: 0.4116963000008884
[[-0.4143881  0.       ]]
episode: 148   score: -200.0   memory length: 10000   epsilon: 0.407716300000885
[[-0.57761872  0.        ]]
episode: 149   score: -200.0   memory length: 10000   epsilon: 0.40373630000088156
[[-0.58228779  0.        ]]
episode: 150   score: -200.0   memory length: 10000   epsilon: 0.39975630000087814
[[-0.42523175  0.        ]]
episode: 151   score: -158.0   memory length: 10000   epsilon: 0.39661210000087543
[[-0.47001968  0.        ]]
episode: 152   score: -161.0   memory length: 10000   epsilon: 0.3934082000008727
[[-0.46853772  0.        ]]
episode: 153   score: -200.0   memory length: 10000   epsilon: 0.38942820000086925
[[-0.45892435  0.        ]]
episode: 154   score: -200.0   memory length: 10000   epsilon: 0.3854482000008658
[[-0.52258008  0.        ]]
episode: 155   score: -200.0   memory length: 10000   epsilon: 0.3814682000008624
[[-0.56408358  0.        ]]
episode: 156   score: -200.0   memory length: 10000   epsilon: 0.377488200000859
[[-0.52447452  0.        ]]
episode: 157   score: -200.0   memory length: 10000   epsilon: 0.37350820000085555
[[-0.41945954  0.        ]]
episode: 158   score: -200.0   memory length: 10000   epsilon: 0.3695282000008521
[[-0.46652422  0.        ]]
episode: 159   score: -200.0   memory length: 10000   epsilon: 0.3655482000008487
[[-0.52585353  0.        ]]
episode: 160   score: -200.0   memory length: 10000   epsilon: 0.3615682000008453
[[-0.43531502  0.        ]]
episode: 161   score: -200.0   memory length: 10000   epsilon: 0.35758820000084185
[[-0.5895076  0.       ]]
episode: 162   score: -178.0   memory length: 10000   epsilon: 0.3540460000008388
[[-0.54971828  0.        ]]
episode: 163   score: -200.0   memory length: 10000   epsilon: 0.3500660000008354
[[-0.58446289  0.        ]]
episode: 164   score: -148.0   memory length: 10000   epsilon: 0.34712080000083284
[[-0.41082584  0.        ]]
episode: 165   score: -195.0   memory length: 10000   epsilon: 0.3432403000008295
[[-0.56113161  0.        ]]
episode: 166   score: -200.0   memory length: 10000   epsilon: 0.3392603000008261
[[-0.55972957  0.        ]]
episode: 167   score: -178.0   memory length: 10000   epsilon: 0.335718100000823
[[-0.50873074  0.        ]]
episode: 168   score: -200.0   memory length: 10000   epsilon: 0.3317381000008196
[[-0.5234839  0.       ]]
episode: 169   score: -151.0   memory length: 10000   epsilon: 0.328733200000817
[[-0.52122228  0.        ]]
episode: 170   score: -176.0   memory length: 10000   epsilon: 0.325230800000814
[[-0.48051585  0.        ]]
episode: 171   score: -155.0   memory length: 10000   epsilon: 0.32214630000081135
[[-0.43870798  0.        ]]
episode: 172   score: -161.0   memory length: 10000   epsilon: 0.3189424000008086
[[-0.48515461  0.        ]]
episode: 173   score: -200.0   memory length: 10000   epsilon: 0.31496240000080516
[[-0.43877229  0.        ]]
episode: 174   score: -173.0   memory length: 10000   epsilon: 0.3115197000008022
[[-0.47759404  0.        ]]
episode: 175   score: -200.0   memory length: 10000   epsilon: 0.3075397000007988
[[-0.52161798  0.        ]]
episode: 176   score: -158.0   memory length: 10000   epsilon: 0.30439550000079607
[[-0.43560485  0.        ]]
episode: 177   score: -169.0   memory length: 10000   epsilon: 0.3010324000007932
[[-0.59466904  0.        ]]
episode: 178   score: -200.0   memory length: 10000   epsilon: 0.29705240000078975
[[-0.56231917  0.        ]]
episode: 179   score: -123.0   memory length: 10000   epsilon: 0.29460470000078764
[[-0.55216933  0.        ]]
episode: 180   score: -146.0   memory length: 10000   epsilon: 0.29169930000078514
[[-0.58449613  0.        ]]
episode: 181   score: -142.0   memory length: 10000   epsilon: 0.2888735000007827
[[-0.47130237  0.        ]]
episode: 182   score: -200.0   memory length: 10000   epsilon: 0.2848935000007793
[[-0.56965522  0.        ]]
episode: 183   score: -142.0   memory length: 10000   epsilon: 0.28206770000077686
[[-0.43775627  0.        ]]
episode: 184   score: -200.0   memory length: 10000   epsilon: 0.27808770000077343
[[-0.50058993  0.        ]]
episode: 185   score: -184.0   memory length: 10000   epsilon: 0.2744261000007703
[[-0.54816653  0.        ]]
episode: 186   score: -158.0   memory length: 10000   epsilon: 0.2712819000007676
[[-0.48020591  0.        ]]
episode: 187   score: -153.0   memory length: 10000   epsilon: 0.26823720000076495
[[-0.45713646  0.        ]]
episode: 188   score: -200.0   memory length: 10000   epsilon: 0.2642572000007615
[[-0.40749228  0.        ]]
episode: 189   score: -160.0   memory length: 10000   epsilon: 0.2610732000007588
[[-0.42856741  0.        ]]
episode: 190   score: -200.0   memory length: 10000   epsilon: 0.25709320000075536
[[-0.57535217  0.        ]]
episode: 191   score: -144.0   memory length: 10000   epsilon: 0.2542276000007529
[[-0.51040831  0.        ]]
episode: 192   score: -146.0   memory length: 10000   epsilon: 0.2513222000007504
[[-0.57475393  0.        ]]
episode: 193   score: -150.0   memory length: 10000   epsilon: 0.24833720000075016
[[-0.50223638  0.        ]]
episode: 194   score: -148.0   memory length: 10000   epsilon: 0.24539200000075173
[[-0.53262211  0.        ]]
episode: 195   score: -159.0   memory length: 10000   epsilon: 0.24222790000075342
[[-0.56161893  0.        ]]
episode: 196   score: -169.0   memory length: 10000   epsilon: 0.23886480000075522
[[-0.42170116  0.        ]]
episode: 197   score: -160.0   memory length: 10000   epsilon: 0.23568080000075692
[[-0.57719812  0.        ]]
episode: 198   score: -164.0   memory length: 10000   epsilon: 0.23241720000075866
[[-0.54577766  0.        ]]
episode: 199   score: -171.0   memory length: 10000   epsilon: 0.22901430000076048
[[-0.42297623  0.        ]]
episode: 200   score: -156.0   memory length: 10000   epsilon: 0.22590990000076214
[[-0.40603717  0.        ]]
episode: 201   score: -160.0   memory length: 10000   epsilon: 0.22272590000076384
[[-0.41931218  0.        ]]
episode: 202   score: -172.0   memory length: 10000   epsilon: 0.21930310000076567
[[-0.56553196  0.        ]]
episode: 203   score: -200.0   memory length: 10000   epsilon: 0.2153231000007678
[[-0.43784997  0.        ]]
episode: 204   score: -145.0   memory length: 10000   epsilon: 0.21243760000076933
[[-0.49978158  0.        ]]
episode: 205   score: -157.0   memory length: 10000   epsilon: 0.209313300000771
[[-0.57706619  0.        ]]
episode: 206   score: -156.0   memory length: 10000   epsilon: 0.20620890000077266
[[-0.53738194  0.        ]]
episode: 207   score: -147.0   memory length: 10000   epsilon: 0.20328360000077422
[[-0.56905515  0.        ]]
episode: 208   score: -153.0   memory length: 10000   epsilon: 0.20023890000077585
[[-0.55252216  0.        ]]
episode: 209   score: -161.0   memory length: 10000   epsilon: 0.19703500000077756
[[-0.56202566  0.        ]]
episode: 210   score: -200.0   memory length: 10000   epsilon: 0.1930550000007797
[[-0.4230178  0.       ]]
episode: 211   score: -90.0   memory length: 10000   epsilon: 0.19126400000078064
[[-0.53858058  0.        ]]
episode: 212   score: -154.0   memory length: 10000   epsilon: 0.18819940000078228
[[-0.57262015  0.        ]]
episode: 213   score: -185.0   memory length: 10000   epsilon: 0.18451790000078425
[[-0.42122182  0.        ]]
episode: 214   score: -186.0   memory length: 10000   epsilon: 0.18081650000078622
[[-0.42174627  0.        ]]
episode: 215   score: -154.0   memory length: 10000   epsilon: 0.17775190000078786
[[-0.45490762  0.        ]]
episode: 216   score: -173.0   memory length: 10000   epsilon: 0.1743092000007897
[[-0.40830367  0.        ]]
episode: 217   score: -98.0   memory length: 10000   epsilon: 0.17235900000079074
[[-0.58589035  0.        ]]
episode: 218   score: -200.0   memory length: 10000   epsilon: 0.16837900000079287
[[-0.4268946  0.       ]]
episode: 219   score: -87.0   memory length: 10000   epsilon: 0.1666477000007938
[[-0.53504626  0.        ]]
episode: 220   score: -159.0   memory length: 10000   epsilon: 0.16348360000079548
[[-0.40699699  0.        ]]
episode: 221   score: -158.0   memory length: 10000   epsilon: 0.16033940000079716
[[-0.50566597  0.        ]]
episode: 222   score: -165.0   memory length: 10000   epsilon: 0.15705590000079891
[[-0.5925687  0.       ]]
episode: 223   score: -158.0   memory length: 10000   epsilon: 0.1539117000008006
[[-0.41324602  0.        ]]
episode: 224   score: -160.0   memory length: 10000   epsilon: 0.1507277000008023
[[-0.58039285  0.        ]]
episode: 225   score: -158.0   memory length: 10000   epsilon: 0.14758350000080397
[[-0.42648443  0.        ]]
episode: 226   score: -87.0   memory length: 10000   epsilon: 0.1458522000008049
[[-0.50708814  0.        ]]
episode: 227   score: -149.0   memory length: 10000   epsilon: 0.14288710000080648
[[-0.41249059  0.        ]]
episode: 228   score: -84.0   memory length: 10000   epsilon: 0.14121550000080738
[[-0.42028637  0.        ]]
episode: 229   score: -93.0   memory length: 10000   epsilon: 0.13936480000080836
[[-0.43533363  0.        ]]
episode: 230   score: -89.0   memory length: 10000   epsilon: 0.1375937000008093
[[-0.4360518  0.       ]]
episode: 231   score: -159.0   memory length: 10000   epsilon: 0.134429600000811
[[-0.56222776  0.        ]]
episode: 232   score: -173.0   memory length: 10000   epsilon: 0.13098690000081284
[[-0.52297067  0.        ]]
episode: 233   score: -161.0   memory length: 10000   epsilon: 0.12778300000081455
[[-0.40397583  0.        ]]
episode: 234   score: -91.0   memory length: 10000   epsilon: 0.12597210000081552
[[-0.56797411  0.        ]]
episode: 235   score: -137.0   memory length: 10000   epsilon: 0.12324580000081574
[[-0.42016342  0.        ]]
episode: 236   score: -85.0   memory length: 10000   epsilon: 0.12155430000081546
[[-0.47226652  0.        ]]
episode: 237   score: -176.0   memory length: 10000   epsilon: 0.11805190000081489
[[-0.51257929  0.        ]]
episode: 238   score: -200.0   memory length: 10000   epsilon: 0.11407190000081424
[[-0.47264555  0.        ]]
episode: 239   score: -200.0   memory length: 10000   epsilon: 0.11009190000081359
[[-0.50994982  0.        ]]
episode: 240   score: -200.0   memory length: 10000   epsilon: 0.10611190000081294
[[-0.40089041  0.        ]]
episode: 241   score: -164.0   memory length: 10000   epsilon: 0.10284830000081241
[[-0.5026515  0.       ]]
episode: 242   score: -155.0   memory length: 10000   epsilon: 0.0997638000008119
[[-0.58479386  0.        ]]
episode: 243   score: -200.0   memory length: 10000   epsilon: 0.09578380000081126
[[-0.4312842  0.       ]]
episode: 244   score: -155.0   memory length: 10000   epsilon: 0.09269930000081075
[[-0.46443047  0.        ]]
episode: 245   score: -157.0   memory length: 10000   epsilon: 0.08957500000081024
[[-0.56783193  0.        ]]
episode: 246   score: -170.0   memory length: 10000   epsilon: 0.08619200000080969
[[-0.58014049  0.        ]]
episode: 247   score: -162.0   memory length: 10000   epsilon: 0.08296820000080916
[[-0.59985676  0.        ]]
episode: 248   score: -168.0   memory length: 10000   epsilon: 0.07962500000080862
[[-0.43380759  0.        ]]
episode: 249   score: -87.0   memory length: 10000   epsilon: 0.07789370000080834
[[-0.58061307  0.        ]]
episode: 250   score: -114.0   memory length: 10000   epsilon: 0.07562510000080797
[[-0.4985184  0.       ]]
episode: 251   score: -150.0   memory length: 10000   epsilon: 0.07264010000080748
[[-0.55453915  0.        ]]
episode: 252   score: -153.0   memory length: 10000   epsilon: 0.06959540000080698
[[-0.46036644  0.        ]]
episode: 253   score: -94.0   memory length: 10000   epsilon: 0.06772480000080668
[[-0.41446941  0.        ]]
episode: 254   score: -184.0   memory length: 10000   epsilon: 0.06406320000080608
[[-0.59681357  0.        ]]
episode: 255   score: -138.0   memory length: 10000   epsilon: 0.06131700000080563
[[-0.5496289  0.       ]]
episode: 256   score: -142.0   memory length: 10000   epsilon: 0.05849120000080517
[[-0.50045515  0.        ]]
episode: 257   score: -156.0   memory length: 10000   epsilon: 0.05538680000080466
[[-0.56675928  0.        ]]
episode: 258   score: -200.0   memory length: 10000   epsilon: 0.05140680000080401
[[-0.57558011  0.        ]]
episode: 259   score: -145.0   memory length: 10000   epsilon: 0.04852130000080354
[[-0.47796024  0.        ]]
episode: 260   score: -122.0   memory length: 10000   epsilon: 0.046093500000803145
[[-0.59081463  0.        ]]
episode: 261   score: -200.0   memory length: 10000   epsilon: 0.042113500000802495
[[-0.55002963  0.        ]]
episode: 262   score: -110.0   memory length: 10000   epsilon: 0.03992450000080214
[[-0.52043081  0.        ]]
episode: 263   score: -121.0   memory length: 10000   epsilon: 0.037516600000801745
[[-0.59769522  0.        ]]
episode: 264   score: -110.0   memory length: 10000   epsilon: 0.03532760000080139
[[-0.51437978  0.        ]]
episode: 265   score: -110.0   memory length: 10000   epsilon: 0.03313860000080103
[[-0.44893587  0.        ]]
episode: 266   score: -146.0   memory length: 10000   epsilon: 0.030233200000800736
[[-0.51685797  0.        ]]
episode: 267   score: -160.0   memory length: 10000   epsilon: 0.02704920000080077
[[-0.47462  0.     ]]
episode: 268   score: -104.0   memory length: 10000   epsilon: 0.024979600000800795
[[-0.5019885  0.       ]]
episode: 269   score: -110.0   memory length: 10000   epsilon: 0.02279060000080082
[[-0.47305762  0.        ]]
episode: 270   score: -158.0   memory length: 10000   epsilon: 0.019646400000800854
[[-0.51073562  0.        ]]
episode: 271   score: -133.0   memory length: 10000   epsilon: 0.016999700000800883
[[-0.4923762  0.       ]]
episode: 272   score: -137.0   memory length: 10000   epsilon: 0.014273400000800913
[[-0.55797097  0.        ]]
episode: 273   score: -127.0   memory length: 10000   epsilon: 0.011746100000800942
[[-0.59900498  0.        ]]
episode: 274   score: -127.0   memory length: 10000   epsilon: 0.00921880000080097
[[-0.58388244  0.        ]]
episode: 275   score: -127.0   memory length: 10000   epsilon: 0.006691500000800998
[[-0.59368756  0.        ]]
episode: 276   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57436173  0.        ]]
episode: 277   score: -127.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56023349  0.        ]]
episode: 278   score: -127.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45971983  0.        ]]
episode: 279   score: -158.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56133058  0.        ]]
episode: 280   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58737453  0.        ]]
episode: 281   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58664132  0.        ]]
episode: 282   score: -126.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50129781  0.        ]]
episode: 283   score: -127.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42404278  0.        ]]
episode: 284   score: -87.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56099323  0.        ]]
episode: 285   score: -118.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57068995  0.        ]]
episode: 286   score: -129.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56549358  0.        ]]
episode: 287   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53296228  0.        ]]
episode: 288   score: -147.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48675349  0.        ]]
episode: 289   score: -123.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57565089  0.        ]]
episode: 290   score: -122.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57154522  0.        ]]
episode: 291   score: -128.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58121677  0.        ]]
episode: 292   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40198841  0.        ]]
episode: 293   score: -83.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53425651  0.        ]]
episode: 294   score: -161.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56722541  0.        ]]
episode: 295   score: -109.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52507152  0.        ]]
episode: 296   score: -164.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53589507  0.        ]]
episode: 297   score: -159.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55385166  0.        ]]
episode: 298   score: -110.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54766998  0.        ]]
episode: 299   score: -153.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46448562  0.        ]]
episode: 300   score: -117.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47004932  0.        ]]
episode: 301   score: -104.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40057286  0.        ]]
episode: 302   score: -92.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49845152  0.        ]]
episode: 303   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47363956  0.        ]]
episode: 304   score: -108.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53906473  0.        ]]
episode: 305   score: -160.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44759462  0.        ]]
episode: 306   score: -91.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40253145  0.        ]]
episode: 307   score: -83.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40896943  0.        ]]
episode: 308   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42799413  0.        ]]
episode: 309   score: -88.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44247343  0.        ]]
episode: 310   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4087797  0.       ]]
episode: 311   score: -85.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55036395  0.        ]]
episode: 312   score: -159.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58533419  0.        ]]
episode: 313   score: -140.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46214596  0.        ]]
episode: 314   score: -100.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46642114  0.        ]]
episode: 315   score: -97.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47382992  0.        ]]
episode: 316   score: -101.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44531244  0.        ]]
episode: 317   score: -91.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5324469  0.       ]]
episode: 318   score: -145.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42072736  0.        ]]
episode: 319   score: -87.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54400415  0.        ]]
episode: 320   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55189661  0.        ]]
episode: 321   score: -125.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47664529  0.        ]]
episode: 322   score: -148.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51036835  0.        ]]
episode: 323   score: -117.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4128931  0.       ]]
episode: 324   score: -85.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4829788  0.       ]]
episode: 325   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51114155  0.        ]]
episode: 326   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42746789  0.        ]]
episode: 327   score: -161.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53875084  0.        ]]
episode: 328   score: -122.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50030003  0.        ]]
episode: 329   score: -152.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4083966  0.       ]]
episode: 330   score: -89.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46417906  0.        ]]
episode: 331   score: -135.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41793079  0.        ]]
episode: 332   score: -86.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45696644  0.        ]]
episode: 333   score: -156.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42937115  0.        ]]
episode: 334   score: -138.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42977664  0.        ]]
episode: 335   score: -135.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56510376  0.        ]]
episode: 336   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41424411  0.        ]]
episode: 337   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44434882  0.        ]]
episode: 338   score: -140.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56055849  0.        ]]
episode: 339   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58669943  0.        ]]
episode: 340   score: -123.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44344659  0.        ]]
episode: 341   score: -134.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42817683  0.        ]]
episode: 342   score: -133.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.59480564  0.        ]]
episode: 343   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45535048  0.        ]]
episode: 344   score: -130.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44825934  0.        ]]
episode: 345   score: -133.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42711792  0.        ]]
episode: 346   score: -95.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48431452  0.        ]]
episode: 347   score: -127.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.43173724  0.        ]]
episode: 348   score: -96.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56540694  0.        ]]
episode: 349   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52898539  0.        ]]
episode: 350   score: -115.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52325385  0.        ]]
episode: 351   score: -125.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54595474  0.        ]]
episode: 352   score: -122.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57178426  0.        ]]
episode: 353   score: -116.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4509011  0.       ]]
episode: 354   score: -92.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41142839  0.        ]]
episode: 355   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53808286  0.        ]]
episode: 356   score: -110.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.59866881  0.        ]]
episode: 357   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42694446  0.        ]]
episode: 358   score: -86.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41204268  0.        ]]
episode: 359   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49827709  0.        ]]
episode: 360   score: -183.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58545606  0.        ]]
episode: 361   score: -117.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54991615  0.        ]]
episode: 362   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58452317  0.        ]]
episode: 363   score: -125.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46802778  0.        ]]
episode: 364   score: -98.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54057639  0.        ]]
episode: 365   score: -130.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46797903  0.        ]]
episode: 366   score: -98.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55943142  0.        ]]
episode: 367   score: -151.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49272108  0.        ]]
episode: 368   score: -182.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53452328  0.        ]]
episode: 369   score: -162.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5469488  0.       ]]
episode: 370   score: -131.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49935266  0.        ]]
episode: 371   score: -165.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58894034  0.        ]]
episode: 372   score: -128.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45742762  0.        ]]
episode: 373   score: -157.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49444265  0.        ]]
episode: 374   score: -177.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58796947  0.        ]]
episode: 375   score: -125.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56671306  0.        ]]
episode: 376   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52926234  0.        ]]
episode: 377   score: -151.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50690089  0.        ]]
episode: 378   score: -160.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40977119  0.        ]]
episode: 379   score: -88.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42420488  0.        ]]
episode: 380   score: -87.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57502006  0.        ]]
episode: 381   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44749625  0.        ]]
episode: 382   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55280011  0.        ]]
episode: 383   score: -116.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5241961  0.       ]]
episode: 384   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45106694  0.        ]]
episode: 385   score: -92.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53272173  0.        ]]
episode: 386   score: -123.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50211944  0.        ]]
episode: 387   score: -158.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58461493  0.        ]]
episode: 388   score: -118.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52690706  0.        ]]
episode: 389   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42406475  0.        ]]
episode: 390   score: -87.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50219359  0.        ]]
episode: 391   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50944497  0.        ]]
episode: 392   score: -112.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4499301  0.       ]]
episode: 393   score: -91.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48664147  0.        ]]
episode: 394   score: -185.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40250542  0.        ]]
episode: 395   score: -87.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55940586  0.        ]]
episode: 396   score: -110.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49638763  0.        ]]
episode: 397   score: -114.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50415901  0.        ]]
episode: 398   score: -170.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44019124  0.        ]]
episode: 399   score: -88.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40384005  0.        ]]
episode: 400   score: -85.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42474959  0.        ]]
episode: 401   score: -178.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49511055  0.        ]]
episode: 402   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40597957  0.        ]]
episode: 403   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53177054  0.        ]]
episode: 404   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53165586  0.        ]]
episode: 405   score: -115.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55957347  0.        ]]
episode: 406   score: -110.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4719085  0.       ]]
episode: 407   score: -193.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5894683  0.       ]]
episode: 408   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46537427  0.        ]]
episode: 409   score: -97.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40495527  0.        ]]
episode: 410   score: -85.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45212238  0.        ]]
episode: 411   score: -91.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46413318  0.        ]]
episode: 412   score: -115.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50960589  0.        ]]
episode: 413   score: -123.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44286115  0.        ]]
episode: 414   score: -92.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41411773  0.        ]]
episode: 415   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45364027  0.        ]]
episode: 416   score: -97.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55931187  0.        ]]
episode: 417   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.43717269  0.        ]]
episode: 418   score: -88.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.59201388  0.        ]]
episode: 419   score: -170.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50772654  0.        ]]
episode: 420   score: -130.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56911362  0.        ]]
episode: 421   score: -115.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.49731741  0.        ]]
episode: 422   score: -114.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51965117  0.        ]]
episode: 423   score: -112.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51013238  0.        ]]
episode: 424   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57309879  0.        ]]
episode: 425   score: -111.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46422621  0.        ]]
episode: 426   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53195501  0.        ]]
episode: 427   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57835903  0.        ]]
episode: 428   score: -125.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44561965  0.        ]]
episode: 429   score: -93.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.43761423  0.        ]]
episode: 430   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4553457  0.       ]]
episode: 431   score: -134.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57951188  0.        ]]
episode: 432   score: -116.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44647931  0.        ]]
episode: 433   score: -134.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58668178  0.        ]]
episode: 434   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51210294  0.        ]]
episode: 435   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46755815  0.        ]]
episode: 436   score: -136.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41013835  0.        ]]
episode: 437   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.43157246  0.        ]]
episode: 438   score: -87.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51453829  0.        ]]
episode: 439   score: -151.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42310999  0.        ]]
episode: 440   score: -86.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50857887  0.        ]]
episode: 441   score: -129.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45804664  0.        ]]
episode: 442   score: -93.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53430862  0.        ]]
episode: 443   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52209343  0.        ]]
episode: 444   score: -118.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42046135  0.        ]]
episode: 445   score: -86.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4167956  0.       ]]
episode: 446   score: -85.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47986214  0.        ]]
episode: 447   score: -112.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.45925401  0.        ]]
episode: 448   score: -94.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.59370583  0.        ]]
episode: 449   score: -114.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46804653  0.        ]]
episode: 450   score: -98.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5855297  0.       ]]
episode: 451   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47741411  0.        ]]
episode: 452   score: -107.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47302522  0.        ]]
episode: 453   score: -101.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57827038  0.        ]]
episode: 454   score: -162.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.58785624  0.        ]]
episode: 455   score: -118.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48881764  0.        ]]
episode: 456   score: -178.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54935973  0.        ]]
episode: 457   score: -177.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46545698  0.        ]]
episode: 458   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.59434911  0.        ]]
episode: 459   score: -113.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.52461367  0.        ]]
episode: 460   score: -158.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5577489  0.       ]]
episode: 461   score: -159.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53849395  0.        ]]
episode: 462   score: -159.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55504775  0.        ]]
episode: 463   score: -165.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41387258  0.        ]]
episode: 464   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46649818  0.        ]]
episode: 465   score: -97.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46896413  0.        ]]
episode: 466   score: -154.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44514699  0.        ]]
episode: 467   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51697761  0.        ]]
episode: 468   score: -156.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48799183  0.        ]]
episode: 469   score: -160.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.53351568  0.        ]]
episode: 470   score: -158.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57832872  0.        ]]
episode: 471   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57713982  0.        ]]
episode: 472   score: -120.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.54895169  0.        ]]
episode: 473   score: -114.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.43991613  0.        ]]
episode: 474   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44268352  0.        ]]
episode: 475   score: -89.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40888225  0.        ]]
episode: 476   score: -85.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.55136662  0.        ]]
episode: 477   score: -116.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51995644  0.        ]]
episode: 478   score: -157.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56317807  0.        ]]
episode: 479   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.4838376  0.       ]]
episode: 480   score: -157.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57445365  0.        ]]
episode: 481   score: -110.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46989019  0.        ]]
episode: 482   score: -99.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48953526  0.        ]]
episode: 483   score: -158.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56655943  0.        ]]
episode: 484   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46430587  0.        ]]
episode: 485   score: -156.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5382927  0.       ]]
episode: 486   score: -111.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.50613699  0.        ]]
episode: 487   score: -111.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.48997816  0.        ]]
episode: 488   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.57075207  0.        ]]
episode: 489   score: -121.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.51398182  0.        ]]
episode: 490   score: -117.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47987869  0.        ]]
episode: 491   score: -107.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.44123889  0.        ]]
episode: 492   score: -90.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.41439868  0.        ]]
episode: 493   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.46244858  0.        ]]
episode: 494   score: -95.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.47837307  0.        ]]
episode: 495   score: -105.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.40827071  0.        ]]
episode: 496   score: -84.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.42474999  0.        ]]
episode: 497   score: -86.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.56696973  0.        ]]
episode: 498   score: -124.0   memory length: 10000   epsilon: 0.004980100000801017
[[-0.5458167  0.       ]]
episode: 499   score: -119.0   memory length: 10000   epsilon: 0.004980100000801017

In [9]:
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

plt.figure(figsize=(20,10))

every_i = 5
scores_i = scores[::every_i]
plt.plot(range(len(scores))[::every_i], scores_i);



In [ ]: