In [1]:
import gym
import pylab
import random
import numpy as np
from collections import deque
In [2]:
import os
os.environ['THEANO_FLAGS'] = "device=cuda*"
import theano
ERROR (theano.gpuarray): pygpu was configured but could not be imported or is too old (version 0.7 or higher required)
NoneType
In [3]:
#Importing keras Dense (fully connected) layer and Sequential model
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import RMSprop
Using Theano backend.
In [4]:
class DeepQAgent:
def __init__(self, state_size, action_size, render=True):
# Tip: if you are training this on AWS the best way is to turn off rendering
# and load it later with the serialized model
self.render = render
self.state_size = state_size
self.action_size = action_size
self.discount_factor = 0.99
self.learning_rate = 0.001
self.epsilon = 1.0
self.epsilon_min = 0.005
self.epsilon_decay = (self.epsilon - self.epsilon_min) / 50000
self.batch_size = 64
self.train_start = 1000
# replay memory
self.memory = deque(maxlen=10000)
self.model = self.build_model()
self.target_model = self.build_model()
self.update_target_model()
def build_model(self):
# Use tflearn to get simple NN for deep q-learning
# Spoler alert: a couple of fully connected hidden layers should be enough
# Output layer should have the same dimensionality as the action space
model = Sequential()
model.add(Dense(128, input_dim = self.state_size, activation='linear', init='lecun_uniform'))
model.add(Dense(64, activation='relu', init='lecun_uniform'))
model.add(Dense(32, activation='relu', init='lecun_uniform'))
model.add(Dense(self.action_size, activation='linear', init='lecun_uniform'))
model.compile(optimizer=RMSprop(lr=self.learning_rate), loss='mean_squared_error')
return model
def update_target_model(self):
"""Update your target model to the model you are currently learning at regular time intervals"""
self.target_model.set_weights(self.model.get_weights())
def get_action(self, state):
"""The choice of action uses the epsilon-greedy policy for the current network."""
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
else:
q_value = self.model.predict(state)
return np.argmax(q_value[0])
def replay_memory(self, state, action, reward, next_state, done):
"""Save <s, a, r, s'> to replay_memory"""
if action == 2:
action = 1
self.memory.append((state, action, reward, next_state, done))
if self.epsilon > self.epsilon_min:
self.epsilon -= self.epsilon_decay
# print(len(self.memory))
def train_replay(self):
"""Random sampling of batch_size samples from replay memory"""
if len(self.memory) < self.train_start:
return
batch_size = min(self.batch_size, len(self.memory))
mini_batch = random.sample(self.memory, batch_size)
update_input = np.zeros((batch_size, self.state_size))
update_target = np.zeros((batch_size, self.action_size))
for i in range(batch_size):
state, action, reward, next_state, done = mini_batch[i]
target = self.model.predict(state)[0]
# As in queuing, it gets the maximum Q Value at s'. However, it is imported from the target model.
if done:
target[action] = reward
else:
target[action] = reward + self.discount_factor * \
np.amax(self.target_model.predict(next_state)[0])
update_input[i] = state
update_target[i] = target
# You can create a minibatch of the correct target answer and the current value of your own,
self.model.fit(update_input, update_target, batch_size=batch_size, epochs=1, verbose=0)
def load_model(self, name):
self.model.load_model(name)
def save_model(self, name):
self.model.save(name)
In [5]:
env = gym.make('MountainCar-v0')
state_size = env.observation_space.shape[0] # should be equal 2
ACTION_SIZE = 2
agent = DeepQAgent(state_size, ACTION_SIZE)
# agent.load_model("./save_model/<your_saved_model_name>")
scores, episodes = [], []
N_EPISODES = 500
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:29: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(128, activation="linear", kernel_initializer="lecun_uniform", input_dim=2)`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:30: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(64, activation="relu", kernel_initializer="lecun_uniform")`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:31: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(32, activation="relu", kernel_initializer="lecun_uniform")`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:32: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(2, activation="linear", kernel_initializer="lecun_uniform")`
/home/margarita/.local/lib/python3.5/site-packages/ipykernel_launcher.py:29: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(128, activation="linear", kernel_initializer="lecun_uniform", input_dim=2)`
In [6]:
for e in range(N_EPISODES):
done = False
score = 0
state = env.reset()
state = np.reshape(state, [1, state_size])
print(state)
# Action 0 (left), 1 (do nothing), 3 (declare fake_action to avoid doing nothing
fake_action = 0
# Counter for the same action 4 times
action_count = 0
while not done:
#if agent.render:
# env.render()
# Select an action in the current state and proceed to a step
action_count = action_count + 1
if action_count == 4:
action = agent.get_action(state)
action_count = 0
if action == 0:
fake_action = 0
elif action == 1:
fake_action = 2
# Take 1 step with the selected action
next_state, reward, done, info = env.step(fake_action)
next_state = np.reshape(next_state, [1, state_size])
# Give a penalty of -100 for actions that end an episode
# reward = reward if not done else -100
# Save <s, a, r, s'> to replay memory
agent.replay_memory(state, fake_action, reward, next_state, done)
# Continue to learn every time step
agent.train_replay()
score += reward
state = next_state
if done:
env.reset()
# Copy the learning model for each episode to the target model
agent.update_target_model()
# For each episode, the time step where cartpole stood is plot
scores.append(score)
episodes.append(e)
print("episode:", e, " score:", score, " memory length:", len(agent.memory),
" epsilon:", agent.epsilon)
# Save model for every 50 episodes
if e % 50 == 0:
agent.save_model("save_model/my_model")
[[-0.53583677 0. ]]
episode: 0 score: -200.0 memory length: 200 epsilon: 0.9960200000000077
[[-0.51427713 0. ]]
episode: 1 score: -200.0 memory length: 400 epsilon: 0.9920400000000154
[[-0.49273763 0. ]]
episode: 2 score: -200.0 memory length: 600 epsilon: 0.988060000000023
[[-0.51529317 0. ]]
episode: 3 score: -200.0 memory length: 800 epsilon: 0.9840800000000307
[[-0.40901322 0. ]]
episode: 4 score: -200.0 memory length: 1000 epsilon: 0.9801000000000384
[[-0.54091158 0. ]]
episode: 5 score: -200.0 memory length: 1200 epsilon: 0.9761200000000461
[[-0.54000392 0. ]]
episode: 6 score: -200.0 memory length: 1400 epsilon: 0.9721400000000537
[[-0.57237521 0. ]]
episode: 7 score: -200.0 memory length: 1600 epsilon: 0.9681600000000614
[[-0.41698564 0. ]]
episode: 8 score: -200.0 memory length: 1800 epsilon: 0.9641800000000691
[[-0.47841907 0. ]]
episode: 9 score: -200.0 memory length: 2000 epsilon: 0.9602000000000768
[[-0.48101037 0. ]]
episode: 10 score: -200.0 memory length: 2200 epsilon: 0.9562200000000844
[[-0.4009593 0. ]]
episode: 11 score: -200.0 memory length: 2400 epsilon: 0.9522400000000921
[[-0.54627004 0. ]]
episode: 12 score: -200.0 memory length: 2600 epsilon: 0.9482600000000998
[[-0.57592189 0. ]]
episode: 13 score: -200.0 memory length: 2800 epsilon: 0.9442800000001075
[[-0.55303378 0. ]]
episode: 14 score: -200.0 memory length: 3000 epsilon: 0.9403000000001152
[[-0.51921346 0. ]]
episode: 15 score: -200.0 memory length: 3200 epsilon: 0.9363200000001228
[[-0.58209287 0. ]]
episode: 16 score: -200.0 memory length: 3400 epsilon: 0.9323400000001305
[[-0.51444356 0. ]]
episode: 17 score: -200.0 memory length: 3600 epsilon: 0.9283600000001382
[[-0.48856965 0. ]]
episode: 18 score: -200.0 memory length: 3800 epsilon: 0.9243800000001459
[[-0.52592939 0. ]]
episode: 19 score: -200.0 memory length: 4000 epsilon: 0.9204000000001535
[[-0.40226442 0. ]]
episode: 20 score: -200.0 memory length: 4200 epsilon: 0.9164200000001612
[[-0.56113105 0. ]]
episode: 21 score: -200.0 memory length: 4400 epsilon: 0.9124400000001689
[[-0.41590624 0. ]]
episode: 22 score: -200.0 memory length: 4600 epsilon: 0.9084600000001766
[[-0.58668832 0. ]]
episode: 23 score: -200.0 memory length: 4800 epsilon: 0.9044800000001842
[[-0.57197046 0. ]]
episode: 24 score: -200.0 memory length: 5000 epsilon: 0.9005000000001919
[[-0.58796145 0. ]]
episode: 25 score: -200.0 memory length: 5200 epsilon: 0.8965200000001996
[[-0.54007079 0. ]]
episode: 26 score: -200.0 memory length: 5400 epsilon: 0.8925400000002073
[[-0.55405352 0. ]]
episode: 27 score: -200.0 memory length: 5600 epsilon: 0.888560000000215
[[-0.45043119 0. ]]
episode: 28 score: -200.0 memory length: 5800 epsilon: 0.8845800000002226
[[-0.41990999 0. ]]
episode: 29 score: -200.0 memory length: 6000 epsilon: 0.8806000000002303
[[-0.43730989 0. ]]
episode: 30 score: -200.0 memory length: 6200 epsilon: 0.876620000000238
[[-0.50100287 0. ]]
episode: 31 score: -200.0 memory length: 6400 epsilon: 0.8726400000002457
[[-0.49338285 0. ]]
episode: 32 score: -200.0 memory length: 6600 epsilon: 0.8686600000002533
[[-0.4032447 0. ]]
episode: 33 score: -200.0 memory length: 6800 epsilon: 0.864680000000261
[[-0.44586296 0. ]]
episode: 34 score: -200.0 memory length: 7000 epsilon: 0.8607000000002687
[[-0.59018498 0. ]]
episode: 35 score: -200.0 memory length: 7200 epsilon: 0.8567200000002764
[[-0.49603244 0. ]]
episode: 36 score: -200.0 memory length: 7400 epsilon: 0.852740000000284
[[-0.54446517 0. ]]
episode: 37 score: -200.0 memory length: 7600 epsilon: 0.8487600000002917
[[-0.52823535 0. ]]
episode: 38 score: -200.0 memory length: 7800 epsilon: 0.8447800000002994
[[-0.51298533 0. ]]
episode: 39 score: -200.0 memory length: 8000 epsilon: 0.8408000000003071
[[-0.41050116 0. ]]
episode: 40 score: -200.0 memory length: 8200 epsilon: 0.8368200000003148
[[-0.42597386 0. ]]
episode: 41 score: -200.0 memory length: 8400 epsilon: 0.8328400000003224
[[-0.55355716 0. ]]
episode: 42 score: -200.0 memory length: 8600 epsilon: 0.8288600000003301
[[-0.56754626 0. ]]
episode: 43 score: -200.0 memory length: 8800 epsilon: 0.8248800000003378
[[-0.50842845 0. ]]
episode: 44 score: -200.0 memory length: 9000 epsilon: 0.8209000000003455
[[-0.55137717 0. ]]
episode: 45 score: -200.0 memory length: 9200 epsilon: 0.8169200000003531
[[-0.53398167 0. ]]
episode: 46 score: -200.0 memory length: 9400 epsilon: 0.8129400000003608
[[-0.40767622 0. ]]
episode: 47 score: -200.0 memory length: 9600 epsilon: 0.8089600000003685
[[-0.48439841 0. ]]
episode: 48 score: -200.0 memory length: 9800 epsilon: 0.8049800000003762
[[-0.47099825 0. ]]
episode: 49 score: -200.0 memory length: 10000 epsilon: 0.8010000000003838
[[-0.57954626 0. ]]
episode: 50 score: -200.0 memory length: 10000 epsilon: 0.7970200000003915
[[-0.49926608 0. ]]
episode: 51 score: -200.0 memory length: 10000 epsilon: 0.7930400000003992
[[-0.49412279 0. ]]
episode: 52 score: -200.0 memory length: 10000 epsilon: 0.7890600000004069
[[-0.53672628 0. ]]
episode: 53 score: -200.0 memory length: 10000 epsilon: 0.7850800000004146
[[-0.43159426 0. ]]
episode: 54 score: -200.0 memory length: 10000 epsilon: 0.7811000000004222
[[-0.54554478 0. ]]
episode: 55 score: -200.0 memory length: 10000 epsilon: 0.7771200000004299
[[-0.45349228 0. ]]
episode: 56 score: -200.0 memory length: 10000 epsilon: 0.7731400000004376
[[-0.49099491 0. ]]
episode: 57 score: -200.0 memory length: 10000 epsilon: 0.7691600000004453
[[-0.57888741 0. ]]
episode: 58 score: -200.0 memory length: 10000 epsilon: 0.7651800000004529
[[-0.44762946 0. ]]
episode: 59 score: -200.0 memory length: 10000 epsilon: 0.7612000000004606
[[-0.48877814 0. ]]
episode: 60 score: -200.0 memory length: 10000 epsilon: 0.7572200000004683
[[-0.43510342 0. ]]
episode: 61 score: -200.0 memory length: 10000 epsilon: 0.753240000000476
[[-0.52402513 0. ]]
episode: 62 score: -200.0 memory length: 10000 epsilon: 0.7492600000004837
[[-0.47118868 0. ]]
episode: 63 score: -200.0 memory length: 10000 epsilon: 0.7452800000004913
[[-0.56399676 0. ]]
episode: 64 score: -200.0 memory length: 10000 epsilon: 0.741300000000499
[[-0.42158939 0. ]]
episode: 65 score: -200.0 memory length: 10000 epsilon: 0.7373200000005067
[[-0.47666474 0. ]]
episode: 66 score: -200.0 memory length: 10000 epsilon: 0.7333400000005144
[[-0.4025322 0. ]]
episode: 67 score: -200.0 memory length: 10000 epsilon: 0.729360000000522
[[-0.40186572 0. ]]
episode: 68 score: -200.0 memory length: 10000 epsilon: 0.7253800000005297
[[-0.44114639 0. ]]
episode: 69 score: -200.0 memory length: 10000 epsilon: 0.7214000000005374
[[-0.49575583 0. ]]
episode: 70 score: -200.0 memory length: 10000 epsilon: 0.7174200000005451
[[-0.55105772 0. ]]
episode: 71 score: -200.0 memory length: 10000 epsilon: 0.7134400000005527
[[-0.55503067 0. ]]
episode: 72 score: -200.0 memory length: 10000 epsilon: 0.7094600000005604
[[-0.52340394 0. ]]
episode: 73 score: -200.0 memory length: 10000 epsilon: 0.7054800000005681
[[-0.40468782 0. ]]
episode: 74 score: -200.0 memory length: 10000 epsilon: 0.7015000000005758
[[-0.51053962 0. ]]
episode: 75 score: -200.0 memory length: 10000 epsilon: 0.6975200000005835
[[-0.48131336 0. ]]
episode: 76 score: -200.0 memory length: 10000 epsilon: 0.6935400000005911
[[-0.48214496 0. ]]
episode: 77 score: -200.0 memory length: 10000 epsilon: 0.6895600000005988
[[-0.49975231 0. ]]
episode: 78 score: -200.0 memory length: 10000 epsilon: 0.6855800000006065
[[-0.59734485 0. ]]
episode: 79 score: -200.0 memory length: 10000 epsilon: 0.6816000000006142
[[-0.40969727 0. ]]
episode: 80 score: -200.0 memory length: 10000 epsilon: 0.6776200000006218
[[-0.54055586 0. ]]
episode: 81 score: -200.0 memory length: 10000 epsilon: 0.6736400000006295
[[-0.41712782 0. ]]
episode: 82 score: -200.0 memory length: 10000 epsilon: 0.6696600000006372
[[-0.56951423 0. ]]
episode: 83 score: -200.0 memory length: 10000 epsilon: 0.6656800000006449
[[-0.4025934 0. ]]
episode: 84 score: -200.0 memory length: 10000 epsilon: 0.6617000000006525
[[-0.51069655 0. ]]
episode: 85 score: -200.0 memory length: 10000 epsilon: 0.6577200000006602
[[-0.52388745 0. ]]
episode: 86 score: -200.0 memory length: 10000 epsilon: 0.6537400000006679
[[-0.44822788 0. ]]
episode: 87 score: -200.0 memory length: 10000 epsilon: 0.6497600000006756
[[-0.55029183 0. ]]
episode: 88 score: -200.0 memory length: 10000 epsilon: 0.6457800000006833
[[-0.44705405 0. ]]
episode: 89 score: -200.0 memory length: 10000 epsilon: 0.6418000000006909
[[-0.47693751 0. ]]
episode: 90 score: -200.0 memory length: 10000 epsilon: 0.6378200000006986
[[-0.49278408 0. ]]
episode: 91 score: -200.0 memory length: 10000 epsilon: 0.6338400000007063
[[-0.56078766 0. ]]
episode: 92 score: -200.0 memory length: 10000 epsilon: 0.629860000000714
[[-0.57611755 0. ]]
episode: 93 score: -200.0 memory length: 10000 epsilon: 0.6258800000007216
[[-0.54468901 0. ]]
episode: 94 score: -200.0 memory length: 10000 epsilon: 0.6219000000007293
[[-0.52738988 0. ]]
episode: 95 score: -200.0 memory length: 10000 epsilon: 0.617920000000737
[[-0.46019646 0. ]]
episode: 96 score: -200.0 memory length: 10000 epsilon: 0.6139400000007447
[[-0.54592069 0. ]]
episode: 97 score: -200.0 memory length: 10000 epsilon: 0.6099600000007523
[[-0.49624867 0. ]]
episode: 98 score: -200.0 memory length: 10000 epsilon: 0.60598000000076
[[-0.44538826 0. ]]
episode: 99 score: -200.0 memory length: 10000 epsilon: 0.6020000000007677
[[-0.57243476 0. ]]
episode: 100 score: -200.0 memory length: 10000 epsilon: 0.5980200000007754
[[-0.47713665 0. ]]
episode: 101 score: -200.0 memory length: 10000 epsilon: 0.594040000000783
[[-0.43696973 0. ]]
episode: 102 score: -200.0 memory length: 10000 epsilon: 0.5900600000007907
[[-0.51822516 0. ]]
episode: 103 score: -200.0 memory length: 10000 epsilon: 0.5860800000007984
[[-0.57757928 0. ]]
episode: 104 score: -200.0 memory length: 10000 epsilon: 0.5821000000008061
[[-0.50640183 0. ]]
episode: 105 score: -200.0 memory length: 10000 epsilon: 0.5781200000008138
[[-0.55401746 0. ]]
episode: 106 score: -200.0 memory length: 10000 epsilon: 0.5741400000008214
[[-0.56007237 0. ]]
episode: 107 score: -200.0 memory length: 10000 epsilon: 0.5701600000008291
[[-0.46892224 0. ]]
episode: 108 score: -200.0 memory length: 10000 epsilon: 0.5661800000008368
[[-0.52506473 0. ]]
episode: 109 score: -200.0 memory length: 10000 epsilon: 0.5622000000008445
[[-0.5427446 0. ]]
episode: 110 score: -200.0 memory length: 10000 epsilon: 0.5582200000008521
[[-0.48940847 0. ]]
episode: 111 score: -200.0 memory length: 10000 epsilon: 0.5542400000008598
[[-0.59065694 0. ]]
episode: 112 score: -200.0 memory length: 10000 epsilon: 0.5502600000008675
[[-0.47420754 0. ]]
episode: 113 score: -200.0 memory length: 10000 epsilon: 0.5462800000008752
[[-0.44506599 0. ]]
episode: 114 score: -200.0 memory length: 10000 epsilon: 0.5423000000008829
[[-0.49423618 0. ]]
episode: 115 score: -200.0 memory length: 10000 epsilon: 0.5383200000008905
[[-0.58124739 0. ]]
episode: 116 score: -200.0 memory length: 10000 epsilon: 0.5343400000008982
[[-0.50255614 0. ]]
episode: 117 score: -200.0 memory length: 10000 epsilon: 0.5303600000009059
[[-0.56708248 0. ]]
episode: 118 score: -200.0 memory length: 10000 epsilon: 0.5263800000009136
[[-0.52079585 0. ]]
episode: 119 score: -200.0 memory length: 10000 epsilon: 0.5224000000009212
[[-0.55922569 0. ]]
episode: 120 score: -200.0 memory length: 10000 epsilon: 0.5184200000009289
[[-0.46360373 0. ]]
episode: 121 score: -200.0 memory length: 10000 epsilon: 0.5144400000009366
[[-0.52980339 0. ]]
episode: 122 score: -200.0 memory length: 10000 epsilon: 0.5104600000009443
[[-0.41984305 0. ]]
episode: 123 score: -163.0 memory length: 10000 epsilon: 0.5072163000009505
[[-0.48018849 0. ]]
episode: 124 score: -200.0 memory length: 10000 epsilon: 0.5032363000009582
[[-0.4248494 0. ]]
episode: 125 score: -200.0 memory length: 10000 epsilon: 0.49925630000096377
[[-0.59341968 0. ]]
episode: 126 score: -200.0 memory length: 10000 epsilon: 0.49527630000096035
[[-0.4542485 0. ]]
episode: 127 score: -200.0 memory length: 10000 epsilon: 0.4912963000009569
[[-0.59104737 0. ]]
episode: 128 score: -200.0 memory length: 10000 epsilon: 0.4873163000009535
[[-0.54778854 0. ]]
episode: 129 score: -200.0 memory length: 10000 epsilon: 0.48333630000095007
[[-0.43671273 0. ]]
episode: 130 score: -200.0 memory length: 10000 epsilon: 0.47935630000094664
[[-0.46647181 0. ]]
episode: 131 score: -200.0 memory length: 10000 epsilon: 0.4753763000009432
[[-0.42261082 0. ]]
episode: 132 score: -200.0 memory length: 10000 epsilon: 0.4713963000009398
[[-0.50903525 0. ]]
episode: 133 score: -200.0 memory length: 10000 epsilon: 0.46741630000093637
[[-0.51191011 0. ]]
episode: 134 score: -200.0 memory length: 10000 epsilon: 0.46343630000093294
[[-0.49379035 0. ]]
episode: 135 score: -200.0 memory length: 10000 epsilon: 0.4594563000009295
[[-0.49663071 0. ]]
episode: 136 score: -200.0 memory length: 10000 epsilon: 0.4554763000009261
[[-0.47029481 0. ]]
episode: 137 score: -200.0 memory length: 10000 epsilon: 0.45149630000092267
[[-0.54383056 0. ]]
episode: 138 score: -200.0 memory length: 10000 epsilon: 0.44751630000091924
[[-0.41537694 0. ]]
episode: 139 score: -200.0 memory length: 10000 epsilon: 0.4435363000009158
[[-0.51539076 0. ]]
episode: 140 score: -200.0 memory length: 10000 epsilon: 0.4395563000009124
[[-0.44947841 0. ]]
episode: 141 score: -200.0 memory length: 10000 epsilon: 0.43557630000090897
[[-0.43729748 0. ]]
episode: 142 score: -200.0 memory length: 10000 epsilon: 0.43159630000090554
[[-0.55028236 0. ]]
episode: 143 score: -200.0 memory length: 10000 epsilon: 0.4276163000009021
[[-0.58004906 0. ]]
episode: 144 score: -200.0 memory length: 10000 epsilon: 0.4236363000008987
[[-0.43187533 0. ]]
episode: 145 score: -200.0 memory length: 10000 epsilon: 0.41965630000089527
[[-0.5219291 0. ]]
episode: 146 score: -200.0 memory length: 10000 epsilon: 0.41567630000089184
[[-0.48659875 0. ]]
episode: 147 score: -200.0 memory length: 10000 epsilon: 0.4116963000008884
[[-0.4143881 0. ]]
episode: 148 score: -200.0 memory length: 10000 epsilon: 0.407716300000885
[[-0.57761872 0. ]]
episode: 149 score: -200.0 memory length: 10000 epsilon: 0.40373630000088156
[[-0.58228779 0. ]]
episode: 150 score: -200.0 memory length: 10000 epsilon: 0.39975630000087814
[[-0.42523175 0. ]]
episode: 151 score: -158.0 memory length: 10000 epsilon: 0.39661210000087543
[[-0.47001968 0. ]]
episode: 152 score: -161.0 memory length: 10000 epsilon: 0.3934082000008727
[[-0.46853772 0. ]]
episode: 153 score: -200.0 memory length: 10000 epsilon: 0.38942820000086925
[[-0.45892435 0. ]]
episode: 154 score: -200.0 memory length: 10000 epsilon: 0.3854482000008658
[[-0.52258008 0. ]]
episode: 155 score: -200.0 memory length: 10000 epsilon: 0.3814682000008624
[[-0.56408358 0. ]]
episode: 156 score: -200.0 memory length: 10000 epsilon: 0.377488200000859
[[-0.52447452 0. ]]
episode: 157 score: -200.0 memory length: 10000 epsilon: 0.37350820000085555
[[-0.41945954 0. ]]
episode: 158 score: -200.0 memory length: 10000 epsilon: 0.3695282000008521
[[-0.46652422 0. ]]
episode: 159 score: -200.0 memory length: 10000 epsilon: 0.3655482000008487
[[-0.52585353 0. ]]
episode: 160 score: -200.0 memory length: 10000 epsilon: 0.3615682000008453
[[-0.43531502 0. ]]
episode: 161 score: -200.0 memory length: 10000 epsilon: 0.35758820000084185
[[-0.5895076 0. ]]
episode: 162 score: -178.0 memory length: 10000 epsilon: 0.3540460000008388
[[-0.54971828 0. ]]
episode: 163 score: -200.0 memory length: 10000 epsilon: 0.3500660000008354
[[-0.58446289 0. ]]
episode: 164 score: -148.0 memory length: 10000 epsilon: 0.34712080000083284
[[-0.41082584 0. ]]
episode: 165 score: -195.0 memory length: 10000 epsilon: 0.3432403000008295
[[-0.56113161 0. ]]
episode: 166 score: -200.0 memory length: 10000 epsilon: 0.3392603000008261
[[-0.55972957 0. ]]
episode: 167 score: -178.0 memory length: 10000 epsilon: 0.335718100000823
[[-0.50873074 0. ]]
episode: 168 score: -200.0 memory length: 10000 epsilon: 0.3317381000008196
[[-0.5234839 0. ]]
episode: 169 score: -151.0 memory length: 10000 epsilon: 0.328733200000817
[[-0.52122228 0. ]]
episode: 170 score: -176.0 memory length: 10000 epsilon: 0.325230800000814
[[-0.48051585 0. ]]
episode: 171 score: -155.0 memory length: 10000 epsilon: 0.32214630000081135
[[-0.43870798 0. ]]
episode: 172 score: -161.0 memory length: 10000 epsilon: 0.3189424000008086
[[-0.48515461 0. ]]
episode: 173 score: -200.0 memory length: 10000 epsilon: 0.31496240000080516
[[-0.43877229 0. ]]
episode: 174 score: -173.0 memory length: 10000 epsilon: 0.3115197000008022
[[-0.47759404 0. ]]
episode: 175 score: -200.0 memory length: 10000 epsilon: 0.3075397000007988
[[-0.52161798 0. ]]
episode: 176 score: -158.0 memory length: 10000 epsilon: 0.30439550000079607
[[-0.43560485 0. ]]
episode: 177 score: -169.0 memory length: 10000 epsilon: 0.3010324000007932
[[-0.59466904 0. ]]
episode: 178 score: -200.0 memory length: 10000 epsilon: 0.29705240000078975
[[-0.56231917 0. ]]
episode: 179 score: -123.0 memory length: 10000 epsilon: 0.29460470000078764
[[-0.55216933 0. ]]
episode: 180 score: -146.0 memory length: 10000 epsilon: 0.29169930000078514
[[-0.58449613 0. ]]
episode: 181 score: -142.0 memory length: 10000 epsilon: 0.2888735000007827
[[-0.47130237 0. ]]
episode: 182 score: -200.0 memory length: 10000 epsilon: 0.2848935000007793
[[-0.56965522 0. ]]
episode: 183 score: -142.0 memory length: 10000 epsilon: 0.28206770000077686
[[-0.43775627 0. ]]
episode: 184 score: -200.0 memory length: 10000 epsilon: 0.27808770000077343
[[-0.50058993 0. ]]
episode: 185 score: -184.0 memory length: 10000 epsilon: 0.2744261000007703
[[-0.54816653 0. ]]
episode: 186 score: -158.0 memory length: 10000 epsilon: 0.2712819000007676
[[-0.48020591 0. ]]
episode: 187 score: -153.0 memory length: 10000 epsilon: 0.26823720000076495
[[-0.45713646 0. ]]
episode: 188 score: -200.0 memory length: 10000 epsilon: 0.2642572000007615
[[-0.40749228 0. ]]
episode: 189 score: -160.0 memory length: 10000 epsilon: 0.2610732000007588
[[-0.42856741 0. ]]
episode: 190 score: -200.0 memory length: 10000 epsilon: 0.25709320000075536
[[-0.57535217 0. ]]
episode: 191 score: -144.0 memory length: 10000 epsilon: 0.2542276000007529
[[-0.51040831 0. ]]
episode: 192 score: -146.0 memory length: 10000 epsilon: 0.2513222000007504
[[-0.57475393 0. ]]
episode: 193 score: -150.0 memory length: 10000 epsilon: 0.24833720000075016
[[-0.50223638 0. ]]
episode: 194 score: -148.0 memory length: 10000 epsilon: 0.24539200000075173
[[-0.53262211 0. ]]
episode: 195 score: -159.0 memory length: 10000 epsilon: 0.24222790000075342
[[-0.56161893 0. ]]
episode: 196 score: -169.0 memory length: 10000 epsilon: 0.23886480000075522
[[-0.42170116 0. ]]
episode: 197 score: -160.0 memory length: 10000 epsilon: 0.23568080000075692
[[-0.57719812 0. ]]
episode: 198 score: -164.0 memory length: 10000 epsilon: 0.23241720000075866
[[-0.54577766 0. ]]
episode: 199 score: -171.0 memory length: 10000 epsilon: 0.22901430000076048
[[-0.42297623 0. ]]
episode: 200 score: -156.0 memory length: 10000 epsilon: 0.22590990000076214
[[-0.40603717 0. ]]
episode: 201 score: -160.0 memory length: 10000 epsilon: 0.22272590000076384
[[-0.41931218 0. ]]
episode: 202 score: -172.0 memory length: 10000 epsilon: 0.21930310000076567
[[-0.56553196 0. ]]
episode: 203 score: -200.0 memory length: 10000 epsilon: 0.2153231000007678
[[-0.43784997 0. ]]
episode: 204 score: -145.0 memory length: 10000 epsilon: 0.21243760000076933
[[-0.49978158 0. ]]
episode: 205 score: -157.0 memory length: 10000 epsilon: 0.209313300000771
[[-0.57706619 0. ]]
episode: 206 score: -156.0 memory length: 10000 epsilon: 0.20620890000077266
[[-0.53738194 0. ]]
episode: 207 score: -147.0 memory length: 10000 epsilon: 0.20328360000077422
[[-0.56905515 0. ]]
episode: 208 score: -153.0 memory length: 10000 epsilon: 0.20023890000077585
[[-0.55252216 0. ]]
episode: 209 score: -161.0 memory length: 10000 epsilon: 0.19703500000077756
[[-0.56202566 0. ]]
episode: 210 score: -200.0 memory length: 10000 epsilon: 0.1930550000007797
[[-0.4230178 0. ]]
episode: 211 score: -90.0 memory length: 10000 epsilon: 0.19126400000078064
[[-0.53858058 0. ]]
episode: 212 score: -154.0 memory length: 10000 epsilon: 0.18819940000078228
[[-0.57262015 0. ]]
episode: 213 score: -185.0 memory length: 10000 epsilon: 0.18451790000078425
[[-0.42122182 0. ]]
episode: 214 score: -186.0 memory length: 10000 epsilon: 0.18081650000078622
[[-0.42174627 0. ]]
episode: 215 score: -154.0 memory length: 10000 epsilon: 0.17775190000078786
[[-0.45490762 0. ]]
episode: 216 score: -173.0 memory length: 10000 epsilon: 0.1743092000007897
[[-0.40830367 0. ]]
episode: 217 score: -98.0 memory length: 10000 epsilon: 0.17235900000079074
[[-0.58589035 0. ]]
episode: 218 score: -200.0 memory length: 10000 epsilon: 0.16837900000079287
[[-0.4268946 0. ]]
episode: 219 score: -87.0 memory length: 10000 epsilon: 0.1666477000007938
[[-0.53504626 0. ]]
episode: 220 score: -159.0 memory length: 10000 epsilon: 0.16348360000079548
[[-0.40699699 0. ]]
episode: 221 score: -158.0 memory length: 10000 epsilon: 0.16033940000079716
[[-0.50566597 0. ]]
episode: 222 score: -165.0 memory length: 10000 epsilon: 0.15705590000079891
[[-0.5925687 0. ]]
episode: 223 score: -158.0 memory length: 10000 epsilon: 0.1539117000008006
[[-0.41324602 0. ]]
episode: 224 score: -160.0 memory length: 10000 epsilon: 0.1507277000008023
[[-0.58039285 0. ]]
episode: 225 score: -158.0 memory length: 10000 epsilon: 0.14758350000080397
[[-0.42648443 0. ]]
episode: 226 score: -87.0 memory length: 10000 epsilon: 0.1458522000008049
[[-0.50708814 0. ]]
episode: 227 score: -149.0 memory length: 10000 epsilon: 0.14288710000080648
[[-0.41249059 0. ]]
episode: 228 score: -84.0 memory length: 10000 epsilon: 0.14121550000080738
[[-0.42028637 0. ]]
episode: 229 score: -93.0 memory length: 10000 epsilon: 0.13936480000080836
[[-0.43533363 0. ]]
episode: 230 score: -89.0 memory length: 10000 epsilon: 0.1375937000008093
[[-0.4360518 0. ]]
episode: 231 score: -159.0 memory length: 10000 epsilon: 0.134429600000811
[[-0.56222776 0. ]]
episode: 232 score: -173.0 memory length: 10000 epsilon: 0.13098690000081284
[[-0.52297067 0. ]]
episode: 233 score: -161.0 memory length: 10000 epsilon: 0.12778300000081455
[[-0.40397583 0. ]]
episode: 234 score: -91.0 memory length: 10000 epsilon: 0.12597210000081552
[[-0.56797411 0. ]]
episode: 235 score: -137.0 memory length: 10000 epsilon: 0.12324580000081574
[[-0.42016342 0. ]]
episode: 236 score: -85.0 memory length: 10000 epsilon: 0.12155430000081546
[[-0.47226652 0. ]]
episode: 237 score: -176.0 memory length: 10000 epsilon: 0.11805190000081489
[[-0.51257929 0. ]]
episode: 238 score: -200.0 memory length: 10000 epsilon: 0.11407190000081424
[[-0.47264555 0. ]]
episode: 239 score: -200.0 memory length: 10000 epsilon: 0.11009190000081359
[[-0.50994982 0. ]]
episode: 240 score: -200.0 memory length: 10000 epsilon: 0.10611190000081294
[[-0.40089041 0. ]]
episode: 241 score: -164.0 memory length: 10000 epsilon: 0.10284830000081241
[[-0.5026515 0. ]]
episode: 242 score: -155.0 memory length: 10000 epsilon: 0.0997638000008119
[[-0.58479386 0. ]]
episode: 243 score: -200.0 memory length: 10000 epsilon: 0.09578380000081126
[[-0.4312842 0. ]]
episode: 244 score: -155.0 memory length: 10000 epsilon: 0.09269930000081075
[[-0.46443047 0. ]]
episode: 245 score: -157.0 memory length: 10000 epsilon: 0.08957500000081024
[[-0.56783193 0. ]]
episode: 246 score: -170.0 memory length: 10000 epsilon: 0.08619200000080969
[[-0.58014049 0. ]]
episode: 247 score: -162.0 memory length: 10000 epsilon: 0.08296820000080916
[[-0.59985676 0. ]]
episode: 248 score: -168.0 memory length: 10000 epsilon: 0.07962500000080862
[[-0.43380759 0. ]]
episode: 249 score: -87.0 memory length: 10000 epsilon: 0.07789370000080834
[[-0.58061307 0. ]]
episode: 250 score: -114.0 memory length: 10000 epsilon: 0.07562510000080797
[[-0.4985184 0. ]]
episode: 251 score: -150.0 memory length: 10000 epsilon: 0.07264010000080748
[[-0.55453915 0. ]]
episode: 252 score: -153.0 memory length: 10000 epsilon: 0.06959540000080698
[[-0.46036644 0. ]]
episode: 253 score: -94.0 memory length: 10000 epsilon: 0.06772480000080668
[[-0.41446941 0. ]]
episode: 254 score: -184.0 memory length: 10000 epsilon: 0.06406320000080608
[[-0.59681357 0. ]]
episode: 255 score: -138.0 memory length: 10000 epsilon: 0.06131700000080563
[[-0.5496289 0. ]]
episode: 256 score: -142.0 memory length: 10000 epsilon: 0.05849120000080517
[[-0.50045515 0. ]]
episode: 257 score: -156.0 memory length: 10000 epsilon: 0.05538680000080466
[[-0.56675928 0. ]]
episode: 258 score: -200.0 memory length: 10000 epsilon: 0.05140680000080401
[[-0.57558011 0. ]]
episode: 259 score: -145.0 memory length: 10000 epsilon: 0.04852130000080354
[[-0.47796024 0. ]]
episode: 260 score: -122.0 memory length: 10000 epsilon: 0.046093500000803145
[[-0.59081463 0. ]]
episode: 261 score: -200.0 memory length: 10000 epsilon: 0.042113500000802495
[[-0.55002963 0. ]]
episode: 262 score: -110.0 memory length: 10000 epsilon: 0.03992450000080214
[[-0.52043081 0. ]]
episode: 263 score: -121.0 memory length: 10000 epsilon: 0.037516600000801745
[[-0.59769522 0. ]]
episode: 264 score: -110.0 memory length: 10000 epsilon: 0.03532760000080139
[[-0.51437978 0. ]]
episode: 265 score: -110.0 memory length: 10000 epsilon: 0.03313860000080103
[[-0.44893587 0. ]]
episode: 266 score: -146.0 memory length: 10000 epsilon: 0.030233200000800736
[[-0.51685797 0. ]]
episode: 267 score: -160.0 memory length: 10000 epsilon: 0.02704920000080077
[[-0.47462 0. ]]
episode: 268 score: -104.0 memory length: 10000 epsilon: 0.024979600000800795
[[-0.5019885 0. ]]
episode: 269 score: -110.0 memory length: 10000 epsilon: 0.02279060000080082
[[-0.47305762 0. ]]
episode: 270 score: -158.0 memory length: 10000 epsilon: 0.019646400000800854
[[-0.51073562 0. ]]
episode: 271 score: -133.0 memory length: 10000 epsilon: 0.016999700000800883
[[-0.4923762 0. ]]
episode: 272 score: -137.0 memory length: 10000 epsilon: 0.014273400000800913
[[-0.55797097 0. ]]
episode: 273 score: -127.0 memory length: 10000 epsilon: 0.011746100000800942
[[-0.59900498 0. ]]
episode: 274 score: -127.0 memory length: 10000 epsilon: 0.00921880000080097
[[-0.58388244 0. ]]
episode: 275 score: -127.0 memory length: 10000 epsilon: 0.006691500000800998
[[-0.59368756 0. ]]
episode: 276 score: -124.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57436173 0. ]]
episode: 277 score: -127.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56023349 0. ]]
episode: 278 score: -127.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45971983 0. ]]
episode: 279 score: -158.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56133058 0. ]]
episode: 280 score: -124.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58737453 0. ]]
episode: 281 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58664132 0. ]]
episode: 282 score: -126.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50129781 0. ]]
episode: 283 score: -127.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42404278 0. ]]
episode: 284 score: -87.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56099323 0. ]]
episode: 285 score: -118.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57068995 0. ]]
episode: 286 score: -129.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56549358 0. ]]
episode: 287 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53296228 0. ]]
episode: 288 score: -147.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48675349 0. ]]
episode: 289 score: -123.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57565089 0. ]]
episode: 290 score: -122.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57154522 0. ]]
episode: 291 score: -128.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58121677 0. ]]
episode: 292 score: -124.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40198841 0. ]]
episode: 293 score: -83.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53425651 0. ]]
episode: 294 score: -161.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56722541 0. ]]
episode: 295 score: -109.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52507152 0. ]]
episode: 296 score: -164.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53589507 0. ]]
episode: 297 score: -159.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55385166 0. ]]
episode: 298 score: -110.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54766998 0. ]]
episode: 299 score: -153.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46448562 0. ]]
episode: 300 score: -117.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47004932 0. ]]
episode: 301 score: -104.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40057286 0. ]]
episode: 302 score: -92.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49845152 0. ]]
episode: 303 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47363956 0. ]]
episode: 304 score: -108.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53906473 0. ]]
episode: 305 score: -160.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44759462 0. ]]
episode: 306 score: -91.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40253145 0. ]]
episode: 307 score: -83.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40896943 0. ]]
episode: 308 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42799413 0. ]]
episode: 309 score: -88.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44247343 0. ]]
episode: 310 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4087797 0. ]]
episode: 311 score: -85.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55036395 0. ]]
episode: 312 score: -159.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58533419 0. ]]
episode: 313 score: -140.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46214596 0. ]]
episode: 314 score: -100.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46642114 0. ]]
episode: 315 score: -97.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47382992 0. ]]
episode: 316 score: -101.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44531244 0. ]]
episode: 317 score: -91.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5324469 0. ]]
episode: 318 score: -145.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42072736 0. ]]
episode: 319 score: -87.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54400415 0. ]]
episode: 320 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55189661 0. ]]
episode: 321 score: -125.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47664529 0. ]]
episode: 322 score: -148.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51036835 0. ]]
episode: 323 score: -117.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4128931 0. ]]
episode: 324 score: -85.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4829788 0. ]]
episode: 325 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51114155 0. ]]
episode: 326 score: -124.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42746789 0. ]]
episode: 327 score: -161.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53875084 0. ]]
episode: 328 score: -122.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50030003 0. ]]
episode: 329 score: -152.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4083966 0. ]]
episode: 330 score: -89.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46417906 0. ]]
episode: 331 score: -135.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41793079 0. ]]
episode: 332 score: -86.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45696644 0. ]]
episode: 333 score: -156.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42937115 0. ]]
episode: 334 score: -138.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42977664 0. ]]
episode: 335 score: -135.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56510376 0. ]]
episode: 336 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41424411 0. ]]
episode: 337 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44434882 0. ]]
episode: 338 score: -140.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56055849 0. ]]
episode: 339 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58669943 0. ]]
episode: 340 score: -123.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44344659 0. ]]
episode: 341 score: -134.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42817683 0. ]]
episode: 342 score: -133.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.59480564 0. ]]
episode: 343 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45535048 0. ]]
episode: 344 score: -130.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44825934 0. ]]
episode: 345 score: -133.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42711792 0. ]]
episode: 346 score: -95.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48431452 0. ]]
episode: 347 score: -127.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.43173724 0. ]]
episode: 348 score: -96.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56540694 0. ]]
episode: 349 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52898539 0. ]]
episode: 350 score: -115.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52325385 0. ]]
episode: 351 score: -125.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54595474 0. ]]
episode: 352 score: -122.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57178426 0. ]]
episode: 353 score: -116.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4509011 0. ]]
episode: 354 score: -92.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41142839 0. ]]
episode: 355 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53808286 0. ]]
episode: 356 score: -110.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.59866881 0. ]]
episode: 357 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42694446 0. ]]
episode: 358 score: -86.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41204268 0. ]]
episode: 359 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49827709 0. ]]
episode: 360 score: -183.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58545606 0. ]]
episode: 361 score: -117.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54991615 0. ]]
episode: 362 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58452317 0. ]]
episode: 363 score: -125.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46802778 0. ]]
episode: 364 score: -98.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54057639 0. ]]
episode: 365 score: -130.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46797903 0. ]]
episode: 366 score: -98.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55943142 0. ]]
episode: 367 score: -151.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49272108 0. ]]
episode: 368 score: -182.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53452328 0. ]]
episode: 369 score: -162.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5469488 0. ]]
episode: 370 score: -131.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49935266 0. ]]
episode: 371 score: -165.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58894034 0. ]]
episode: 372 score: -128.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45742762 0. ]]
episode: 373 score: -157.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49444265 0. ]]
episode: 374 score: -177.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58796947 0. ]]
episode: 375 score: -125.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56671306 0. ]]
episode: 376 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52926234 0. ]]
episode: 377 score: -151.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50690089 0. ]]
episode: 378 score: -160.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40977119 0. ]]
episode: 379 score: -88.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42420488 0. ]]
episode: 380 score: -87.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57502006 0. ]]
episode: 381 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44749625 0. ]]
episode: 382 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55280011 0. ]]
episode: 383 score: -116.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5241961 0. ]]
episode: 384 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45106694 0. ]]
episode: 385 score: -92.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53272173 0. ]]
episode: 386 score: -123.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50211944 0. ]]
episode: 387 score: -158.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58461493 0. ]]
episode: 388 score: -118.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52690706 0. ]]
episode: 389 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42406475 0. ]]
episode: 390 score: -87.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50219359 0. ]]
episode: 391 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50944497 0. ]]
episode: 392 score: -112.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4499301 0. ]]
episode: 393 score: -91.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48664147 0. ]]
episode: 394 score: -185.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40250542 0. ]]
episode: 395 score: -87.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55940586 0. ]]
episode: 396 score: -110.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49638763 0. ]]
episode: 397 score: -114.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50415901 0. ]]
episode: 398 score: -170.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44019124 0. ]]
episode: 399 score: -88.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40384005 0. ]]
episode: 400 score: -85.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42474959 0. ]]
episode: 401 score: -178.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49511055 0. ]]
episode: 402 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40597957 0. ]]
episode: 403 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53177054 0. ]]
episode: 404 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53165586 0. ]]
episode: 405 score: -115.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55957347 0. ]]
episode: 406 score: -110.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4719085 0. ]]
episode: 407 score: -193.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5894683 0. ]]
episode: 408 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46537427 0. ]]
episode: 409 score: -97.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40495527 0. ]]
episode: 410 score: -85.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45212238 0. ]]
episode: 411 score: -91.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46413318 0. ]]
episode: 412 score: -115.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50960589 0. ]]
episode: 413 score: -123.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44286115 0. ]]
episode: 414 score: -92.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41411773 0. ]]
episode: 415 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45364027 0. ]]
episode: 416 score: -97.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55931187 0. ]]
episode: 417 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.43717269 0. ]]
episode: 418 score: -88.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.59201388 0. ]]
episode: 419 score: -170.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50772654 0. ]]
episode: 420 score: -130.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56911362 0. ]]
episode: 421 score: -115.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.49731741 0. ]]
episode: 422 score: -114.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51965117 0. ]]
episode: 423 score: -112.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51013238 0. ]]
episode: 424 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57309879 0. ]]
episode: 425 score: -111.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46422621 0. ]]
episode: 426 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53195501 0. ]]
episode: 427 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57835903 0. ]]
episode: 428 score: -125.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44561965 0. ]]
episode: 429 score: -93.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.43761423 0. ]]
episode: 430 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4553457 0. ]]
episode: 431 score: -134.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57951188 0. ]]
episode: 432 score: -116.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44647931 0. ]]
episode: 433 score: -134.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58668178 0. ]]
episode: 434 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51210294 0. ]]
episode: 435 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46755815 0. ]]
episode: 436 score: -136.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41013835 0. ]]
episode: 437 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.43157246 0. ]]
episode: 438 score: -87.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51453829 0. ]]
episode: 439 score: -151.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42310999 0. ]]
episode: 440 score: -86.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50857887 0. ]]
episode: 441 score: -129.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45804664 0. ]]
episode: 442 score: -93.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53430862 0. ]]
episode: 443 score: -124.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52209343 0. ]]
episode: 444 score: -118.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42046135 0. ]]
episode: 445 score: -86.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4167956 0. ]]
episode: 446 score: -85.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47986214 0. ]]
episode: 447 score: -112.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.45925401 0. ]]
episode: 448 score: -94.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.59370583 0. ]]
episode: 449 score: -114.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46804653 0. ]]
episode: 450 score: -98.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5855297 0. ]]
episode: 451 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47741411 0. ]]
episode: 452 score: -107.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47302522 0. ]]
episode: 453 score: -101.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57827038 0. ]]
episode: 454 score: -162.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.58785624 0. ]]
episode: 455 score: -118.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48881764 0. ]]
episode: 456 score: -178.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54935973 0. ]]
episode: 457 score: -177.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46545698 0. ]]
episode: 458 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.59434911 0. ]]
episode: 459 score: -113.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.52461367 0. ]]
episode: 460 score: -158.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5577489 0. ]]
episode: 461 score: -159.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53849395 0. ]]
episode: 462 score: -159.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55504775 0. ]]
episode: 463 score: -165.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41387258 0. ]]
episode: 464 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46649818 0. ]]
episode: 465 score: -97.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46896413 0. ]]
episode: 466 score: -154.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44514699 0. ]]
episode: 467 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51697761 0. ]]
episode: 468 score: -156.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48799183 0. ]]
episode: 469 score: -160.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.53351568 0. ]]
episode: 470 score: -158.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57832872 0. ]]
episode: 471 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57713982 0. ]]
episode: 472 score: -120.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.54895169 0. ]]
episode: 473 score: -114.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.43991613 0. ]]
episode: 474 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44268352 0. ]]
episode: 475 score: -89.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40888225 0. ]]
episode: 476 score: -85.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.55136662 0. ]]
episode: 477 score: -116.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51995644 0. ]]
episode: 478 score: -157.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56317807 0. ]]
episode: 479 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.4838376 0. ]]
episode: 480 score: -157.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57445365 0. ]]
episode: 481 score: -110.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46989019 0. ]]
episode: 482 score: -99.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48953526 0. ]]
episode: 483 score: -158.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56655943 0. ]]
episode: 484 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46430587 0. ]]
episode: 485 score: -156.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5382927 0. ]]
episode: 486 score: -111.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.50613699 0. ]]
episode: 487 score: -111.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.48997816 0. ]]
episode: 488 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.57075207 0. ]]
episode: 489 score: -121.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.51398182 0. ]]
episode: 490 score: -117.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47987869 0. ]]
episode: 491 score: -107.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.44123889 0. ]]
episode: 492 score: -90.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.41439868 0. ]]
episode: 493 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.46244858 0. ]]
episode: 494 score: -95.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.47837307 0. ]]
episode: 495 score: -105.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.40827071 0. ]]
episode: 496 score: -84.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.42474999 0. ]]
episode: 497 score: -86.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.56696973 0. ]]
episode: 498 score: -124.0 memory length: 10000 epsilon: 0.004980100000801017
[[-0.5458167 0. ]]
episode: 499 score: -119.0 memory length: 10000 epsilon: 0.004980100000801017
In [9]:
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
plt.figure(figsize=(20,10))
every_i = 5
scores_i = scores[::every_i]
plt.plot(range(len(scores))[::every_i], scores_i);
In [ ]:
Content source: akseshina/dl_course
Similar notebooks: