In [0]:
# Install OpenAI baselines directly from GitHub and gym from PyPI.
# `>/dev/null` discards pip's stdout only; anything pip writes to stderr is still shown.
!pip install git+https://github.com/openai/baselines >/dev/null
!pip install gym >/dev/null
In [0]:
import numpy as np
import random
import gym
from gym.utils import seeding
from gym import spaces
def state_name_to_int(state):
    """Translate a node letter of the Berater map into its integer index.

    The start node 'S' is 0 and the customers 'A'..'O' (there is no 'I' or 'J')
    follow as 1..13. Raises KeyError for a name that is not part of the map.
    """
    ordered_names = ('S', 'A', 'B', 'C', 'D', 'E', 'F',
                     'G', 'H', 'K', 'L', 'M', 'N', 'O')
    index_by_name = {name: index for index, name in enumerate(ordered_names)}
    return index_by_name[state]
def int_to_state_name(state_as_int):
    """Translate an integer node index back into its letter on the Berater map.

    Index 0 is the start node 'S'; 1..13 are the customers 'A'..'O' (the
    letters 'I' and 'J' are not used). Raises KeyError for any other index.
    """
    name_by_index = dict(enumerate('SABCDEFGHKLMNO'))
    return name_by_index[state_as_int]
class BeraterEnv(gym.Env):
    """
    The Berater Problem

    The agent starts at node 'S' of a weighted graph (self.map). Moving along
    an edge costs the edge weight; arriving at a node collects that node's
    remaining customer reward and sets it to 0. An episode is done once the
    agent is back at 'S' and no customer reward is left.

    Actions:
    There are 4 discrete deterministic actions, each choosing one direction,
    i.e. the index into the current node's list of outgoing paths. An index
    without a matching path leaves the position unchanged and costs 1000.

    Observation:
    See getObservation: position, net value of the 4 local paths, remaining
    reward of every node.
    """
    metadata = {'render.modes': ['ansi']}

    # Class-level logging switches shared by all instances.
    showStep = False          # print one line per step
    showDone = True           # print a summary line whenever an episode finishes
    envEpisodeModulo = 100    # the running averages in the summary restart every N episodes

    def __init__(self):
        """Define the graph, the action/observation spaces and draw the first reward layout."""
        # Smaller 4-node map, kept for reference:
        # self.map = {
        # 'S': [('A', 100), ('B', 400), ('C', 200 )],
        # 'A': [('B', 250), ('C', 400), ('S', 100 )],
        # 'B': [('A', 250), ('C', 250), ('S', 400 )],
        # 'C': [('A', 400), ('B', 250), ('S', 200 )]
        # }
        # node -> list of (neighbour, travel cost); the list index is the action
        self.map = {
            'S': [('A', 300), ('B', 100), ('C', 200)],
            'A': [('S', 300), ('B', 100), ('E', 100), ('D', 100)],
            'B': [('S', 100), ('A', 100), ('C', 50), ('K', 200)],
            'C': [('S', 200), ('B', 50), ('M', 100), ('L', 200)],
            'D': [('A', 100), ('F', 50)],
            'E': [('A', 100), ('F', 100), ('H', 100)],
            'F': [('D', 50), ('E', 100), ('G', 200)],
            'G': [('F', 200), ('O', 300)],
            'H': [('E', 100), ('K', 300)],
            'K': [('B', 200), ('H', 300)],
            'L': [('C', 200), ('M', 50)],
            'M': [('C', 100), ('L', 50), ('N', 100)],
            'N': [('M', 100), ('O', 100)],
            'O': [('N', 100), ('G', 300)]
        }
        max_paths = 4
        self.action_space = spaces.Discrete(max_paths)
        positions = len(self.map)
        # observations: position, reward of all 4 local paths, rest reward of all locations
        # non existing path is -1000 and no position change
        # look at what #getObservation returns if you are confused
        low = np.append(np.append([0], np.full(max_paths, -1000)), np.full(positions, 0))
        high = np.append(np.append([positions - 1], np.full(max_paths, 1000)), np.full(positions, 1000))
        self.observation_space = spaces.Box(low=low,
                                            high=high,
                                            dtype=np.float32)
        self.reward_range = (-1, 1)

        # per-episode bookkeeping (cleared again by reset)
        self.totalReward = 0
        self.stepCount = 0
        self.isDone = False

        # bookkeeping across episodes, used for the "Done:" summary line
        self.envReward = 0
        self.envEpisodeCount = 0
        self.envStepCount = 0

        self.reset()
        # Sum of all customer rewards right after the first reset; step()
        # divides every reward by this value.
        self.optimum = self.calculate_customers_reward()

    def seed(self, seed=None):
        """Create self.np_random from `seed` and return the seed in a list.

        NOTE(review): modulate_reward draws from the stdlib `random` module,
        not from self.np_random, so this seed does not influence which
        customers receive a reward.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def iterate_path(self, state, action):
        """Return (destination, cost) for taking path number `action` from node `state`."""
        paths = self.map[state]
        if action < len(paths):
            return paths[action]
        else:
            # sorry, no such action, stay where you are and pay a high penalty
            return (state, 1000)

    def step(self, action):
        """Apply `action` and return (observation, reward, done, info).

        reward is (collected customer reward - travel cost) / self.optimum.
        info holds the node the agent came from and the node it ended up in.
        """
        destination, cost = self.iterate_path(self.state, action)
        lastState = self.state
        customerReward = self.customer_reward[destination]
        reward = (customerReward - cost) / self.optimum

        self.state = destination
        self.customer_visited(destination)
        done = destination == 'S' and self.all_customers_visited()

        stateAsInt = state_name_to_int(self.state)
        self.totalReward += reward
        self.stepCount += 1
        self.envReward += reward
        self.envStepCount += 1

        if self.showStep:
            print( "Episode: " + ("%4.0f " % self.envEpisodeCount) +
                   " Step: " + ("%4.0f " % self.stepCount) +
                   lastState + ' --' + str(action) + '-> ' + self.state +
                   ' R=' + ("% 2.2f" % reward) + ' totalR=' + ("% 3.2f" % self.totalReward) +
                   ' cost=' + ("%4.0f" % cost) + ' customerR=' + ("%4.0f" % customerReward) + ' optimum=' + ("%4.0f" % self.optimum)
                   )

        # count an episode only on the transition into the done state
        if done and not self.isDone:
            self.envEpisodeCount += 1
            if BeraterEnv.showDone:
                # number of episodes accumulated since the averages were last restarted
                episodes = BeraterEnv.envEpisodeModulo
                if (self.envEpisodeCount % BeraterEnv.envEpisodeModulo != 0):
                    episodes = self.envEpisodeCount % BeraterEnv.envEpisodeModulo
                print( "Done: " +
                       ("episodes=%6.0f " % self.envEpisodeCount) +
                       ("avgSteps=%6.2f " % (self.envStepCount/episodes)) +
                       ("avgTotalReward=% 3.2f" % (self.envReward/episodes) )
                       )
                if (self.envEpisodeCount%BeraterEnv.envEpisodeModulo) == 0:
                    self.envReward = 0
                    self.envStepCount = 0

        self.isDone = done
        observation = self.getObservation(stateAsInt)
        # self.state already points at the destination here, so the origin has
        # to come from the value saved before the move.
        info = {"from": lastState, "to": destination}
        return observation, reward, done, info

    def getObservation(self, position):
        """Build the observation vector for integer node index `position`.

        Layout: [position, net value of path 0..3, remaining reward of every
        node in customer_reward order]. Returned as float32 so it matches the
        dtype declared in self.observation_space.
        """
        local_paths = [self.getPathObservation(position, path) for path in range(4)]
        all_rest_rewards = list(self.customer_reward.values())
        return np.array([position] + local_paths + all_rest_rewards, dtype=np.float32)

    def getPathObservation(self, position, path):
        """Return reward - cost of path number `path` leaving node index `position`.

        A path index that does not exist at that node yields -1000.
        """
        source = int_to_state_name(position)
        paths = self.map[source]
        if path < len(paths):
            target, cost = paths[path]
            reward = self.customer_reward[target]
            result = reward - cost
        else:
            result = -1000
        return result

    def customer_visited(self, customer):
        """Clear the remaining reward of `customer`."""
        self.customer_reward[customer] = 0

    def all_customers_visited(self):
        """Return True when no node has any reward left."""
        return self.calculate_customers_reward() == 0

    def calculate_customers_reward(self):
        """Return the sum of the remaining rewards of all nodes."""
        return sum(self.customer_reward.values())

    def modulate_reward(self):
        """Draw a new reward layout: half of the customers (rounded down) get 1000, the rest 0."""
        number_of_customers = len(self.map) - 1
        number_per_consultant = int(number_of_customers/2)
        # number_per_consultant = int(number_of_customers/1.5)
        self.customer_reward = {
            'S': 0
        }
        for customer_nr in range(1, number_of_customers + 1):
            self.customer_reward[int_to_state_name(customer_nr)] = 0

        # every consultant only visits a few random customers
        samples = random.sample(range(1, number_of_customers + 1), k=number_per_consultant)
        # keys were inserted in index order, so position `sample` in this list
        # is the customer with index `sample`
        key_list = list(self.customer_reward.keys())
        for sample in samples:
            self.customer_reward[key_list[sample]] = 1000

    def reset(self):
        """Start a new episode at 'S' with a fresh reward layout and return the first observation."""
        self.totalReward = 0
        self.stepCount = 0
        self.isDone = False

        self.modulate_reward()
        self.state = 'S'
        return self.getObservation(state_name_to_int(self.state))

    def render(self, mode='ansi'):
        """Print the remaining reward of every node.

        `mode` is accepted so the method can be called the way the gym API
        calls it; it is not evaluated.
        """
        print(self.customer_reward)
In [0]:
# Build an environment and show one initial observation next to the reward
# layout it was derived from. The constructor already resets once; the explicit
# reset() below draws a new random reward layout.
env = BeraterEnv()
print(env.reset())
print(env.customer_reward)
[ 0. -300. 900. -200. -1000. 0. 0. 1000. 0. 1000.
0. 0. 1000. 0. 0. 1000. 1000. 1000. 0.]
{'S': 0, 'A': 0, 'B': 1000, 'C': 0, 'D': 1000, 'E': 0, 'F': 0, 'G': 1000, 'H': 0, 'K': 0, 'L': 1000, 'M': 1000, 'N': 1000, 'O': 0}
In [0]:
# Random-policy rollout: sample actions uniformly until the episode ends
# (or 1000 steps have passed), logging every step and the final summary.
BeraterEnv.showStep, BeraterEnv.showDone = True, True

env = BeraterEnv()
print(env)

observation = env.reset()
print(observation)

for t in range(1000):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if not done:
        continue
    print("Episode finished after {} timesteps".format(t+1))
    break

env.close()
# last observation seen by the loop
print(observation)
<BeraterEnv instance>
[ 0. 700. 900. 800. -1000. 0. 1000. 1000. 1000. 0.
0. 0. 0. 0. 0. 1000. 1000. 1000. 0.]
Episode: 0 Step: 1 S --0-> A R= 0.12 totalR= 0.12 cost= 300 customerR=1000 optimum=6000
Episode: 0 Step: 2 A --3-> D R=-0.02 totalR= 0.10 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 3 D --1-> F R=-0.01 totalR= 0.09 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 4 F --0-> D R=-0.01 totalR= 0.08 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 5 D --3-> D R=-0.17 totalR=-0.08 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 6 D --3-> D R=-0.17 totalR=-0.25 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 7 D --3-> D R=-0.17 totalR=-0.42 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 8 D --3-> D R=-0.17 totalR=-0.58 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 9 D --1-> F R=-0.01 totalR=-0.59 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 10 F --3-> F R=-0.17 totalR=-0.76 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 11 F --1-> E R=-0.02 totalR=-0.77 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 12 E --2-> H R=-0.02 totalR=-0.79 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 13 H --0-> E R=-0.02 totalR=-0.81 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 14 E --3-> E R=-0.17 totalR=-0.97 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 15 E --2-> H R=-0.02 totalR=-0.99 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 16 H --0-> E R=-0.02 totalR=-1.01 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 17 E --0-> A R=-0.02 totalR=-1.02 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 18 A --0-> S R=-0.05 totalR=-1.07 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 19 S --2-> C R= 0.13 totalR=-0.94 cost= 200 customerR=1000 optimum=6000
Episode: 0 Step: 20 C --1-> B R= 0.16 totalR=-0.78 cost= 50 customerR=1000 optimum=6000
Episode: 0 Step: 21 B --2-> C R=-0.01 totalR=-0.79 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 22 C --3-> L R= 0.13 totalR=-0.66 cost= 200 customerR=1000 optimum=6000
Episode: 0 Step: 23 L --3-> L R=-0.17 totalR=-0.82 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 24 L --2-> L R=-0.17 totalR=-0.99 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 25 L --0-> C R=-0.03 totalR=-1.02 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 26 C --1-> B R=-0.01 totalR=-1.03 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 27 B --1-> A R=-0.02 totalR=-1.05 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 28 A --1-> B R=-0.02 totalR=-1.07 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 29 B --1-> A R=-0.02 totalR=-1.08 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 30 A --0-> S R=-0.05 totalR=-1.13 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 31 S --1-> B R=-0.02 totalR=-1.15 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 32 B --0-> S R=-0.02 totalR=-1.17 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 33 S --3-> S R=-0.17 totalR=-1.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 34 S --0-> A R=-0.05 totalR=-1.38 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 35 A --3-> D R=-0.02 totalR=-1.40 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 36 D --1-> F R=-0.01 totalR=-1.41 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 37 F --2-> G R=-0.03 totalR=-1.44 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 38 G --3-> G R=-0.17 totalR=-1.61 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 39 G --3-> G R=-0.17 totalR=-1.77 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 40 G --0-> F R=-0.03 totalR=-1.81 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 41 F --2-> G R=-0.03 totalR=-1.84 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 42 G --3-> G R=-0.17 totalR=-2.01 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 43 G --0-> F R=-0.03 totalR=-2.04 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 44 F --1-> E R=-0.02 totalR=-2.06 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 45 E --3-> E R=-0.17 totalR=-2.22 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 46 E --1-> F R=-0.02 totalR=-2.24 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 47 F --3-> F R=-0.17 totalR=-2.41 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 48 F --3-> F R=-0.17 totalR=-2.57 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 49 F --2-> G R=-0.03 totalR=-2.61 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 50 G --3-> G R=-0.17 totalR=-2.77 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 51 G --0-> F R=-0.03 totalR=-2.81 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 52 F --1-> E R=-0.02 totalR=-2.82 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 53 E --1-> F R=-0.02 totalR=-2.84 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 54 F --1-> E R=-0.02 totalR=-2.86 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 55 E --3-> E R=-0.17 totalR=-3.02 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 56 E --0-> A R=-0.02 totalR=-3.04 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 57 A --3-> D R=-0.02 totalR=-3.06 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 58 D --2-> D R=-0.17 totalR=-3.22 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 59 D --0-> A R=-0.02 totalR=-3.24 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 60 A --3-> D R=-0.02 totalR=-3.26 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 61 D --3-> D R=-0.17 totalR=-3.42 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 62 D --2-> D R=-0.17 totalR=-3.59 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 63 D --3-> D R=-0.17 totalR=-3.76 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 64 D --2-> D R=-0.17 totalR=-3.92 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 65 D --3-> D R=-0.17 totalR=-4.09 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 66 D --0-> A R=-0.02 totalR=-4.11 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 67 A --2-> E R=-0.02 totalR=-4.12 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 68 E --0-> A R=-0.02 totalR=-4.14 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 69 A --0-> S R=-0.05 totalR=-4.19 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 70 S --0-> A R=-0.05 totalR=-4.24 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 71 A --1-> B R=-0.02 totalR=-4.26 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 72 B --1-> A R=-0.02 totalR=-4.27 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 73 A --2-> E R=-0.02 totalR=-4.29 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 74 E --0-> A R=-0.02 totalR=-4.31 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 75 A --0-> S R=-0.05 totalR=-4.36 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 76 S --1-> B R=-0.02 totalR=-4.37 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 77 B --3-> K R=-0.03 totalR=-4.41 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 78 K --0-> B R=-0.03 totalR=-4.44 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 79 B --1-> A R=-0.02 totalR=-4.46 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 80 A --2-> E R=-0.02 totalR=-4.47 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 81 E --2-> H R=-0.02 totalR=-4.49 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 82 H --3-> H R=-0.17 totalR=-4.66 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 83 H --0-> E R=-0.02 totalR=-4.67 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 84 E --1-> F R=-0.02 totalR=-4.69 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 85 F --1-> E R=-0.02 totalR=-4.71 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 86 E --3-> E R=-0.17 totalR=-4.87 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 87 E --1-> F R=-0.02 totalR=-4.89 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 88 F --1-> E R=-0.02 totalR=-4.91 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 89 E --3-> E R=-0.17 totalR=-5.07 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 90 E --2-> H R=-0.02 totalR=-5.09 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 91 H --3-> H R=-0.17 totalR=-5.26 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 92 H --3-> H R=-0.17 totalR=-5.42 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 93 H --2-> H R=-0.17 totalR=-5.59 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 94 H --2-> H R=-0.17 totalR=-5.76 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 95 H --3-> H R=-0.17 totalR=-5.92 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 96 H --0-> E R=-0.02 totalR=-5.94 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 97 E --2-> H R=-0.02 totalR=-5.96 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 98 H --3-> H R=-0.17 totalR=-6.12 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 99 H --1-> K R=-0.05 totalR=-6.17 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 100 K --0-> B R=-0.03 totalR=-6.21 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 101 B --1-> A R=-0.02 totalR=-6.22 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 102 A --2-> E R=-0.02 totalR=-6.24 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 103 E --0-> A R=-0.02 totalR=-6.26 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 104 A --3-> D R=-0.02 totalR=-6.27 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 105 D --0-> A R=-0.02 totalR=-6.29 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 106 A --2-> E R=-0.02 totalR=-6.31 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 107 E --0-> A R=-0.02 totalR=-6.32 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 108 A --3-> D R=-0.02 totalR=-6.34 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 109 D --3-> D R=-0.17 totalR=-6.51 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 110 D --0-> A R=-0.02 totalR=-6.52 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 111 A --3-> D R=-0.02 totalR=-6.54 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 112 D --0-> A R=-0.02 totalR=-6.56 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 113 A --0-> S R=-0.05 totalR=-6.61 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 114 S --0-> A R=-0.05 totalR=-6.66 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 115 A --0-> S R=-0.05 totalR=-6.71 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 116 S --2-> C R=-0.03 totalR=-6.74 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 117 C --3-> L R=-0.03 totalR=-6.77 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 118 L --0-> C R=-0.03 totalR=-6.81 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 119 C --3-> L R=-0.03 totalR=-6.84 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 120 L --2-> L R=-0.17 totalR=-7.01 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 121 L --3-> L R=-0.17 totalR=-7.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 122 L --3-> L R=-0.17 totalR=-7.34 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 123 L --1-> M R= 0.16 totalR=-7.18 cost= 50 customerR=1000 optimum=6000
Episode: 0 Step: 124 M --1-> L R=-0.01 totalR=-7.19 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 125 L --1-> M R=-0.01 totalR=-7.20 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 126 M --0-> C R=-0.02 totalR=-7.22 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 127 C --1-> B R=-0.01 totalR=-7.22 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 128 B --1-> A R=-0.02 totalR=-7.24 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 129 A --1-> B R=-0.02 totalR=-7.26 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 130 B --3-> K R=-0.03 totalR=-7.29 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 131 K --0-> B R=-0.03 totalR=-7.32 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 132 B --3-> K R=-0.03 totalR=-7.36 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 133 K --1-> H R=-0.05 totalR=-7.41 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 134 H --2-> H R=-0.17 totalR=-7.57 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 135 H --0-> E R=-0.02 totalR=-7.59 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 136 E --1-> F R=-0.02 totalR=-7.61 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 137 F --2-> G R=-0.03 totalR=-7.64 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 138 G --0-> F R=-0.03 totalR=-7.67 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 139 F --2-> G R=-0.03 totalR=-7.71 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 140 G --0-> F R=-0.03 totalR=-7.74 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 141 F --1-> E R=-0.02 totalR=-7.76 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 142 E --3-> E R=-0.17 totalR=-7.92 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 143 E --2-> H R=-0.02 totalR=-7.94 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 144 H --2-> H R=-0.17 totalR=-8.11 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 145 H --1-> K R=-0.05 totalR=-8.16 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 146 K --0-> B R=-0.03 totalR=-8.19 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 147 B --3-> K R=-0.03 totalR=-8.22 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 148 K --1-> H R=-0.05 totalR=-8.27 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 149 H --1-> K R=-0.05 totalR=-8.32 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 150 K --3-> K R=-0.17 totalR=-8.49 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 151 K --0-> B R=-0.03 totalR=-8.52 cost= 200 customerR= 0 optimum=6000
Episode: 0 Step: 152 B --2-> C R=-0.01 totalR=-8.53 cost= 50 customerR= 0 optimum=6000
Episode: 0 Step: 153 C --2-> M R=-0.02 totalR=-8.55 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 154 M --3-> M R=-0.17 totalR=-8.72 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 155 M --2-> N R= 0.15 totalR=-8.57 cost= 100 customerR=1000 optimum=6000
Episode: 0 Step: 156 N --3-> N R=-0.17 totalR=-8.73 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 157 N --3-> N R=-0.17 totalR=-8.90 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 158 N --3-> N R=-0.17 totalR=-9.07 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 159 N --2-> N R=-0.17 totalR=-9.23 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 160 N --1-> O R=-0.02 totalR=-9.25 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 161 O --2-> O R=-0.17 totalR=-9.42 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 162 O --2-> O R=-0.17 totalR=-9.58 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 163 O --3-> O R=-0.17 totalR=-9.75 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 164 O --2-> O R=-0.17 totalR=-9.92 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 165 O --3-> O R=-0.17 totalR=-10.08 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 166 O --3-> O R=-0.17 totalR=-10.25 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 167 O --2-> O R=-0.17 totalR=-10.42 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 168 O --2-> O R=-0.17 totalR=-10.58 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 169 O --3-> O R=-0.17 totalR=-10.75 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 170 O --0-> N R=-0.02 totalR=-10.77 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 171 N --1-> O R=-0.02 totalR=-10.78 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 172 O --2-> O R=-0.17 totalR=-10.95 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 173 O --3-> O R=-0.17 totalR=-11.12 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 174 O --2-> O R=-0.17 totalR=-11.28 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 175 O --1-> G R=-0.05 totalR=-11.33 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 176 G --2-> G R=-0.17 totalR=-11.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 177 G --1-> O R=-0.05 totalR=-11.55 cost= 300 customerR= 0 optimum=6000
Episode: 0 Step: 178 O --0-> N R=-0.02 totalR=-11.57 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 179 N --2-> N R=-0.17 totalR=-11.73 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 180 N --2-> N R=-0.17 totalR=-11.90 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 181 N --3-> N R=-0.17 totalR=-12.07 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 182 N --0-> M R=-0.02 totalR=-12.08 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 183 M --3-> M R=-0.17 totalR=-12.25 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 184 M --2-> N R=-0.02 totalR=-12.27 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 185 N --3-> N R=-0.17 totalR=-12.43 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 186 N --0-> M R=-0.02 totalR=-12.45 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 187 M --0-> C R=-0.02 totalR=-12.47 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 188 C --2-> M R=-0.02 totalR=-12.48 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 189 M --0-> C R=-0.02 totalR=-12.50 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 190 C --2-> M R=-0.02 totalR=-12.52 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 191 M --3-> M R=-0.17 totalR=-12.68 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 192 M --2-> N R=-0.02 totalR=-12.70 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 193 N --2-> N R=-0.17 totalR=-12.87 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 194 N --3-> N R=-0.17 totalR=-13.03 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 195 N --0-> M R=-0.02 totalR=-13.05 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 196 M --0-> C R=-0.02 totalR=-13.07 cost= 100 customerR= 0 optimum=6000
Episode: 0 Step: 197 C --0-> S R=-0.03 totalR=-13.10 cost= 200 customerR= 0 optimum=6000
Done: episodes= 1 avgSteps=197.00 avgTotalReward=-13.10
Episode finished after 197 timesteps
[ 0. -300. -100. -200. -1000. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0.]
In [0]:
from copy import deepcopy
import json
class Baseline():
    """Search-based reference solution for a Berater-style environment.

    Reads the graph (`env.map`) and the current reward layout
    (`env.customer_reward`) from the environment and looks for the cheapest
    round trip from 'S' that collects every reward, expanding the cheapest
    path first. `benchmark` and `score` compare a trained model against it.
    """

    def __init__(self, env, verbose=1):
        """Store the environment and take a snapshot of its map and rewards.

        verbose > 0 makes `search` print its result.
        """
        self.env = env
        self.verbose = verbose
        self.reset()

    def reset(self):
        """Re-read map and reward layout from the environment (call after env.reset())."""
        self.map = self.env.map
        self.rewards = self.env.customer_reward.copy()

    def as_string(self, state):
        """Return a canonical string for `state`, used to detect already visited states."""
        # reward/cost would not hurt but are useless; the path has to be left
        # out because it makes otherwise identical states look different
        new_state = {
            'rewards': state['rewards'],
            'position': state['position']
        }
        return json.dumps(new_state, sort_keys=True)

    def is_goal(self, state):
        """Return True when the state is back at 'S' with no reward left anywhere."""
        if state['position'] != 'S': return False
        for reward in state['rewards'].values():
            if reward != 0: return False
        return True

    def expand(self, state):
        """Return one successor state per outgoing path of the current position."""
        states = []
        for position, cost in self.map[state['position']]:
            new_state = deepcopy(state)
            new_state['position'] = position
            new_state['rewards'][position] = 0
            # read from the parent state: the copy has just been cleared
            reward = state['rewards'][position]
            new_state['reward'] += reward
            new_state['cost'] += cost
            new_state['path'].append(position)
            states.append(new_state)
        return states

    def search(self, root, max_depth = 25):
        """Cheapest-first search from `root` for a goal state.

        Returns the goal state (with 'scaled_reward' added) or None when the
        frontier runs empty or a state whose path is longer than `max_depth`
        is reached first.
        """
        import heapq
        from itertools import count

        # Same normaliser the environment uses for its rewards; 6000 is the
        # fallback for environments that do not expose `optimum`.
        optimum = getattr(self.env, 'optimum', 6000)

        closed = set()
        # Heap entries are (cost, insertion number, state). The insertion
        # number keeps equal-cost states in first-in-first-out order and
        # prevents Python from ever comparing two state dicts.
        insertion = count()
        frontier = [(root['cost'], next(insertion), root)]

        while frontier:
            _, _, state = heapq.heappop(frontier)
            key = self.as_string(state)
            if key in closed: continue
            closed.add(key)

            depth = len(state['path'])
            if depth > max_depth:
                if self.verbose > 0:
                    print("Visited:", len(closed))
                    print("Reached max depth, without reaching goal")
                return None

            if self.is_goal(state):
                scaled_reward = (state['reward'] - state['cost']) / optimum
                state['scaled_reward'] = scaled_reward
                if self.verbose > 0:
                    print("Scaled reward:", scaled_reward)
                    print("Perfect path", state['path'])
                return state

            for successor in self.expand(state):
                heapq.heappush(frontier, (successor['cost'], next(insertion), successor))

        return None

    def find_optimum(self):
        """Search from 'S' using the reward layout captured by the last reset()."""
        initial_state = {
            'rewards': self.rewards.copy(),
            'position': 'S',
            'reward': 0,
            'cost': 0,
            'path': ['S']
        }
        return self.search(initial_state)

    def benchmark(self, model, sample_runs=100):
        """Run `model` and the search on `sample_runs` fresh episodes.

        Returns (perfect_rewards, model_rewards): per episode, the scaled
        reward of the searched path and the total reward the model reached
        (episodes are cut off after 1000 steps). Turns off the environment's
        and this object's logging as a side effect.
        """
        self.verbose = 0
        BeraterEnv.showStep = False
        BeraterEnv.showDone = False

        perfect_rewards = []
        model_rewards = []
        for run in range(sample_runs):
            observation = self.env.reset()
            self.reset()

            optimum_state = self.find_optimum()
            if optimum_state is None:
                raise RuntimeError("search did not find a reference path for this reward layout")
            perfect_rewards.append(optimum_state['scaled_reward'])

            # NOTE(review): shapes presumably match what model.step expects for
            # its S (state) and M (mask) arguments — confirm against the policy.
            state = np.zeros((1, 2*128))
            dones = np.zeros((1))

            for t in range(1000):
                actions, _, state, _ = model.step(observation, S=state, M=dones)
                observation, reward, done, info = self.env.step(actions[0])
                if done:
                    break
            # read from the environment that was actually stepped, not from a
            # notebook-level global
            model_rewards.append(self.env.totalReward)
        return perfect_rewards, model_rewards

    def score(self, model, sample_runs=100):
        """Return (perfect mean, perfect std, model mean, model std) over `sample_runs` episodes."""
        perfect_rewards, model_rewards = self.benchmark(model, sample_runs=sample_runs)

        perfect_score_mean, perfect_score_std = np.array(perfect_rewards).mean(), np.array(perfect_rewards).std()
        test_score_mean, test_score_std = np.array(model_rewards).mean(), np.array(model_rewards).std()

        return perfect_score_mean, perfect_score_std, test_score_mean, test_score_std
Estimation
In [0]:
# Start every run with an empty log directory.
# -f: do not complain when logs/ does not exist yet (first run on a fresh machine)
# -p: create logs/ and logs/berater in one go
!rm -rf logs
!mkdir -p logs/berater
In [0]:
import tensorflow as tf
# Only let TensorFlow log messages of level ERROR.
tf.logging.set_verbosity(tf.logging.ERROR)
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)
1.12.0
In [0]:
%%time
# https://github.com/openai/baselines/blob/master/baselines/deepq/experiments/train_pong.py
# log_dir = logger.get_dir()
# Location handed to VecMonitor below and read back by the plotting cell.
log_dir = '/content/logs/berater/'
import gym
from baselines import bench
from baselines import logger
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_monitor import VecMonitor
from baselines.ppo2 import ppo2
from baselines.a2c import a2c
# Silence the environment's per-step and per-episode printing during training.
BeraterEnv.showStep = False
BeraterEnv.showDone = False
# Stand-alone instance, used by the later evaluation cells.
env = BeraterEnv()
# The lambda builds a second, separate BeraterEnv; that one is wrapped,
# monitored and passed to a2c.learn.
wrapped_env = DummyVecEnv([lambda: BeraterEnv()])
monitored_env = VecMonitor(wrapped_env, log_dir)
# https://github.com/openai/baselines/blob/master/baselines/ppo2/ppo2.py
# https://github.com/openai/baselines/blob/master/baselines/common/models.py#L30
# https://arxiv.org/abs/1607.06450 for layer_norm
# lr linear from lr=1e-2 to lr=1e-4 (default lr=3e-4)
def lr_range(frac):
    """Linearly interpolate the learning rate between 1e-4 (frac=0) and 1e-2 (frac=1).

    `frac` is the remaining share of updates, running from 1 down to 0.
    """
    lr_at_start, lr_at_end = 1e-2, 1e-4
    return lr_at_end + (lr_at_start - lr_at_end) * frac
def mlp(num_layers=2, num_hidden=64, activation=tf.nn.relu, layer_norm=False):
    """Return a network builder: flatten, then `num_layers` dense layers.

    Each dense layer has `num_hidden` units and a glorot-uniform initializer
    seeded with 17, is optionally followed by layer normalization, and ends
    with `activation`.
    """
    def network_fn(X):
        def dense_block(inputs):
            # dense -> (optional) layer norm -> activation
            outputs = tf.layers.dense(inputs, units=num_hidden, kernel_initializer=tf.initializers.glorot_uniform(seed=17))
            if layer_norm:
                outputs = tf.contrib.layers.layer_norm(outputs, center=True, scale=True)
            return activation(outputs)

        hidden = tf.layers.flatten(X)
        for _ in range(num_layers):
            hidden = dense_block(hidden)
        return hidden

    return network_fn
# Network body for the A2C policy: 3 dense layers of 500 units with layer normalization.
network = mlp(num_hidden=500, num_layers=3, layer_norm=True)
# Parameters
# https://github.com/openai/baselines/blob/master/baselines/a2c/a2c.py
# NOTE(review): lr_range defined above is not passed to this call, so the
# learning rate presumably falls back to a2c.learn's default — confirm.
model = a2c.learn(
env=monitored_env,
network=network,
gamma=1.0,
ent_coef=0.05,
log_interval=50000,
total_timesteps=1000000)
# model.save('berater-ppo-v12.pkl')
monitored_env.close()
Logging to /tmp/openai-2019-01-31-16-37-25-273381
---------------------------------
| explained_variance | -0.85 |
| fps | 17 |
| nupdates | 1 |
| policy_entropy | 1.39 |
| total_timesteps | 5 |
| value_loss | 0.0133 |
---------------------------------
---------------------------------
| explained_variance | 0 |
| fps | 277 |
| nupdates | 50000 |
| policy_entropy | 4.05e-22 |
| total_timesteps | 250000 |
| value_loss | 0.307 |
---------------------------------
---------------------------------
| explained_variance | 0 |
| fps | 277 |
| nupdates | 100000 |
| policy_entropy | 0 |
| total_timesteps | 500000 |
| value_loss | 0.299 |
---------------------------------
---------------------------------
| explained_variance | 0 |
| fps | 278 |
| nupdates | 150000 |
| policy_entropy | 7.85e-34 |
| total_timesteps | 750000 |
| value_loss | 0.0625 |
---------------------------------
---------------------------------
| explained_variance | 0 |
| fps | 278 |
| nupdates | 200000 |
| policy_entropy | 7.85e-34 |
| total_timesteps | 1000000 |
| value_loss | 0.0625 |
---------------------------------
CPU times: user 1h 13min 31s, sys: 12min 4s, total: 1h 25min 35s
Wall time: 59min 55s
https://github.com/openai/baselines/blob/master/docs/viz/viz.ipynb
In [0]:
# !ls -l $log_dir
In [0]:
from baselines.common import plot_util as pu
# Load what was logged under log_dir during training.
results = pu.load_results(log_dir)
import matplotlib.pyplot as plt
import numpy as np
# Only the first loaded result is plotted.
r = results[0]
plt.ylim(0, .75)
# plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
# x: running sum of monitor.l, y: monitor.r smoothed with radius 100
# (presumably episode lengths and episode rewards — confirm against the monitor format).
plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=100))
/usr/local/lib/python3.6/dist-packages/baselines/bench/monitor.py:164: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
df.headers = headers # HACK to preserve backwards compatibility
Out[0]:
[<matplotlib.lines.Line2D at 0x7f699835f860>]
In [0]:
import numpy as np
# Start a fresh episode and keep its first observation; the rollout cell that
# follows passes `observation` to model.step.
observation = env.reset()
# Display the environment's current state (see the output printed under this cell).
env.render()
# NOTE(review): `Baseline` is defined outside this cell; presumably it computes a
# reference solution for the episode that was just reset - confirm.
baseline = Baseline(env)
{'S': 0, 'A': 1000, 'B': 1000, 'C': 0, 'D': 1000, 'E': 1000, 'F': 0, 'G': 1000, 'H': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0, 'O': 1000}
In [0]:
# Play one episode with the trained model, starting from the `observation`
# produced by the preceding env.reset() cell.

# Recurrent state handed back to model.step on every call.
# NOTE(review): presumably (n_envs, 2 * nlstm) with nlstm=128 - confirm against
# the network configuration used for training.
state = np.zeros((1, 2*128))
# Passed to model.step as M; stays all zeros because the loop stops at the first `done`.
dones = np.zeros((1))

# Class-level switches on BeraterEnv (they stay set after this cell).
# NOTE(review): showStep appears to enable the per-step trace printed below and
# showDone the end-of-episode summary - confirm in BeraterEnv.
BeraterEnv.showStep = True
BeraterEnv.showDone = False

for t in range(1000):
    actions, _, state, _ = model.step(observation, S=state, M=dones)
    observation, reward, done, info = env.step(actions[0])
    if done:
        print("Episode finished after {} timesteps, reward={}".format(t+1, env.totalReward))
        break
else:
    # The step budget ran out without the environment reporting `done`;
    # say so instead of ending silently.
    print("Episode NOT finished after {} timesteps, reward={}".format(t+1, env.totalReward))
env.close()
Episode: 0 Step: 1 S --3-> S R=-0.17 totalR=-0.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 2 S --3-> S R=-0.17 totalR=-0.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 3 S --3-> S R=-0.17 totalR=-0.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 4 S --3-> S R=-0.17 totalR=-0.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 5 S --3-> S R=-0.17 totalR=-0.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 6 S --3-> S R=-0.17 totalR=-1.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 7 S --3-> S R=-0.17 totalR=-1.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 8 S --3-> S R=-0.17 totalR=-1.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 9 S --3-> S R=-0.17 totalR=-1.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 10 S --3-> S R=-0.17 totalR=-1.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 11 S --3-> S R=-0.17 totalR=-1.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 12 S --3-> S R=-0.17 totalR=-2.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 13 S --3-> S R=-0.17 totalR=-2.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 14 S --3-> S R=-0.17 totalR=-2.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 15 S --3-> S R=-0.17 totalR=-2.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 16 S --3-> S R=-0.17 totalR=-2.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 17 S --3-> S R=-0.17 totalR=-2.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 18 S --3-> S R=-0.17 totalR=-3.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 19 S --3-> S R=-0.17 totalR=-3.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 20 S --3-> S R=-0.17 totalR=-3.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 21 S --3-> S R=-0.17 totalR=-3.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 22 S --3-> S R=-0.17 totalR=-3.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 23 S --3-> S R=-0.17 totalR=-3.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 24 S --3-> S R=-0.17 totalR=-4.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 25 S --3-> S R=-0.17 totalR=-4.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 26 S --3-> S R=-0.17 totalR=-4.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 27 S --3-> S R=-0.17 totalR=-4.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 28 S --3-> S R=-0.17 totalR=-4.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 29 S --3-> S R=-0.17 totalR=-4.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 30 S --3-> S R=-0.17 totalR=-5.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 31 S --3-> S R=-0.17 totalR=-5.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 32 S --3-> S R=-0.17 totalR=-5.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 33 S --3-> S R=-0.17 totalR=-5.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 34 S --3-> S R=-0.17 totalR=-5.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 35 S --3-> S R=-0.17 totalR=-5.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 36 S --3-> S R=-0.17 totalR=-6.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 37 S --3-> S R=-0.17 totalR=-6.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 38 S --3-> S R=-0.17 totalR=-6.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 39 S --3-> S R=-0.17 totalR=-6.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 40 S --3-> S R=-0.17 totalR=-6.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 41 S --3-> S R=-0.17 totalR=-6.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 42 S --3-> S R=-0.17 totalR=-7.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 43 S --3-> S R=-0.17 totalR=-7.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 44 S --3-> S R=-0.17 totalR=-7.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 45 S --3-> S R=-0.17 totalR=-7.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 46 S --3-> S R=-0.17 totalR=-7.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 47 S --3-> S R=-0.17 totalR=-7.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 48 S --3-> S R=-0.17 totalR=-8.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 49 S --3-> S R=-0.17 totalR=-8.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 50 S --3-> S R=-0.17 totalR=-8.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 51 S --3-> S R=-0.17 totalR=-8.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 52 S --3-> S R=-0.17 totalR=-8.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 53 S --3-> S R=-0.17 totalR=-8.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 54 S --3-> S R=-0.17 totalR=-9.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 55 S --3-> S R=-0.17 totalR=-9.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 56 S --3-> S R=-0.17 totalR=-9.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 57 S --3-> S R=-0.17 totalR=-9.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 58 S --3-> S R=-0.17 totalR=-9.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 59 S --3-> S R=-0.17 totalR=-9.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 60 S --3-> S R=-0.17 totalR=-10.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 61 S --3-> S R=-0.17 totalR=-10.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 62 S --3-> S R=-0.17 totalR=-10.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 63 S --3-> S R=-0.17 totalR=-10.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 64 S --3-> S R=-0.17 totalR=-10.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 65 S --3-> S R=-0.17 totalR=-10.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 66 S --3-> S R=-0.17 totalR=-11.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 67 S --3-> S R=-0.17 totalR=-11.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 68 S --3-> S R=-0.17 totalR=-11.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 69 S --3-> S R=-0.17 totalR=-11.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 70 S --3-> S R=-0.17 totalR=-11.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 71 S --3-> S R=-0.17 totalR=-11.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 72 S --3-> S R=-0.17 totalR=-12.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 73 S --3-> S R=-0.17 totalR=-12.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 74 S --3-> S R=-0.17 totalR=-12.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 75 S --3-> S R=-0.17 totalR=-12.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 76 S --3-> S R=-0.17 totalR=-12.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 77 S --3-> S R=-0.17 totalR=-12.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 78 S --3-> S R=-0.17 totalR=-13.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 79 S --3-> S R=-0.17 totalR=-13.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 80 S --3-> S R=-0.17 totalR=-13.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 81 S --3-> S R=-0.17 totalR=-13.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 82 S --3-> S R=-0.17 totalR=-13.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 83 S --3-> S R=-0.17 totalR=-13.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 84 S --3-> S R=-0.17 totalR=-14.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 85 S --3-> S R=-0.17 totalR=-14.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 86 S --3-> S R=-0.17 totalR=-14.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 87 S --3-> S R=-0.17 totalR=-14.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 88 S --3-> S R=-0.17 totalR=-14.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 89 S --3-> S R=-0.17 totalR=-14.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 90 S --3-> S R=-0.17 totalR=-15.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 91 S --3-> S R=-0.17 totalR=-15.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 92 S --3-> S R=-0.17 totalR=-15.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 93 S --3-> S R=-0.17 totalR=-15.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 94 S --3-> S R=-0.17 totalR=-15.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 95 S --3-> S R=-0.17 totalR=-15.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 96 S --3-> S R=-0.17 totalR=-16.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 97 S --3-> S R=-0.17 totalR=-16.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 98 S --3-> S R=-0.17 totalR=-16.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 99 S --3-> S R=-0.17 totalR=-16.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 100 S --3-> S R=-0.17 totalR=-16.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 101 S --3-> S R=-0.17 totalR=-16.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 102 S --3-> S R=-0.17 totalR=-17.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 103 S --3-> S R=-0.17 totalR=-17.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 104 S --3-> S R=-0.17 totalR=-17.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 105 S --3-> S R=-0.17 totalR=-17.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 106 S --3-> S R=-0.17 totalR=-17.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 107 S --3-> S R=-0.17 totalR=-17.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 108 S --3-> S R=-0.17 totalR=-18.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 109 S --3-> S R=-0.17 totalR=-18.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 110 S --3-> S R=-0.17 totalR=-18.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 111 S --3-> S R=-0.17 totalR=-18.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 112 S --3-> S R=-0.17 totalR=-18.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 113 S --3-> S R=-0.17 totalR=-18.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 114 S --3-> S R=-0.17 totalR=-19.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 115 S --3-> S R=-0.17 totalR=-19.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 116 S --3-> S R=-0.17 totalR=-19.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 117 S --3-> S R=-0.17 totalR=-19.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 118 S --3-> S R=-0.17 totalR=-19.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 119 S --3-> S R=-0.17 totalR=-19.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 120 S --3-> S R=-0.17 totalR=-20.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 121 S --3-> S R=-0.17 totalR=-20.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 122 S --3-> S R=-0.17 totalR=-20.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 123 S --3-> S R=-0.17 totalR=-20.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 124 S --3-> S R=-0.17 totalR=-20.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 125 S --3-> S R=-0.17 totalR=-20.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 126 S --3-> S R=-0.17 totalR=-21.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 127 S --3-> S R=-0.17 totalR=-21.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 128 S --3-> S R=-0.17 totalR=-21.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 129 S --3-> S R=-0.17 totalR=-21.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 130 S --3-> S R=-0.17 totalR=-21.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 131 S --3-> S R=-0.17 totalR=-21.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 132 S --3-> S R=-0.17 totalR=-22.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 133 S --3-> S R=-0.17 totalR=-22.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 134 S --3-> S R=-0.17 totalR=-22.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 135 S --3-> S R=-0.17 totalR=-22.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 136 S --3-> S R=-0.17 totalR=-22.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 137 S --3-> S R=-0.17 totalR=-22.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 138 S --3-> S R=-0.17 totalR=-23.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 139 S --3-> S R=-0.17 totalR=-23.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 140 S --3-> S R=-0.17 totalR=-23.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 141 S --3-> S R=-0.17 totalR=-23.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 142 S --3-> S R=-0.17 totalR=-23.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 143 S --3-> S R=-0.17 totalR=-23.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 144 S --3-> S R=-0.17 totalR=-24.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 145 S --3-> S R=-0.17 totalR=-24.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 146 S --3-> S R=-0.17 totalR=-24.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 147 S --3-> S R=-0.17 totalR=-24.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 148 S --3-> S R=-0.17 totalR=-24.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 149 S --3-> S R=-0.17 totalR=-24.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 150 S --3-> S R=-0.17 totalR=-25.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 151 S --3-> S R=-0.17 totalR=-25.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 152 S --3-> S R=-0.17 totalR=-25.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 153 S --3-> S R=-0.17 totalR=-25.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 154 S --3-> S R=-0.17 totalR=-25.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 155 S --3-> S R=-0.17 totalR=-25.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 156 S --3-> S R=-0.17 totalR=-26.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 157 S --3-> S R=-0.17 totalR=-26.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 158 S --3-> S R=-0.17 totalR=-26.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 159 S --3-> S R=-0.17 totalR=-26.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 160 S --3-> S R=-0.17 totalR=-26.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 161 S --3-> S R=-0.17 totalR=-26.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 162 S --3-> S R=-0.17 totalR=-27.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 163 S --3-> S R=-0.17 totalR=-27.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 164 S --3-> S R=-0.17 totalR=-27.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 165 S --3-> S R=-0.17 totalR=-27.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 166 S --3-> S R=-0.17 totalR=-27.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 167 S --3-> S R=-0.17 totalR=-27.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 168 S --3-> S R=-0.17 totalR=-28.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 169 S --3-> S R=-0.17 totalR=-28.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 170 S --3-> S R=-0.17 totalR=-28.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 171 S --3-> S R=-0.17 totalR=-28.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 172 S --3-> S R=-0.17 totalR=-28.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 173 S --3-> S R=-0.17 totalR=-28.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 174 S --3-> S R=-0.17 totalR=-29.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 175 S --3-> S R=-0.17 totalR=-29.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 176 S --3-> S R=-0.17 totalR=-29.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 177 S --3-> S R=-0.17 totalR=-29.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 178 S --3-> S R=-0.17 totalR=-29.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 179 S --3-> S R=-0.17 totalR=-29.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 180 S --3-> S R=-0.17 totalR=-30.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 181 S --3-> S R=-0.17 totalR=-30.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 182 S --3-> S R=-0.17 totalR=-30.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 183 S --3-> S R=-0.17 totalR=-30.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 184 S --3-> S R=-0.17 totalR=-30.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 185 S --3-> S R=-0.17 totalR=-30.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 186 S --3-> S R=-0.17 totalR=-31.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 187 S --3-> S R=-0.17 totalR=-31.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 188 S --3-> S R=-0.17 totalR=-31.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 189 S --3-> S R=-0.17 totalR=-31.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 190 S --3-> S R=-0.17 totalR=-31.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 191 S --3-> S R=-0.17 totalR=-31.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 192 S --3-> S R=-0.17 totalR=-32.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 193 S --3-> S R=-0.17 totalR=-32.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 194 S --3-> S R=-0.17 totalR=-32.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 195 S --3-> S R=-0.17 totalR=-32.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 196 S --3-> S R=-0.17 totalR=-32.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 197 S --3-> S R=-0.17 totalR=-32.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 198 S --3-> S R=-0.17 totalR=-33.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 199 S --3-> S R=-0.17 totalR=-33.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 200 S --3-> S R=-0.17 totalR=-33.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 201 S --3-> S R=-0.17 totalR=-33.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 202 S --3-> S R=-0.17 totalR=-33.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 203 S --3-> S R=-0.17 totalR=-33.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 204 S --3-> S R=-0.17 totalR=-34.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 205 S --3-> S R=-0.17 totalR=-34.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 206 S --3-> S R=-0.17 totalR=-34.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 207 S --3-> S R=-0.17 totalR=-34.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 208 S --3-> S R=-0.17 totalR=-34.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 209 S --3-> S R=-0.17 totalR=-34.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 210 S --3-> S R=-0.17 totalR=-35.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 211 S --3-> S R=-0.17 totalR=-35.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 212 S --3-> S R=-0.17 totalR=-35.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 213 S --3-> S R=-0.17 totalR=-35.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 214 S --3-> S R=-0.17 totalR=-35.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 215 S --3-> S R=-0.17 totalR=-35.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 216 S --3-> S R=-0.17 totalR=-36.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 217 S --3-> S R=-0.17 totalR=-36.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 218 S --3-> S R=-0.17 totalR=-36.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 219 S --3-> S R=-0.17 totalR=-36.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 220 S --3-> S R=-0.17 totalR=-36.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 221 S --3-> S R=-0.17 totalR=-36.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 222 S --3-> S R=-0.17 totalR=-37.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 223 S --3-> S R=-0.17 totalR=-37.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 224 S --3-> S R=-0.17 totalR=-37.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 225 S --3-> S R=-0.17 totalR=-37.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 226 S --3-> S R=-0.17 totalR=-37.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 227 S --3-> S R=-0.17 totalR=-37.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 228 S --3-> S R=-0.17 totalR=-38.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 229 S --3-> S R=-0.17 totalR=-38.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 230 S --3-> S R=-0.17 totalR=-38.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 231 S --3-> S R=-0.17 totalR=-38.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 232 S --3-> S R=-0.17 totalR=-38.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 233 S --3-> S R=-0.17 totalR=-38.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 234 S --3-> S R=-0.17 totalR=-39.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 235 S --3-> S R=-0.17 totalR=-39.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 236 S --3-> S R=-0.17 totalR=-39.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 237 S --3-> S R=-0.17 totalR=-39.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 238 S --3-> S R=-0.17 totalR=-39.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 239 S --3-> S R=-0.17 totalR=-39.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 240 S --3-> S R=-0.17 totalR=-40.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 241 S --3-> S R=-0.17 totalR=-40.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 242 S --3-> S R=-0.17 totalR=-40.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 243 S --3-> S R=-0.17 totalR=-40.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 244 S --3-> S R=-0.17 totalR=-40.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 245 S --3-> S R=-0.17 totalR=-40.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 246 S --3-> S R=-0.17 totalR=-41.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 247 S --3-> S R=-0.17 totalR=-41.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 248 S --3-> S R=-0.17 totalR=-41.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 249 S --3-> S R=-0.17 totalR=-41.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 250 S --3-> S R=-0.17 totalR=-41.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 251 S --3-> S R=-0.17 totalR=-41.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 252 S --3-> S R=-0.17 totalR=-42.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 253 S --3-> S R=-0.17 totalR=-42.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 254 S --3-> S R=-0.17 totalR=-42.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 255 S --3-> S R=-0.17 totalR=-42.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 256 S --3-> S R=-0.17 totalR=-42.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 257 S --3-> S R=-0.17 totalR=-42.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 258 S --3-> S R=-0.17 totalR=-43.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 259 S --3-> S R=-0.17 totalR=-43.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 260 S --3-> S R=-0.17 totalR=-43.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 261 S --3-> S R=-0.17 totalR=-43.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 262 S --3-> S R=-0.17 totalR=-43.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 263 S --3-> S R=-0.17 totalR=-43.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 264 S --3-> S R=-0.17 totalR=-44.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 265 S --3-> S R=-0.17 totalR=-44.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 266 S --3-> S R=-0.17 totalR=-44.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 267 S --3-> S R=-0.17 totalR=-44.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 268 S --3-> S R=-0.17 totalR=-44.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 269 S --3-> S R=-0.17 totalR=-44.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 270 S --3-> S R=-0.17 totalR=-45.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 271 S --3-> S R=-0.17 totalR=-45.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 272 S --3-> S R=-0.17 totalR=-45.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 273 S --3-> S R=-0.17 totalR=-45.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 274 S --3-> S R=-0.17 totalR=-45.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 275 S --3-> S R=-0.17 totalR=-45.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 276 S --3-> S R=-0.17 totalR=-46.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 277 S --3-> S R=-0.17 totalR=-46.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 278 S --3-> S R=-0.17 totalR=-46.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 279 S --3-> S R=-0.17 totalR=-46.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 280 S --3-> S R=-0.17 totalR=-46.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 281 S --3-> S R=-0.17 totalR=-46.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 282 S --3-> S R=-0.17 totalR=-47.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 283 S --3-> S R=-0.17 totalR=-47.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 284 S --3-> S R=-0.17 totalR=-47.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 285 S --3-> S R=-0.17 totalR=-47.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 286 S --3-> S R=-0.17 totalR=-47.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 287 S --3-> S R=-0.17 totalR=-47.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 288 S --3-> S R=-0.17 totalR=-48.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 289 S --3-> S R=-0.17 totalR=-48.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 290 S --3-> S R=-0.17 totalR=-48.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 291 S --3-> S R=-0.17 totalR=-48.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 292 S --3-> S R=-0.17 totalR=-48.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 293 S --3-> S R=-0.17 totalR=-48.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 294 S --3-> S R=-0.17 totalR=-49.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 295 S --3-> S R=-0.17 totalR=-49.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 296 S --3-> S R=-0.17 totalR=-49.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 297 S --3-> S R=-0.17 totalR=-49.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 298 S --3-> S R=-0.17 totalR=-49.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 299 S --3-> S R=-0.17 totalR=-49.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 300 S --3-> S R=-0.17 totalR=-50.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 301 S --3-> S R=-0.17 totalR=-50.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 302 S --3-> S R=-0.17 totalR=-50.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 303 S --3-> S R=-0.17 totalR=-50.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 304 S --3-> S R=-0.17 totalR=-50.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 305 S --3-> S R=-0.17 totalR=-50.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 306 S --3-> S R=-0.17 totalR=-51.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 307 S --3-> S R=-0.17 totalR=-51.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 308 S --3-> S R=-0.17 totalR=-51.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 309 S --3-> S R=-0.17 totalR=-51.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 310 S --3-> S R=-0.17 totalR=-51.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 311 S --3-> S R=-0.17 totalR=-51.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 312 S --3-> S R=-0.17 totalR=-52.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 313 S --3-> S R=-0.17 totalR=-52.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 314 S --3-> S R=-0.17 totalR=-52.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 315 S --3-> S R=-0.17 totalR=-52.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 316 S --3-> S R=-0.17 totalR=-52.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 317 S --3-> S R=-0.17 totalR=-52.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 318 S --3-> S R=-0.17 totalR=-53.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 319 S --3-> S R=-0.17 totalR=-53.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 320 S --3-> S R=-0.17 totalR=-53.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 321 S --3-> S R=-0.17 totalR=-53.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 322 S --3-> S R=-0.17 totalR=-53.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 323 S --3-> S R=-0.17 totalR=-53.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 324 S --3-> S R=-0.17 totalR=-54.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 325 S --3-> S R=-0.17 totalR=-54.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 326 S --3-> S R=-0.17 totalR=-54.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 327 S --3-> S R=-0.17 totalR=-54.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 328 S --3-> S R=-0.17 totalR=-54.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 329 S --3-> S R=-0.17 totalR=-54.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 330 S --3-> S R=-0.17 totalR=-55.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 331 S --3-> S R=-0.17 totalR=-55.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 332 S --3-> S R=-0.17 totalR=-55.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 333 S --3-> S R=-0.17 totalR=-55.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 334 S --3-> S R=-0.17 totalR=-55.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 335 S --3-> S R=-0.17 totalR=-55.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 336 S --3-> S R=-0.17 totalR=-56.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 337 S --3-> S R=-0.17 totalR=-56.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 338 S --3-> S R=-0.17 totalR=-56.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 339 S --3-> S R=-0.17 totalR=-56.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 340 S --3-> S R=-0.17 totalR=-56.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 341 S --3-> S R=-0.17 totalR=-56.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 342 S --3-> S R=-0.17 totalR=-57.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 343 S --3-> S R=-0.17 totalR=-57.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 344 S --3-> S R=-0.17 totalR=-57.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 345 S --3-> S R=-0.17 totalR=-57.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 346 S --3-> S R=-0.17 totalR=-57.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 347 S --3-> S R=-0.17 totalR=-57.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 348 S --3-> S R=-0.17 totalR=-58.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 349 S --3-> S R=-0.17 totalR=-58.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 350 S --3-> S R=-0.17 totalR=-58.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 351 S --3-> S R=-0.17 totalR=-58.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 352 S --3-> S R=-0.17 totalR=-58.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 353 S --3-> S R=-0.17 totalR=-58.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 354 S --3-> S R=-0.17 totalR=-59.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 355 S --3-> S R=-0.17 totalR=-59.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 356 S --3-> S R=-0.17 totalR=-59.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 357 S --3-> S R=-0.17 totalR=-59.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 358 S --3-> S R=-0.17 totalR=-59.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 359 S --3-> S R=-0.17 totalR=-59.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 360 S --3-> S R=-0.17 totalR=-60.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 361 S --3-> S R=-0.17 totalR=-60.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 362 S --3-> S R=-0.17 totalR=-60.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 363 S --3-> S R=-0.17 totalR=-60.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 364 S --3-> S R=-0.17 totalR=-60.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 365 S --3-> S R=-0.17 totalR=-60.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 366 S --3-> S R=-0.17 totalR=-61.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 367 S --3-> S R=-0.17 totalR=-61.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 368 S --3-> S R=-0.17 totalR=-61.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 369 S --3-> S R=-0.17 totalR=-61.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 370 S --3-> S R=-0.17 totalR=-61.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 371 S --3-> S R=-0.17 totalR=-61.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 372 S --3-> S R=-0.17 totalR=-62.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 373 S --3-> S R=-0.17 totalR=-62.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 374 S --3-> S R=-0.17 totalR=-62.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 375 S --3-> S R=-0.17 totalR=-62.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 376 S --3-> S R=-0.17 totalR=-62.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 377 S --3-> S R=-0.17 totalR=-62.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 378 S --3-> S R=-0.17 totalR=-63.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 379 S --3-> S R=-0.17 totalR=-63.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 380 S --3-> S R=-0.17 totalR=-63.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 381 S --3-> S R=-0.17 totalR=-63.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 382 S --3-> S R=-0.17 totalR=-63.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 383 S --3-> S R=-0.17 totalR=-63.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 384 S --3-> S R=-0.17 totalR=-64.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 385 S --3-> S R=-0.17 totalR=-64.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 386 S --3-> S R=-0.17 totalR=-64.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 387 S --3-> S R=-0.17 totalR=-64.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 388 S --3-> S R=-0.17 totalR=-64.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 389 S --3-> S R=-0.17 totalR=-64.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 390 S --3-> S R=-0.17 totalR=-65.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 391 S --3-> S R=-0.17 totalR=-65.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 392 S --3-> S R=-0.17 totalR=-65.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 393 S --3-> S R=-0.17 totalR=-65.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 394 S --3-> S R=-0.17 totalR=-65.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 395 S --3-> S R=-0.17 totalR=-65.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 396 S --3-> S R=-0.17 totalR=-66.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 397 S --3-> S R=-0.17 totalR=-66.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 398 S --3-> S R=-0.17 totalR=-66.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 399 S --3-> S R=-0.17 totalR=-66.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 400 S --3-> S R=-0.17 totalR=-66.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 401 S --3-> S R=-0.17 totalR=-66.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 402 S --3-> S R=-0.17 totalR=-67.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 403 S --3-> S R=-0.17 totalR=-67.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 404 S --3-> S R=-0.17 totalR=-67.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 405 S --3-> S R=-0.17 totalR=-67.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 406 S --3-> S R=-0.17 totalR=-67.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 407 S --3-> S R=-0.17 totalR=-67.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 408 S --3-> S R=-0.17 totalR=-68.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 409 S --3-> S R=-0.17 totalR=-68.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 410 S --3-> S R=-0.17 totalR=-68.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 411 S --3-> S R=-0.17 totalR=-68.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 412 S --3-> S R=-0.17 totalR=-68.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 413 S --3-> S R=-0.17 totalR=-68.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 414 S --3-> S R=-0.17 totalR=-69.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 415 S --3-> S R=-0.17 totalR=-69.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 416 S --3-> S R=-0.17 totalR=-69.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 417 S --3-> S R=-0.17 totalR=-69.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 418 S --3-> S R=-0.17 totalR=-69.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 419 S --3-> S R=-0.17 totalR=-69.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 420 S --3-> S R=-0.17 totalR=-70.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 421 S --3-> S R=-0.17 totalR=-70.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 422 S --3-> S R=-0.17 totalR=-70.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 423 S --3-> S R=-0.17 totalR=-70.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 424 S --3-> S R=-0.17 totalR=-70.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 425 S --3-> S R=-0.17 totalR=-70.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 426 S --3-> S R=-0.17 totalR=-71.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 427 S --3-> S R=-0.17 totalR=-71.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 428 S --3-> S R=-0.17 totalR=-71.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 429 S --3-> S R=-0.17 totalR=-71.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 430 S --3-> S R=-0.17 totalR=-71.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 431 S --3-> S R=-0.17 totalR=-71.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 432 S --3-> S R=-0.17 totalR=-72.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 433 S --3-> S R=-0.17 totalR=-72.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 434 S --3-> S R=-0.17 totalR=-72.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 435 S --3-> S R=-0.17 totalR=-72.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 436 S --3-> S R=-0.17 totalR=-72.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 437 S --3-> S R=-0.17 totalR=-72.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 438 S --3-> S R=-0.17 totalR=-73.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 439 S --3-> S R=-0.17 totalR=-73.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 440 S --3-> S R=-0.17 totalR=-73.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 441 S --3-> S R=-0.17 totalR=-73.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 442 S --3-> S R=-0.17 totalR=-73.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 443 S --3-> S R=-0.17 totalR=-73.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 444 S --3-> S R=-0.17 totalR=-74.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 445 S --3-> S R=-0.17 totalR=-74.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 446 S --3-> S R=-0.17 totalR=-74.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 447 S --3-> S R=-0.17 totalR=-74.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 448 S --3-> S R=-0.17 totalR=-74.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 449 S --3-> S R=-0.17 totalR=-74.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 450 S --3-> S R=-0.17 totalR=-75.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 451 S --3-> S R=-0.17 totalR=-75.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 452 S --3-> S R=-0.17 totalR=-75.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 453 S --3-> S R=-0.17 totalR=-75.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 454 S --3-> S R=-0.17 totalR=-75.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 455 S --3-> S R=-0.17 totalR=-75.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 456 S --3-> S R=-0.17 totalR=-76.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 457 S --3-> S R=-0.17 totalR=-76.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 458 S --3-> S R=-0.17 totalR=-76.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 459 S --3-> S R=-0.17 totalR=-76.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 460 S --3-> S R=-0.17 totalR=-76.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 461 S --3-> S R=-0.17 totalR=-76.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 462 S --3-> S R=-0.17 totalR=-77.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 463 S --3-> S R=-0.17 totalR=-77.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 464 S --3-> S R=-0.17 totalR=-77.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 465 S --3-> S R=-0.17 totalR=-77.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 466 S --3-> S R=-0.17 totalR=-77.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 467 S --3-> S R=-0.17 totalR=-77.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 468 S --3-> S R=-0.17 totalR=-78.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 469 S --3-> S R=-0.17 totalR=-78.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 470 S --3-> S R=-0.17 totalR=-78.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 471 S --3-> S R=-0.17 totalR=-78.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 472 S --3-> S R=-0.17 totalR=-78.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 473 S --3-> S R=-0.17 totalR=-78.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 474 S --3-> S R=-0.17 totalR=-79.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 475 S --3-> S R=-0.17 totalR=-79.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 476 S --3-> S R=-0.17 totalR=-79.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 477 S --3-> S R=-0.17 totalR=-79.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 478 S --3-> S R=-0.17 totalR=-79.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 479 S --3-> S R=-0.17 totalR=-79.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 480 S --3-> S R=-0.17 totalR=-80.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 481 S --3-> S R=-0.17 totalR=-80.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 482 S --3-> S R=-0.17 totalR=-80.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 483 S --3-> S R=-0.17 totalR=-80.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 484 S --3-> S R=-0.17 totalR=-80.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 485 S --3-> S R=-0.17 totalR=-80.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 486 S --3-> S R=-0.17 totalR=-81.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 487 S --3-> S R=-0.17 totalR=-81.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 488 S --3-> S R=-0.17 totalR=-81.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 489 S --3-> S R=-0.17 totalR=-81.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 490 S --3-> S R=-0.17 totalR=-81.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 491 S --3-> S R=-0.17 totalR=-81.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 492 S --3-> S R=-0.17 totalR=-82.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 493 S --3-> S R=-0.17 totalR=-82.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 494 S --3-> S R=-0.17 totalR=-82.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 495 S --3-> S R=-0.17 totalR=-82.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 496 S --3-> S R=-0.17 totalR=-82.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 497 S --3-> S R=-0.17 totalR=-82.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 498 S --3-> S R=-0.17 totalR=-83.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 499 S --3-> S R=-0.17 totalR=-83.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 500 S --3-> S R=-0.17 totalR=-83.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 501 S --3-> S R=-0.17 totalR=-83.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 502 S --3-> S R=-0.17 totalR=-83.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 503 S --3-> S R=-0.17 totalR=-83.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 504 S --3-> S R=-0.17 totalR=-84.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 505 S --3-> S R=-0.17 totalR=-84.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 506 S --3-> S R=-0.17 totalR=-84.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 507 S --3-> S R=-0.17 totalR=-84.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 508 S --3-> S R=-0.17 totalR=-84.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 509 S --3-> S R=-0.17 totalR=-84.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 510 S --3-> S R=-0.17 totalR=-85.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 511 S --3-> S R=-0.17 totalR=-85.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 512 S --3-> S R=-0.17 totalR=-85.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 513 S --3-> S R=-0.17 totalR=-85.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 514 S --3-> S R=-0.17 totalR=-85.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 515 S --3-> S R=-0.17 totalR=-85.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 516 S --3-> S R=-0.17 totalR=-86.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 517 S --3-> S R=-0.17 totalR=-86.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 518 S --3-> S R=-0.17 totalR=-86.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 519 S --3-> S R=-0.17 totalR=-86.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 520 S --3-> S R=-0.17 totalR=-86.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 521 S --3-> S R=-0.17 totalR=-86.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 522 S --3-> S R=-0.17 totalR=-87.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 523 S --3-> S R=-0.17 totalR=-87.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 524 S --3-> S R=-0.17 totalR=-87.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 525 S --3-> S R=-0.17 totalR=-87.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 526 S --3-> S R=-0.17 totalR=-87.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 527 S --3-> S R=-0.17 totalR=-87.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 528 S --3-> S R=-0.17 totalR=-88.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 529 S --3-> S R=-0.17 totalR=-88.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 530 S --3-> S R=-0.17 totalR=-88.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 531 S --3-> S R=-0.17 totalR=-88.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 532 S --3-> S R=-0.17 totalR=-88.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 533 S --3-> S R=-0.17 totalR=-88.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 534 S --3-> S R=-0.17 totalR=-89.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 535 S --3-> S R=-0.17 totalR=-89.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 536 S --3-> S R=-0.17 totalR=-89.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 537 S --3-> S R=-0.17 totalR=-89.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 538 S --3-> S R=-0.17 totalR=-89.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 539 S --3-> S R=-0.17 totalR=-89.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 540 S --3-> S R=-0.17 totalR=-90.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 541 S --3-> S R=-0.17 totalR=-90.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 542 S --3-> S R=-0.17 totalR=-90.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 543 S --3-> S R=-0.17 totalR=-90.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 544 S --3-> S R=-0.17 totalR=-90.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 545 S --3-> S R=-0.17 totalR=-90.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 546 S --3-> S R=-0.17 totalR=-91.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 547 S --3-> S R=-0.17 totalR=-91.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 548 S --3-> S R=-0.17 totalR=-91.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 549 S --3-> S R=-0.17 totalR=-91.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 550 S --3-> S R=-0.17 totalR=-91.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 551 S --3-> S R=-0.17 totalR=-91.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 552 S --3-> S R=-0.17 totalR=-92.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 553 S --3-> S R=-0.17 totalR=-92.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 554 S --3-> S R=-0.17 totalR=-92.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 555 S --3-> S R=-0.17 totalR=-92.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 556 S --3-> S R=-0.17 totalR=-92.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 557 S --3-> S R=-0.17 totalR=-92.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 558 S --3-> S R=-0.17 totalR=-93.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 559 S --3-> S R=-0.17 totalR=-93.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 560 S --3-> S R=-0.17 totalR=-93.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 561 S --3-> S R=-0.17 totalR=-93.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 562 S --3-> S R=-0.17 totalR=-93.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 563 S --3-> S R=-0.17 totalR=-93.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 564 S --3-> S R=-0.17 totalR=-94.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 565 S --3-> S R=-0.17 totalR=-94.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 566 S --3-> S R=-0.17 totalR=-94.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 567 S --3-> S R=-0.17 totalR=-94.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 568 S --3-> S R=-0.17 totalR=-94.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 569 S --3-> S R=-0.17 totalR=-94.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 570 S --3-> S R=-0.17 totalR=-95.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 571 S --3-> S R=-0.17 totalR=-95.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 572 S --3-> S R=-0.17 totalR=-95.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 573 S --3-> S R=-0.17 totalR=-95.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 574 S --3-> S R=-0.17 totalR=-95.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 575 S --3-> S R=-0.17 totalR=-95.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 576 S --3-> S R=-0.17 totalR=-96.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 577 S --3-> S R=-0.17 totalR=-96.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 578 S --3-> S R=-0.17 totalR=-96.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 579 S --3-> S R=-0.17 totalR=-96.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 580 S --3-> S R=-0.17 totalR=-96.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 581 S --3-> S R=-0.17 totalR=-96.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 582 S --3-> S R=-0.17 totalR=-97.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 583 S --3-> S R=-0.17 totalR=-97.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 584 S --3-> S R=-0.17 totalR=-97.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 585 S --3-> S R=-0.17 totalR=-97.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 586 S --3-> S R=-0.17 totalR=-97.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 587 S --3-> S R=-0.17 totalR=-97.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 588 S --3-> S R=-0.17 totalR=-98.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 589 S --3-> S R=-0.17 totalR=-98.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 590 S --3-> S R=-0.17 totalR=-98.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 591 S --3-> S R=-0.17 totalR=-98.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 592 S --3-> S R=-0.17 totalR=-98.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 593 S --3-> S R=-0.17 totalR=-98.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 594 S --3-> S R=-0.17 totalR=-99.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 595 S --3-> S R=-0.17 totalR=-99.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 596 S --3-> S R=-0.17 totalR=-99.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 597 S --3-> S R=-0.17 totalR=-99.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 598 S --3-> S R=-0.17 totalR=-99.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 599 S --3-> S R=-0.17 totalR=-99.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 600 S --3-> S R=-0.17 totalR=-100.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 601 S --3-> S R=-0.17 totalR=-100.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 602 S --3-> S R=-0.17 totalR=-100.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 603 S --3-> S R=-0.17 totalR=-100.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 604 S --3-> S R=-0.17 totalR=-100.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 605 S --3-> S R=-0.17 totalR=-100.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 606 S --3-> S R=-0.17 totalR=-101.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 607 S --3-> S R=-0.17 totalR=-101.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 608 S --3-> S R=-0.17 totalR=-101.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 609 S --3-> S R=-0.17 totalR=-101.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 610 S --3-> S R=-0.17 totalR=-101.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 611 S --3-> S R=-0.17 totalR=-101.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 612 S --3-> S R=-0.17 totalR=-102.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 613 S --3-> S R=-0.17 totalR=-102.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 614 S --3-> S R=-0.17 totalR=-102.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 615 S --3-> S R=-0.17 totalR=-102.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 616 S --3-> S R=-0.17 totalR=-102.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 617 S --3-> S R=-0.17 totalR=-102.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 618 S --3-> S R=-0.17 totalR=-103.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 619 S --3-> S R=-0.17 totalR=-103.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 620 S --3-> S R=-0.17 totalR=-103.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 621 S --3-> S R=-0.17 totalR=-103.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 622 S --3-> S R=-0.17 totalR=-103.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 623 S --3-> S R=-0.17 totalR=-103.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 624 S --3-> S R=-0.17 totalR=-104.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 625 S --3-> S R=-0.17 totalR=-104.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 626 S --3-> S R=-0.17 totalR=-104.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 627 S --3-> S R=-0.17 totalR=-104.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 628 S --3-> S R=-0.17 totalR=-104.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 629 S --3-> S R=-0.17 totalR=-104.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 630 S --3-> S R=-0.17 totalR=-105.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 631 S --3-> S R=-0.17 totalR=-105.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 632 S --3-> S R=-0.17 totalR=-105.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 633 S --3-> S R=-0.17 totalR=-105.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 634 S --3-> S R=-0.17 totalR=-105.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 635 S --3-> S R=-0.17 totalR=-105.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 636 S --3-> S R=-0.17 totalR=-106.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 637 S --3-> S R=-0.17 totalR=-106.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 638 S --3-> S R=-0.17 totalR=-106.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 639 S --3-> S R=-0.17 totalR=-106.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 640 S --3-> S R=-0.17 totalR=-106.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 641 S --3-> S R=-0.17 totalR=-106.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 642 S --3-> S R=-0.17 totalR=-107.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 643 S --3-> S R=-0.17 totalR=-107.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 644 S --3-> S R=-0.17 totalR=-107.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 645 S --3-> S R=-0.17 totalR=-107.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 646 S --3-> S R=-0.17 totalR=-107.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 647 S --3-> S R=-0.17 totalR=-107.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 648 S --3-> S R=-0.17 totalR=-108.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 649 S --3-> S R=-0.17 totalR=-108.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 650 S --3-> S R=-0.17 totalR=-108.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 651 S --3-> S R=-0.17 totalR=-108.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 652 S --3-> S R=-0.17 totalR=-108.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 653 S --3-> S R=-0.17 totalR=-108.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 654 S --3-> S R=-0.17 totalR=-109.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 655 S --3-> S R=-0.17 totalR=-109.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 656 S --3-> S R=-0.17 totalR=-109.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 657 S --3-> S R=-0.17 totalR=-109.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 658 S --3-> S R=-0.17 totalR=-109.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 659 S --3-> S R=-0.17 totalR=-109.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 660 S --3-> S R=-0.17 totalR=-110.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 661 S --3-> S R=-0.17 totalR=-110.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 662 S --3-> S R=-0.17 totalR=-110.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 663 S --3-> S R=-0.17 totalR=-110.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 664 S --3-> S R=-0.17 totalR=-110.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 665 S --3-> S R=-0.17 totalR=-110.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 666 S --3-> S R=-0.17 totalR=-111.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 667 S --3-> S R=-0.17 totalR=-111.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 668 S --3-> S R=-0.17 totalR=-111.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 669 S --3-> S R=-0.17 totalR=-111.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 670 S --3-> S R=-0.17 totalR=-111.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 671 S --3-> S R=-0.17 totalR=-111.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 672 S --3-> S R=-0.17 totalR=-112.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 673 S --3-> S R=-0.17 totalR=-112.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 674 S --3-> S R=-0.17 totalR=-112.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 675 S --3-> S R=-0.17 totalR=-112.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 676 S --3-> S R=-0.17 totalR=-112.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 677 S --3-> S R=-0.17 totalR=-112.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 678 S --3-> S R=-0.17 totalR=-113.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 679 S --3-> S R=-0.17 totalR=-113.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 680 S --3-> S R=-0.17 totalR=-113.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 681 S --3-> S R=-0.17 totalR=-113.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 682 S --3-> S R=-0.17 totalR=-113.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 683 S --3-> S R=-0.17 totalR=-113.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 684 S --3-> S R=-0.17 totalR=-114.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 685 S --3-> S R=-0.17 totalR=-114.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 686 S --3-> S R=-0.17 totalR=-114.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 687 S --3-> S R=-0.17 totalR=-114.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 688 S --3-> S R=-0.17 totalR=-114.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 689 S --3-> S R=-0.17 totalR=-114.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 690 S --3-> S R=-0.17 totalR=-115.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 691 S --3-> S R=-0.17 totalR=-115.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 692 S --3-> S R=-0.17 totalR=-115.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 693 S --3-> S R=-0.17 totalR=-115.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 694 S --3-> S R=-0.17 totalR=-115.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 695 S --3-> S R=-0.17 totalR=-115.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 696 S --3-> S R=-0.17 totalR=-116.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 697 S --3-> S R=-0.17 totalR=-116.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 698 S --3-> S R=-0.17 totalR=-116.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 699 S --3-> S R=-0.17 totalR=-116.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 700 S --3-> S R=-0.17 totalR=-116.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 701 S --3-> S R=-0.17 totalR=-116.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 702 S --3-> S R=-0.17 totalR=-117.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 703 S --3-> S R=-0.17 totalR=-117.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 704 S --3-> S R=-0.17 totalR=-117.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 705 S --3-> S R=-0.17 totalR=-117.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 706 S --3-> S R=-0.17 totalR=-117.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 707 S --3-> S R=-0.17 totalR=-117.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 708 S --3-> S R=-0.17 totalR=-118.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 709 S --3-> S R=-0.17 totalR=-118.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 710 S --3-> S R=-0.17 totalR=-118.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 711 S --3-> S R=-0.17 totalR=-118.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 712 S --3-> S R=-0.17 totalR=-118.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 713 S --3-> S R=-0.17 totalR=-118.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 714 S --3-> S R=-0.17 totalR=-119.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 715 S --3-> S R=-0.17 totalR=-119.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 716 S --3-> S R=-0.17 totalR=-119.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 717 S --3-> S R=-0.17 totalR=-119.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 718 S --3-> S R=-0.17 totalR=-119.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 719 S --3-> S R=-0.17 totalR=-119.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 720 S --3-> S R=-0.17 totalR=-120.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 721 S --3-> S R=-0.17 totalR=-120.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 722 S --3-> S R=-0.17 totalR=-120.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 723 S --3-> S R=-0.17 totalR=-120.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 724 S --3-> S R=-0.17 totalR=-120.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 725 S --3-> S R=-0.17 totalR=-120.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 726 S --3-> S R=-0.17 totalR=-121.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 727 S --3-> S R=-0.17 totalR=-121.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 728 S --3-> S R=-0.17 totalR=-121.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 729 S --3-> S R=-0.17 totalR=-121.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 730 S --3-> S R=-0.17 totalR=-121.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 731 S --3-> S R=-0.17 totalR=-121.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 732 S --3-> S R=-0.17 totalR=-122.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 733 S --3-> S R=-0.17 totalR=-122.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 734 S --3-> S R=-0.17 totalR=-122.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 735 S --3-> S R=-0.17 totalR=-122.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 736 S --3-> S R=-0.17 totalR=-122.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 737 S --3-> S R=-0.17 totalR=-122.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 738 S --3-> S R=-0.17 totalR=-123.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 739 S --3-> S R=-0.17 totalR=-123.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 740 S --3-> S R=-0.17 totalR=-123.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 741 S --3-> S R=-0.17 totalR=-123.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 742 S --3-> S R=-0.17 totalR=-123.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 743 S --3-> S R=-0.17 totalR=-123.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 744 S --3-> S R=-0.17 totalR=-124.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 745 S --3-> S R=-0.17 totalR=-124.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 746 S --3-> S R=-0.17 totalR=-124.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 747 S --3-> S R=-0.17 totalR=-124.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 748 S --3-> S R=-0.17 totalR=-124.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 749 S --3-> S R=-0.17 totalR=-124.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 750 S --3-> S R=-0.17 totalR=-125.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 751 S --3-> S R=-0.17 totalR=-125.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 752 S --3-> S R=-0.17 totalR=-125.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 753 S --3-> S R=-0.17 totalR=-125.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 754 S --3-> S R=-0.17 totalR=-125.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 755 S --3-> S R=-0.17 totalR=-125.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 756 S --3-> S R=-0.17 totalR=-126.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 757 S --3-> S R=-0.17 totalR=-126.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 758 S --3-> S R=-0.17 totalR=-126.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 759 S --3-> S R=-0.17 totalR=-126.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 760 S --3-> S R=-0.17 totalR=-126.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 761 S --3-> S R=-0.17 totalR=-126.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 762 S --3-> S R=-0.17 totalR=-127.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 763 S --3-> S R=-0.17 totalR=-127.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 764 S --3-> S R=-0.17 totalR=-127.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 765 S --3-> S R=-0.17 totalR=-127.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 766 S --3-> S R=-0.17 totalR=-127.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 767 S --3-> S R=-0.17 totalR=-127.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 768 S --3-> S R=-0.17 totalR=-128.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 769 S --3-> S R=-0.17 totalR=-128.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 770 S --3-> S R=-0.17 totalR=-128.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 771 S --3-> S R=-0.17 totalR=-128.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 772 S --3-> S R=-0.17 totalR=-128.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 773 S --3-> S R=-0.17 totalR=-128.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 774 S --3-> S R=-0.17 totalR=-129.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 775 S --3-> S R=-0.17 totalR=-129.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 776 S --3-> S R=-0.17 totalR=-129.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 777 S --3-> S R=-0.17 totalR=-129.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 778 S --3-> S R=-0.17 totalR=-129.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 779 S --3-> S R=-0.17 totalR=-129.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 780 S --3-> S R=-0.17 totalR=-130.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 781 S --3-> S R=-0.17 totalR=-130.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 782 S --3-> S R=-0.17 totalR=-130.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 783 S --3-> S R=-0.17 totalR=-130.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 784 S --3-> S R=-0.17 totalR=-130.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 785 S --3-> S R=-0.17 totalR=-130.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 786 S --3-> S R=-0.17 totalR=-131.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 787 S --3-> S R=-0.17 totalR=-131.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 788 S --3-> S R=-0.17 totalR=-131.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 789 S --3-> S R=-0.17 totalR=-131.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 790 S --3-> S R=-0.17 totalR=-131.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 791 S --3-> S R=-0.17 totalR=-131.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 792 S --3-> S R=-0.17 totalR=-132.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 793 S --3-> S R=-0.17 totalR=-132.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 794 S --3-> S R=-0.17 totalR=-132.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 795 S --3-> S R=-0.17 totalR=-132.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 796 S --3-> S R=-0.17 totalR=-132.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 797 S --3-> S R=-0.17 totalR=-132.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 798 S --3-> S R=-0.17 totalR=-133.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 799 S --3-> S R=-0.17 totalR=-133.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 800 S --3-> S R=-0.17 totalR=-133.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 801 S --3-> S R=-0.17 totalR=-133.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 802 S --3-> S R=-0.17 totalR=-133.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 803 S --3-> S R=-0.17 totalR=-133.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 804 S --3-> S R=-0.17 totalR=-134.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 805 S --3-> S R=-0.17 totalR=-134.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 806 S --3-> S R=-0.17 totalR=-134.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 807 S --3-> S R=-0.17 totalR=-134.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 808 S --3-> S R=-0.17 totalR=-134.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 809 S --3-> S R=-0.17 totalR=-134.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 810 S --3-> S R=-0.17 totalR=-135.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 811 S --3-> S R=-0.17 totalR=-135.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 812 S --3-> S R=-0.17 totalR=-135.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 813 S --3-> S R=-0.17 totalR=-135.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 814 S --3-> S R=-0.17 totalR=-135.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 815 S --3-> S R=-0.17 totalR=-135.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 816 S --3-> S R=-0.17 totalR=-136.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 817 S --3-> S R=-0.17 totalR=-136.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 818 S --3-> S R=-0.17 totalR=-136.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 819 S --3-> S R=-0.17 totalR=-136.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 820 S --3-> S R=-0.17 totalR=-136.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 821 S --3-> S R=-0.17 totalR=-136.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 822 S --3-> S R=-0.17 totalR=-137.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 823 S --3-> S R=-0.17 totalR=-137.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 824 S --3-> S R=-0.17 totalR=-137.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 825 S --3-> S R=-0.17 totalR=-137.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 826 S --3-> S R=-0.17 totalR=-137.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 827 S --3-> S R=-0.17 totalR=-137.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 828 S --3-> S R=-0.17 totalR=-138.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 829 S --3-> S R=-0.17 totalR=-138.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 830 S --3-> S R=-0.17 totalR=-138.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 831 S --3-> S R=-0.17 totalR=-138.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 832 S --3-> S R=-0.17 totalR=-138.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 833 S --3-> S R=-0.17 totalR=-138.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 834 S --3-> S R=-0.17 totalR=-139.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 835 S --3-> S R=-0.17 totalR=-139.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 836 S --3-> S R=-0.17 totalR=-139.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 837 S --3-> S R=-0.17 totalR=-139.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 838 S --3-> S R=-0.17 totalR=-139.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 839 S --3-> S R=-0.17 totalR=-139.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 840 S --3-> S R=-0.17 totalR=-140.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 841 S --3-> S R=-0.17 totalR=-140.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 842 S --3-> S R=-0.17 totalR=-140.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 843 S --3-> S R=-0.17 totalR=-140.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 844 S --3-> S R=-0.17 totalR=-140.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 845 S --3-> S R=-0.17 totalR=-140.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 846 S --3-> S R=-0.17 totalR=-141.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 847 S --3-> S R=-0.17 totalR=-141.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 848 S --3-> S R=-0.17 totalR=-141.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 849 S --3-> S R=-0.17 totalR=-141.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 850 S --3-> S R=-0.17 totalR=-141.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 851 S --3-> S R=-0.17 totalR=-141.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 852 S --3-> S R=-0.17 totalR=-142.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 853 S --3-> S R=-0.17 totalR=-142.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 854 S --3-> S R=-0.17 totalR=-142.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 855 S --3-> S R=-0.17 totalR=-142.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 856 S --3-> S R=-0.17 totalR=-142.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 857 S --3-> S R=-0.17 totalR=-142.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 858 S --3-> S R=-0.17 totalR=-143.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 859 S --3-> S R=-0.17 totalR=-143.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 860 S --3-> S R=-0.17 totalR=-143.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 861 S --3-> S R=-0.17 totalR=-143.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 862 S --3-> S R=-0.17 totalR=-143.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 863 S --3-> S R=-0.17 totalR=-143.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 864 S --3-> S R=-0.17 totalR=-144.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 865 S --3-> S R=-0.17 totalR=-144.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 866 S --3-> S R=-0.17 totalR=-144.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 867 S --3-> S R=-0.17 totalR=-144.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 868 S --3-> S R=-0.17 totalR=-144.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 869 S --3-> S R=-0.17 totalR=-144.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 870 S --3-> S R=-0.17 totalR=-145.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 871 S --3-> S R=-0.17 totalR=-145.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 872 S --3-> S R=-0.17 totalR=-145.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 873 S --3-> S R=-0.17 totalR=-145.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 874 S --3-> S R=-0.17 totalR=-145.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 875 S --3-> S R=-0.17 totalR=-145.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 876 S --3-> S R=-0.17 totalR=-146.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 877 S --3-> S R=-0.17 totalR=-146.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 878 S --3-> S R=-0.17 totalR=-146.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 879 S --3-> S R=-0.17 totalR=-146.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 880 S --3-> S R=-0.17 totalR=-146.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 881 S --3-> S R=-0.17 totalR=-146.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 882 S --3-> S R=-0.17 totalR=-147.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 883 S --3-> S R=-0.17 totalR=-147.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 884 S --3-> S R=-0.17 totalR=-147.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 885 S --3-> S R=-0.17 totalR=-147.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 886 S --3-> S R=-0.17 totalR=-147.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 887 S --3-> S R=-0.17 totalR=-147.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 888 S --3-> S R=-0.17 totalR=-148.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 889 S --3-> S R=-0.17 totalR=-148.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 890 S --3-> S R=-0.17 totalR=-148.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 891 S --3-> S R=-0.17 totalR=-148.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 892 S --3-> S R=-0.17 totalR=-148.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 893 S --3-> S R=-0.17 totalR=-148.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 894 S --3-> S R=-0.17 totalR=-149.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 895 S --3-> S R=-0.17 totalR=-149.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 896 S --3-> S R=-0.17 totalR=-149.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 897 S --3-> S R=-0.17 totalR=-149.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 898 S --3-> S R=-0.17 totalR=-149.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 899 S --3-> S R=-0.17 totalR=-149.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 900 S --3-> S R=-0.17 totalR=-150.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 901 S --3-> S R=-0.17 totalR=-150.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 902 S --3-> S R=-0.17 totalR=-150.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 903 S --3-> S R=-0.17 totalR=-150.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 904 S --3-> S R=-0.17 totalR=-150.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 905 S --3-> S R=-0.17 totalR=-150.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 906 S --3-> S R=-0.17 totalR=-151.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 907 S --3-> S R=-0.17 totalR=-151.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 908 S --3-> S R=-0.17 totalR=-151.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 909 S --3-> S R=-0.17 totalR=-151.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 910 S --3-> S R=-0.17 totalR=-151.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 911 S --3-> S R=-0.17 totalR=-151.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 912 S --3-> S R=-0.17 totalR=-152.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 913 S --3-> S R=-0.17 totalR=-152.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 914 S --3-> S R=-0.17 totalR=-152.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 915 S --3-> S R=-0.17 totalR=-152.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 916 S --3-> S R=-0.17 totalR=-152.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 917 S --3-> S R=-0.17 totalR=-152.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 918 S --3-> S R=-0.17 totalR=-153.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 919 S --3-> S R=-0.17 totalR=-153.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 920 S --3-> S R=-0.17 totalR=-153.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 921 S --3-> S R=-0.17 totalR=-153.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 922 S --3-> S R=-0.17 totalR=-153.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 923 S --3-> S R=-0.17 totalR=-153.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 924 S --3-> S R=-0.17 totalR=-154.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 925 S --3-> S R=-0.17 totalR=-154.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 926 S --3-> S R=-0.17 totalR=-154.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 927 S --3-> S R=-0.17 totalR=-154.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 928 S --3-> S R=-0.17 totalR=-154.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 929 S --3-> S R=-0.17 totalR=-154.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 930 S --3-> S R=-0.17 totalR=-155.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 931 S --3-> S R=-0.17 totalR=-155.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 932 S --3-> S R=-0.17 totalR=-155.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 933 S --3-> S R=-0.17 totalR=-155.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 934 S --3-> S R=-0.17 totalR=-155.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 935 S --3-> S R=-0.17 totalR=-155.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 936 S --3-> S R=-0.17 totalR=-156.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 937 S --3-> S R=-0.17 totalR=-156.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 938 S --3-> S R=-0.17 totalR=-156.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 939 S --3-> S R=-0.17 totalR=-156.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 940 S --3-> S R=-0.17 totalR=-156.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 941 S --3-> S R=-0.17 totalR=-156.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 942 S --3-> S R=-0.17 totalR=-157.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 943 S --3-> S R=-0.17 totalR=-157.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 944 S --3-> S R=-0.17 totalR=-157.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 945 S --3-> S R=-0.17 totalR=-157.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 946 S --3-> S R=-0.17 totalR=-157.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 947 S --3-> S R=-0.17 totalR=-157.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 948 S --3-> S R=-0.17 totalR=-158.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 949 S --3-> S R=-0.17 totalR=-158.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 950 S --3-> S R=-0.17 totalR=-158.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 951 S --3-> S R=-0.17 totalR=-158.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 952 S --3-> S R=-0.17 totalR=-158.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 953 S --3-> S R=-0.17 totalR=-158.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 954 S --3-> S R=-0.17 totalR=-159.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 955 S --3-> S R=-0.17 totalR=-159.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 956 S --3-> S R=-0.17 totalR=-159.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 957 S --3-> S R=-0.17 totalR=-159.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 958 S --3-> S R=-0.17 totalR=-159.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 959 S --3-> S R=-0.17 totalR=-159.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 960 S --3-> S R=-0.17 totalR=-160.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 961 S --3-> S R=-0.17 totalR=-160.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 962 S --3-> S R=-0.17 totalR=-160.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 963 S --3-> S R=-0.17 totalR=-160.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 964 S --3-> S R=-0.17 totalR=-160.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 965 S --3-> S R=-0.17 totalR=-160.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 966 S --3-> S R=-0.17 totalR=-161.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 967 S --3-> S R=-0.17 totalR=-161.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 968 S --3-> S R=-0.17 totalR=-161.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 969 S --3-> S R=-0.17 totalR=-161.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 970 S --3-> S R=-0.17 totalR=-161.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 971 S --3-> S R=-0.17 totalR=-161.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 972 S --3-> S R=-0.17 totalR=-162.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 973 S --3-> S R=-0.17 totalR=-162.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 974 S --3-> S R=-0.17 totalR=-162.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 975 S --3-> S R=-0.17 totalR=-162.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 976 S --3-> S R=-0.17 totalR=-162.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 977 S --3-> S R=-0.17 totalR=-162.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 978 S --3-> S R=-0.17 totalR=-163.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 979 S --3-> S R=-0.17 totalR=-163.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 980 S --3-> S R=-0.17 totalR=-163.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 981 S --3-> S R=-0.17 totalR=-163.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 982 S --3-> S R=-0.17 totalR=-163.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 983 S --3-> S R=-0.17 totalR=-163.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 984 S --3-> S R=-0.17 totalR=-164.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 985 S --3-> S R=-0.17 totalR=-164.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 986 S --3-> S R=-0.17 totalR=-164.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 987 S --3-> S R=-0.17 totalR=-164.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 988 S --3-> S R=-0.17 totalR=-164.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 989 S --3-> S R=-0.17 totalR=-164.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 990 S --3-> S R=-0.17 totalR=-165.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 991 S --3-> S R=-0.17 totalR=-165.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 992 S --3-> S R=-0.17 totalR=-165.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 993 S --3-> S R=-0.17 totalR=-165.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 994 S --3-> S R=-0.17 totalR=-165.67 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 995 S --3-> S R=-0.17 totalR=-165.83 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 996 S --3-> S R=-0.17 totalR=-166.00 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 997 S --3-> S R=-0.17 totalR=-166.17 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 998 S --3-> S R=-0.17 totalR=-166.33 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 999 S --3-> S R=-0.17 totalR=-166.50 cost=1000 customerR= 0 optimum=6000
Episode: 0 Step: 1000 S --3-> S R=-0.17 totalR=-166.67 cost=1000 customerR= 0 optimum=6000
In [0]:
# Time the baseline's optimum search. In the recorded run the call prints the
# scaled reward and the perfect path, and the returned dict (cost, path,
# position, reward, rewards, scaled_reward) is rendered as this cell's output.
%time baseline.find_optimum()
Scaled reward: 0.7583333333333333
Perfect path ['S', 'B', 'C', 'M', 'N', 'O', 'G', 'F', 'D', 'F', 'E', 'A', 'B', 'S']
CPU times: user 78.5 ms, sys: 542 µs, total: 79 ms
Wall time: 80.4 ms
Out[0]:
{'cost': 1450,
'path': ['S',
'B',
'C',
'M',
'N',
'O',
'G',
'F',
'D',
'F',
'E',
'A',
'B',
'S'],
'position': 'S',
'reward': 6000,
'rewards': {'A': 0,
'B': 0,
'C': 0,
'D': 0,
'E': 0,
'F': 0,
'G': 0,
'H': 0,
'K': 0,
'L': 0,
'M': 0,
'N': 0,
'O': 0,
'S': 0},
'scaled_reward': 0.7583333333333333}
In [0]:
# Wrap the environment in a fresh Baseline before scoring.
baseline = Baseline(env)

# Score the model over 100 sample runs. score() yields four values, unpacked
# here as mean/std of the perfect score followed by mean/std of the model's
# test score.
(
    perfect_score_mean,
    perfect_score_std,
    test_score_mean,
    test_score_std,
) = baseline.score(model, sample_runs=100)
In [0]:
# Perfect scores: mean and standard deviation of the baseline's perfect score
# as unpacked from baseline.score(); this is the reference the model's test
# score is compared against.
# (presumably on the same scale as find_optimum's scaled_reward - TODO confirm)
perfect_score_mean, perfect_score_std
Out[0]:
(0.7325833333333331, 0.034117749535005)
In [0]:
# Test scores for our model: mean and standard deviation as unpacked from
# baseline.score().
# NOTE(review): in the recorded run the mean is about -166.67 with a standard
# deviation near zero, the same totalR the episode log reaches at step 1000
# while repeating action 3 at S. The model appears to be stuck on a move that
# does not change position - verify before drawing conclusions from this score.
test_score_mean, test_score_std
Out[0]:
(-166.6666666666659, 2.842170943040401e-14)
In [0]:
Content source: DJCordhose/ai
Similar notebooks: