Berater Environment v12

Changes from v12

  • Work In Progress:
    • alternative model trained using A2C
      • does not train, yet
      • need to explore parameters to learn

next steps

  • port to A2C in TF 2.0:

Installation (required for colab)


In [0]:
# Colab setup: install OpenAI baselines straight from its GitHub repository, plus gym.
# stdout is redirected to /dev/null to keep the cell output short; stderr is still shown.
!pip install git+https://github.com/openai/baselines >/dev/null
!pip install gym >/dev/null

Environment


In [0]:
import numpy as np
import random

import gym
from gym.utils import seeding
from gym import spaces

def state_name_to_int(state):
    """Translate a location letter ('S', 'A', ..., 'O') into its integer index.

    The index is the position of the letter in the fixed location order
    S, A, B, C, D, E, F, G, H, K, L, M, N, O (there is no 'I' or 'J').

    Raises:
        KeyError: if `state` is not one of the known location letters.
    """
    ordered_names = 'SABCDEFGHKLMNO'
    index_by_name = {name: index for index, name in enumerate(ordered_names)}
    return index_by_name[state]

def int_to_state_name(state_as_int):
    """Translate an integer index (0..13) back into its location letter.

    Inverse of the letter-to-index mapping: 0 is 'S', 1 is 'A', ..., 13 is 'O'
    (the letters 'I' and 'J' are not used).

    Raises:
        KeyError: if `state_as_int` is not a known index.
    """
    ordered_names = 'SABCDEFGHKLMNO'
    name_by_index = dict(enumerate(ordered_names))
    return name_by_index[state_as_int]
    
class BeraterEnv(gym.Env):
    """
    The Berater Problem

    A consultant starts at the home location 'S', travels along the weighted
    paths of a fixed graph to collect the rewards of a few randomly chosen
    customers and has to return to 'S' to finish the episode.

    Actions:
    There are 4 discrete deterministic actions, each choosing one direction
    (the index of an outgoing path of the current location). Choosing an
    index without a path keeps the position and costs a penalty of 1000.

    Observation (1 + 4 + number of locations float32 values):
    the current position as int, the net value (customer reward - travel
    cost) of each of the 4 local paths (-1000 for a missing path), and the
    remaining reward of every location.

    Rewards are divided by `optimum`, the sum of all customer rewards that
    was present when the environment was constructed.

    Note: customers are drawn with the global `random` module, so calling
    `seed()` does not influence which customers are selected.
    """
    metadata = {'render.modes': ['ansi']}

    # Logging switches; the notebook toggles them on the class itself
    # (e.g. BeraterEnv.showStep = True) so they apply to every instance.
    showStep = False
    showDone = True
    # Size of the window (in episodes) used for the running averages.
    envEpisodeModulo = 100

    def __init__(self):
        # location -> list of (neighbour, travel cost); the list index is the action
        self.map = {
            'S': [('A', 300), ('B', 100), ('C', 200 )],
            'A': [('S', 300), ('B', 100), ('E', 100 ), ('D', 100 )],
            'B': [('S', 100), ('A', 100), ('C', 50 ), ('K', 200 )],
            'C': [('S', 200), ('B', 50), ('M', 100 ), ('L', 200 )],
            'D': [('A', 100), ('F', 50)],
            'E': [('A', 100), ('F', 100), ('H', 100)],
            'F': [('D', 50), ('E', 100), ('G', 200)],
            'G': [('F', 200), ('O', 300)],
            'H': [('E', 100), ('K', 300)],
            'K': [('B', 200), ('H', 300)],
            'L': [('C', 200), ('M', 50)],
            'M': [('C', 100), ('L', 50), ('N', 100)],
            'N': [('M', 100), ('O', 100)],
            'O': [('N', 100), ('G', 300)]
        }
        max_paths = 4
        self.action_space = spaces.Discrete(max_paths)

        positions = len(self.map)
        # observations: position, reward of all 4 local paths, rest reward of all locations
        # non existing path is -1000 and no position change
        # look at what #getObservation returns if you are confused
        low = np.append(np.append([0], np.full(max_paths, -1000)), np.full(positions, 0))
        high = np.append(np.append([positions - 1], np.full(max_paths, 1000)), np.full(positions, 1000))
        self.observation_space = spaces.Box(low=low,
                                            high=high,
                                            dtype=np.float32)
        self.reward_range = (-1, 1)

        # per-episode bookkeeping (cleared again by reset())
        self.totalReward = 0
        self.stepCount = 0
        self.isDone = False

        # statistics over the current window of envEpisodeModulo episodes
        self.envReward = 0
        self.envEpisodeCount = 0
        self.envStepCount = 0

        self.reset()
        # sum of all customer rewards, used to scale step rewards
        self.optimum = self.calculate_customers_reward()

    def seed(self, seed=None):
        """Create `self.np_random` from `seed` and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def iterate_path(self, state, action):
        """Return `(destination, cost)` for taking `action` at location `state`.

        An action index that has no path leaves the position unchanged and
        costs 1000.
        """
        paths = self.map[state]
        # the lower bound keeps a negative index from silently wrapping around
        if 0 <= action < len(paths):
            return paths[action]
        # sorry, no such action, stay where you are and pay a high penalty
        return (state, 1000)

    def step(self, action):
        """Move along path `action` and collect the destination's reward.

        Returns:
            observation, reward, done, info - `reward` is
            (customer reward - travel cost) / optimum; `done` is True once
            the agent is back at 'S' with no customer reward left; `info`
            holds the location before ("from") and after ("to") the move.
        """
        destination, cost = self.iterate_path(self.state, action)
        lastState = self.state
        customerReward = self.customer_reward[destination]
        reward = (customerReward - cost) / self.optimum

        self.state = destination
        self.customer_visited(destination)
        done = destination == 'S' and self.all_customers_visited()

        stateAsInt = state_name_to_int(self.state)
        self.totalReward += reward
        self.stepCount += 1
        self.envReward += reward
        self.envStepCount += 1

        if self.showStep:
            print( "Episode: " + ("%4.0f  " % self.envEpisodeCount) +
                   " Step: " + ("%4.0f  " % self.stepCount) +
                   lastState + ' --' + str(action) + '-> ' + self.state +
                   ' R=' + ("% 2.2f" % reward) + ' totalR=' + ("% 3.2f" % self.totalReward) +
                   ' cost=' + ("%4.0f" % cost) + ' customerR=' + ("%4.0f" % customerReward) + ' optimum=' + ("%4.0f" % self.optimum)
                   )

        # count an episode only on the transition into the done state
        if done and not self.isDone:
            self.envEpisodeCount += 1
            episodes = self.envEpisodeCount % BeraterEnv.envEpisodeModulo
            window_complete = episodes == 0
            if window_complete:
                episodes = BeraterEnv.envEpisodeModulo
            if BeraterEnv.showDone:
                print( "Done: " +
                       ("episodes=%6.0f  " % self.envEpisodeCount) +
                       ("avgSteps=%6.2f  " % (self.envStepCount/episodes)) +
                       ("avgTotalReward=% 3.2f" % (self.envReward/episodes) )
                       )
            # start a new averaging window even when logging is switched off,
            # otherwise the averages are wrong once logging is enabled again
            if window_complete:
                self.envReward = 0
                self.envStepCount = 0

        self.isDone = done
        observation = self.getObservation(stateAsInt)
        # lastState, not self.state: self.state already points at the destination
        info = {"from": lastState, "to": destination}

        return observation, reward, done, info

    def getObservation(self, position):
        """Build the float32 observation vector for integer `position`.

        Layout: [position, value of path 0..3, remaining reward per location].
        """
        local_view = [position] + [self.getPathObservation(position, path)
                                   for path in range(self.action_space.n)]
        all_rest_rewards = list(self.customer_reward.values())
        # build the array in one go so the result keeps the float32 dtype
        # declared in observation_space (np.append would upcast to float64)
        return np.array(local_view + all_rest_rewards, dtype=np.float32)

    def getPathObservation(self, position, path):
        """Net value (customer reward - cost) of outgoing path `path` at `position`.

        Returns -1000 when the location has no path with that index.
        """
        source = int_to_state_name(position)
        paths = self.map[source]
        if path < len(paths):
            target, cost = paths[path]
            reward = self.customer_reward[target]
            result = reward - cost
        else:
            result = -1000

        return result

    def customer_visited(self, customer):
        """Mark `customer` as visited by clearing its remaining reward."""
        self.customer_reward[customer] = 0

    def all_customers_visited(self):
        """True when no customer reward is left to collect."""
        return self.calculate_customers_reward() == 0

    def calculate_customers_reward(self):
        """Sum of the remaining rewards of all locations."""
        return sum(self.customer_reward.values())

    def modulate_reward(self):
        """Assign a reward of 1000 to a random half of the customers, 0 to the rest."""
        number_of_customers = len(self.map) - 1
        number_per_consultant = int(number_of_customers/2)
        # keys are inserted in index order (S, A, B, ...); getObservation
        # relies on this order for the per-location rewards
        self.customer_reward = {
            'S': 0
        }
        for customer_nr in range(1, number_of_customers + 1):
            self.customer_reward[int_to_state_name(customer_nr)] = 0

        # every consultant only visits a few random customers
        samples = random.sample(range(1, number_of_customers + 1), k=number_per_consultant)
        key_list = list(self.customer_reward.keys())
        for sample in samples:
            self.customer_reward[key_list[sample]] = 1000

    def reset(self):
        """Start a new episode at 'S' with freshly drawn customers; returns the observation."""
        self.totalReward = 0
        self.stepCount = 0
        self.isDone = False

        self.modulate_reward()
        self.state = 'S'
        return self.getObservation(state_name_to_int(self.state))

    def render(self, mode='ansi'):
        """Print the remaining reward per location.

        `mode` is accepted so that wrappers calling `render(mode=...)` work;
        the output is the same for every mode.
        """
        print(self.customer_reward)

In [0]:
# show the first observation next to the customer rewards it was built from
env = BeraterEnv()
first_observation = env.reset()
print(first_observation)
print(env.customer_reward)


[    0.  -300.   900.  -200. -1000.     0.     0.  1000.     0.  1000.
     0.     0.  1000.     0.     0.  1000.  1000.  1000.     0.]
{'S': 0, 'A': 0, 'B': 1000, 'C': 0, 'D': 1000, 'E': 0, 'F': 0, 'G': 1000, 'H': 0, 'K': 0, 'L': 1000, 'M': 1000, 'N': 1000, 'O': 0}

Try out Environment


In [0]:
# verbose per-step and per-episode logging for this manual smoke test
BeraterEnv.showStep = True
BeraterEnv.showDone = True

env = BeraterEnv()
print(env)
observation = env.reset()
print(observation)

# random walk: stop as soon as one episode is finished, after 1000 steps at the latest
max_steps = 1000
for steps_taken in range(1, max_steps + 1):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if done:
        print("Episode finished after {} timesteps".format(steps_taken))
        break
env.close()
print(observation)


<BeraterEnv instance>
[    0.   700.   900.   800. -1000.     0.  1000.  1000.  1000.     0.
     0.     0.     0.     0.     0.  1000.  1000.  1000.     0.]
Episode:    0   Step:    1  S --0-> A R= 0.12 totalR= 0.12 cost= 300 customerR=1000 optimum=6000
Episode:    0   Step:    2  A --3-> D R=-0.02 totalR= 0.10 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:    3  D --1-> F R=-0.01 totalR= 0.09 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:    4  F --0-> D R=-0.01 totalR= 0.08 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:    5  D --3-> D R=-0.17 totalR=-0.08 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    6  D --3-> D R=-0.17 totalR=-0.25 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    7  D --3-> D R=-0.17 totalR=-0.42 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    8  D --3-> D R=-0.17 totalR=-0.58 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    9  D --1-> F R=-0.01 totalR=-0.59 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:   10  F --3-> F R=-0.17 totalR=-0.76 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   11  F --1-> E R=-0.02 totalR=-0.77 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   12  E --2-> H R=-0.02 totalR=-0.79 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   13  H --0-> E R=-0.02 totalR=-0.81 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   14  E --3-> E R=-0.17 totalR=-0.97 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   15  E --2-> H R=-0.02 totalR=-0.99 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   16  H --0-> E R=-0.02 totalR=-1.01 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   17  E --0-> A R=-0.02 totalR=-1.02 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   18  A --0-> S R=-0.05 totalR=-1.07 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:   19  S --2-> C R= 0.13 totalR=-0.94 cost= 200 customerR=1000 optimum=6000
Episode:    0   Step:   20  C --1-> B R= 0.16 totalR=-0.78 cost=  50 customerR=1000 optimum=6000
Episode:    0   Step:   21  B --2-> C R=-0.01 totalR=-0.79 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:   22  C --3-> L R= 0.13 totalR=-0.66 cost= 200 customerR=1000 optimum=6000
Episode:    0   Step:   23  L --3-> L R=-0.17 totalR=-0.82 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   24  L --2-> L R=-0.17 totalR=-0.99 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   25  L --0-> C R=-0.03 totalR=-1.02 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   26  C --1-> B R=-0.01 totalR=-1.03 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:   27  B --1-> A R=-0.02 totalR=-1.05 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   28  A --1-> B R=-0.02 totalR=-1.07 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   29  B --1-> A R=-0.02 totalR=-1.08 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   30  A --0-> S R=-0.05 totalR=-1.13 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:   31  S --1-> B R=-0.02 totalR=-1.15 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   32  B --0-> S R=-0.02 totalR=-1.17 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   33  S --3-> S R=-0.17 totalR=-1.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   34  S --0-> A R=-0.05 totalR=-1.38 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:   35  A --3-> D R=-0.02 totalR=-1.40 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   36  D --1-> F R=-0.01 totalR=-1.41 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:   37  F --2-> G R=-0.03 totalR=-1.44 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   38  G --3-> G R=-0.17 totalR=-1.61 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   39  G --3-> G R=-0.17 totalR=-1.77 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   40  G --0-> F R=-0.03 totalR=-1.81 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   41  F --2-> G R=-0.03 totalR=-1.84 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   42  G --3-> G R=-0.17 totalR=-2.01 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   43  G --0-> F R=-0.03 totalR=-2.04 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   44  F --1-> E R=-0.02 totalR=-2.06 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   45  E --3-> E R=-0.17 totalR=-2.22 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   46  E --1-> F R=-0.02 totalR=-2.24 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   47  F --3-> F R=-0.17 totalR=-2.41 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   48  F --3-> F R=-0.17 totalR=-2.57 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   49  F --2-> G R=-0.03 totalR=-2.61 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   50  G --3-> G R=-0.17 totalR=-2.77 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   51  G --0-> F R=-0.03 totalR=-2.81 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   52  F --1-> E R=-0.02 totalR=-2.82 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   53  E --1-> F R=-0.02 totalR=-2.84 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   54  F --1-> E R=-0.02 totalR=-2.86 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   55  E --3-> E R=-0.17 totalR=-3.02 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   56  E --0-> A R=-0.02 totalR=-3.04 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   57  A --3-> D R=-0.02 totalR=-3.06 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   58  D --2-> D R=-0.17 totalR=-3.22 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   59  D --0-> A R=-0.02 totalR=-3.24 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   60  A --3-> D R=-0.02 totalR=-3.26 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   61  D --3-> D R=-0.17 totalR=-3.42 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   62  D --2-> D R=-0.17 totalR=-3.59 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   63  D --3-> D R=-0.17 totalR=-3.76 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   64  D --2-> D R=-0.17 totalR=-3.92 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   65  D --3-> D R=-0.17 totalR=-4.09 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   66  D --0-> A R=-0.02 totalR=-4.11 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   67  A --2-> E R=-0.02 totalR=-4.12 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   68  E --0-> A R=-0.02 totalR=-4.14 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   69  A --0-> S R=-0.05 totalR=-4.19 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:   70  S --0-> A R=-0.05 totalR=-4.24 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:   71  A --1-> B R=-0.02 totalR=-4.26 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   72  B --1-> A R=-0.02 totalR=-4.27 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   73  A --2-> E R=-0.02 totalR=-4.29 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   74  E --0-> A R=-0.02 totalR=-4.31 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   75  A --0-> S R=-0.05 totalR=-4.36 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:   76  S --1-> B R=-0.02 totalR=-4.37 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   77  B --3-> K R=-0.03 totalR=-4.41 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   78  K --0-> B R=-0.03 totalR=-4.44 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:   79  B --1-> A R=-0.02 totalR=-4.46 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   80  A --2-> E R=-0.02 totalR=-4.47 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   81  E --2-> H R=-0.02 totalR=-4.49 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   82  H --3-> H R=-0.17 totalR=-4.66 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   83  H --0-> E R=-0.02 totalR=-4.67 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   84  E --1-> F R=-0.02 totalR=-4.69 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   85  F --1-> E R=-0.02 totalR=-4.71 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   86  E --3-> E R=-0.17 totalR=-4.87 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   87  E --1-> F R=-0.02 totalR=-4.89 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   88  F --1-> E R=-0.02 totalR=-4.91 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   89  E --3-> E R=-0.17 totalR=-5.07 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   90  E --2-> H R=-0.02 totalR=-5.09 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   91  H --3-> H R=-0.17 totalR=-5.26 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   92  H --3-> H R=-0.17 totalR=-5.42 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   93  H --2-> H R=-0.17 totalR=-5.59 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   94  H --2-> H R=-0.17 totalR=-5.76 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   95  H --3-> H R=-0.17 totalR=-5.92 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   96  H --0-> E R=-0.02 totalR=-5.94 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   97  E --2-> H R=-0.02 totalR=-5.96 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:   98  H --3-> H R=-0.17 totalR=-6.12 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   99  H --1-> K R=-0.05 totalR=-6.17 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  100  K --0-> B R=-0.03 totalR=-6.21 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  101  B --1-> A R=-0.02 totalR=-6.22 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  102  A --2-> E R=-0.02 totalR=-6.24 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  103  E --0-> A R=-0.02 totalR=-6.26 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  104  A --3-> D R=-0.02 totalR=-6.27 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  105  D --0-> A R=-0.02 totalR=-6.29 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  106  A --2-> E R=-0.02 totalR=-6.31 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  107  E --0-> A R=-0.02 totalR=-6.32 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  108  A --3-> D R=-0.02 totalR=-6.34 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  109  D --3-> D R=-0.17 totalR=-6.51 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  110  D --0-> A R=-0.02 totalR=-6.52 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  111  A --3-> D R=-0.02 totalR=-6.54 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  112  D --0-> A R=-0.02 totalR=-6.56 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  113  A --0-> S R=-0.05 totalR=-6.61 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  114  S --0-> A R=-0.05 totalR=-6.66 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  115  A --0-> S R=-0.05 totalR=-6.71 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  116  S --2-> C R=-0.03 totalR=-6.74 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  117  C --3-> L R=-0.03 totalR=-6.77 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  118  L --0-> C R=-0.03 totalR=-6.81 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  119  C --3-> L R=-0.03 totalR=-6.84 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  120  L --2-> L R=-0.17 totalR=-7.01 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  121  L --3-> L R=-0.17 totalR=-7.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  122  L --3-> L R=-0.17 totalR=-7.34 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  123  L --1-> M R= 0.16 totalR=-7.18 cost=  50 customerR=1000 optimum=6000
Episode:    0   Step:  124  M --1-> L R=-0.01 totalR=-7.19 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:  125  L --1-> M R=-0.01 totalR=-7.20 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:  126  M --0-> C R=-0.02 totalR=-7.22 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  127  C --1-> B R=-0.01 totalR=-7.22 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:  128  B --1-> A R=-0.02 totalR=-7.24 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  129  A --1-> B R=-0.02 totalR=-7.26 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  130  B --3-> K R=-0.03 totalR=-7.29 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  131  K --0-> B R=-0.03 totalR=-7.32 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  132  B --3-> K R=-0.03 totalR=-7.36 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  133  K --1-> H R=-0.05 totalR=-7.41 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  134  H --2-> H R=-0.17 totalR=-7.57 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  135  H --0-> E R=-0.02 totalR=-7.59 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  136  E --1-> F R=-0.02 totalR=-7.61 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  137  F --2-> G R=-0.03 totalR=-7.64 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  138  G --0-> F R=-0.03 totalR=-7.67 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  139  F --2-> G R=-0.03 totalR=-7.71 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  140  G --0-> F R=-0.03 totalR=-7.74 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  141  F --1-> E R=-0.02 totalR=-7.76 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  142  E --3-> E R=-0.17 totalR=-7.92 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  143  E --2-> H R=-0.02 totalR=-7.94 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  144  H --2-> H R=-0.17 totalR=-8.11 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  145  H --1-> K R=-0.05 totalR=-8.16 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  146  K --0-> B R=-0.03 totalR=-8.19 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  147  B --3-> K R=-0.03 totalR=-8.22 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  148  K --1-> H R=-0.05 totalR=-8.27 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  149  H --1-> K R=-0.05 totalR=-8.32 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  150  K --3-> K R=-0.17 totalR=-8.49 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  151  K --0-> B R=-0.03 totalR=-8.52 cost= 200 customerR=   0 optimum=6000
Episode:    0   Step:  152  B --2-> C R=-0.01 totalR=-8.53 cost=  50 customerR=   0 optimum=6000
Episode:    0   Step:  153  C --2-> M R=-0.02 totalR=-8.55 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  154  M --3-> M R=-0.17 totalR=-8.72 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  155  M --2-> N R= 0.15 totalR=-8.57 cost= 100 customerR=1000 optimum=6000
Episode:    0   Step:  156  N --3-> N R=-0.17 totalR=-8.73 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  157  N --3-> N R=-0.17 totalR=-8.90 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  158  N --3-> N R=-0.17 totalR=-9.07 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  159  N --2-> N R=-0.17 totalR=-9.23 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  160  N --1-> O R=-0.02 totalR=-9.25 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  161  O --2-> O R=-0.17 totalR=-9.42 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  162  O --2-> O R=-0.17 totalR=-9.58 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  163  O --3-> O R=-0.17 totalR=-9.75 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  164  O --2-> O R=-0.17 totalR=-9.92 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  165  O --3-> O R=-0.17 totalR=-10.08 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  166  O --3-> O R=-0.17 totalR=-10.25 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  167  O --2-> O R=-0.17 totalR=-10.42 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  168  O --2-> O R=-0.17 totalR=-10.58 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  169  O --3-> O R=-0.17 totalR=-10.75 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  170  O --0-> N R=-0.02 totalR=-10.77 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  171  N --1-> O R=-0.02 totalR=-10.78 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  172  O --2-> O R=-0.17 totalR=-10.95 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  173  O --3-> O R=-0.17 totalR=-11.12 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  174  O --2-> O R=-0.17 totalR=-11.28 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  175  O --1-> G R=-0.05 totalR=-11.33 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  176  G --2-> G R=-0.17 totalR=-11.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  177  G --1-> O R=-0.05 totalR=-11.55 cost= 300 customerR=   0 optimum=6000
Episode:    0   Step:  178  O --0-> N R=-0.02 totalR=-11.57 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  179  N --2-> N R=-0.17 totalR=-11.73 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  180  N --2-> N R=-0.17 totalR=-11.90 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  181  N --3-> N R=-0.17 totalR=-12.07 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  182  N --0-> M R=-0.02 totalR=-12.08 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  183  M --3-> M R=-0.17 totalR=-12.25 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  184  M --2-> N R=-0.02 totalR=-12.27 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  185  N --3-> N R=-0.17 totalR=-12.43 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  186  N --0-> M R=-0.02 totalR=-12.45 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  187  M --0-> C R=-0.02 totalR=-12.47 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  188  C --2-> M R=-0.02 totalR=-12.48 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  189  M --0-> C R=-0.02 totalR=-12.50 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  190  C --2-> M R=-0.02 totalR=-12.52 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  191  M --3-> M R=-0.17 totalR=-12.68 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  192  M --2-> N R=-0.02 totalR=-12.70 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  193  N --2-> N R=-0.17 totalR=-12.87 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  194  N --3-> N R=-0.17 totalR=-13.03 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  195  N --0-> M R=-0.02 totalR=-13.05 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  196  M --0-> C R=-0.02 totalR=-13.07 cost= 100 customerR=   0 optimum=6000
Episode:    0   Step:  197  C --0-> S R=-0.03 totalR=-13.10 cost= 200 customerR=   0 optimum=6000
Done: episodes=     1  avgSteps=197.00  avgTotalReward=-13.10
Episode finished after 197 timesteps
[    0.  -300.  -100.  -200. -1000.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.]

Baseline


In [0]:
from copy import deepcopy
import json

class Baseline():
    """Reference solver for a BeraterEnv-like environment.

    Runs a cheapest-first graph search over (position, remaining rewards)
    states to find a route that collects every reward and returns to 'S',
    and compares a trained model against that reference.

    The environment must expose `map` (location -> list of (neighbour, cost))
    and `customer_reward` (location -> remaining reward).
    """

    # divisor used for scaling when the environment does not expose `optimum`
    DEFAULT_OPTIMUM = 6000

    def __init__(self, env, verbose=1):
        self.env = env
        self.verbose = verbose
        self.reset()

    def reset(self):
        """Take a snapshot of the environment's map and current customer rewards."""
        self.map = self.env.map
        self.rewards = self.env.customer_reward.copy()

    def as_string(self, state):
        """Canonical JSON key of a search state, used to detect duplicates."""
        # reward/cost does not hurt, but is useless, path obscures same state
        new_state = {
            'rewards': state['rewards'],
            'position': state['position']
        }
        return json.dumps(new_state, sort_keys=True)

    def is_goal(self, state):
        """True when the state is at 'S' and every reward has been collected."""
        if state['position'] != 'S':
            return False
        return all(reward == 0 for reward in state['rewards'].values())

    def expand(self, state):
        """Return the successor states reachable with one move from `state`."""
        states = []
        for position, cost in self.map[state['position']]:
            new_state = deepcopy(state)
            new_state['position'] = position
            new_state['rewards'][position] = 0
            # read the reward from the parent: the copy was just cleared
            reward = state['rewards'][position]
            new_state['reward'] += reward
            new_state['cost'] += cost
            new_state['path'].append(position)
            states.append(new_state)
        return states

    def search(self, root, max_depth = 25):
        """Cheapest-first search from `root` to a goal state.

        Returns:
            the goal state with an added 'scaled_reward' entry, or None when
            a path longer than `max_depth` is expanded first or no state is
            left to expand.
        """
        closed = set()
        frontier = [root]

        while frontier:
            state = frontier.pop(0)
            key = self.as_string(state)
            if key in closed:
                continue

            closed.add(key)

            depth = len(state['path'])
            if depth > max_depth:
                if self.verbose > 0:
                    print("Visited:", len(closed))
                    print("Reached max depth, without reaching goal")
                return None

            if self.is_goal(state):
                # scale like the environment does so both scores are comparable
                optimum = getattr(self.env, 'optimum', None) or self.DEFAULT_OPTIMUM
                scaled_reward = (state['reward'] - state['cost']) / optimum
                state['scaled_reward'] = scaled_reward
                if self.verbose > 0:
                    print("Scaled reward:", scaled_reward)
                    print("Perfect path", state['path'])
                return state

            frontier += self.expand(state)
            # make this best first
            frontier.sort(key=lambda candidate: candidate['cost'])

        return None

    def find_optimum(self):
        """Search from 'S' using the rewards captured by the last `reset()`."""
        initial_state = {
            'rewards': self.rewards.copy(),
            'position': 'S',
            'reward': 0,
            'cost': 0,
            'path': ['S']
        }
        return self.search(initial_state)

    def benchmark(self, model, sample_runs=100):
        """Play `sample_runs` episodes with `model` and with the search.

        Side effects: sets `self.verbose` to 0 and switches off the
        class-wide BeraterEnv step/done logging.

        Returns:
            (perfect_rewards, model_rewards), one entry per episode.
        """
        self.verbose = 0
        BeraterEnv.showStep = False
        BeraterEnv.showDone = False

        perfect_rewards = []
        model_rewards = []
        for run in range(sample_runs):
            observation = self.env.reset()
            self.reset()

            optimum_state = self.find_optimum()
            perfect_rewards.append(optimum_state['scaled_reward'])

            # placeholder inputs for model.step's S / M arguments
            # NOTE(review): presumably ignored by non-recurrent policies - confirm
            state = np.zeros((1, 2*128))
            dones = np.zeros((1))

            # cap the episode length in case the model never finishes
            for t in range(1000):
                actions, _, state, _ = model.step(observation, S=state, M=dones)
                observation, reward, done, info = self.env.step(actions[0])
                if done:
                    break
            # read the total from the benchmarked env, not a notebook global
            model_rewards.append(self.env.totalReward)
        return perfect_rewards, model_rewards

    def score(self, model, sample_runs=100):
        """Mean and standard deviation of the search and model rewards.

        Returns:
            (perfect_mean, perfect_std, model_mean, model_std)
        """
        perfect_rewards, model_rewards = self.benchmark(model, sample_runs=sample_runs)

        perfect = np.array(perfect_rewards)
        tested = np.array(model_rewards)

        return perfect.mean(), perfect.std(), tested.mean(), tested.std()

Train model

Estimation

  • total cost when travelling all paths (back and forth): 2500
  • all rewards: 6000
  • but: rewards are much more sparse while routes stay the same, maybe expect less
  • estimate: assuming no illegal moves, the score should lie between
    • half the travel cost: (6000 - 1250) / 6000 = 0.79
    • and full travel cost: (6000 - 2500) / 6000 = 0.58
  • additionally: the agent only sees very little of the whole scenario
    • changes with every episode
    • was ok when network can learn fixed scenario

In [0]:
# start from an empty log directory; -f and -p keep the cell re-runnable
# (no error when logs/ is missing on a fresh runtime or already exists)
!rm -rf logs
!mkdir -p logs/berater

In [0]:
import tensorflow as tf
# Only surface TensorFlow messages at ERROR level (tf.logging is the TF 1.x logging API).
tf.logging.set_verbosity(tf.logging.ERROR)
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)


1.12.0

In [0]:
%%time

# https://github.com/openai/baselines/blob/master/baselines/deepq/experiments/train_pong.py
# log_dir = logger.get_dir()
log_dir = '/content/logs/berater/'

import gym
from baselines import bench
from baselines import logger

from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.vec_monitor import VecMonitor
from baselines.ppo2 import ppo2
from baselines.a2c import a2c

# Switch off the environment's per-step and end-of-episode console output for training.
BeraterEnv.showStep = False
BeraterEnv.showDone = False

# Standalone environment instance; the later "Enjoy model" cells roll the trained model out on it.
env = BeraterEnv()

# Training uses its own BeraterEnv, wrapped as a one-environment vectorized env.
# VecMonitor is given log_dir; the plotting cell later loads results from that directory.
wrapped_env = DummyVecEnv([lambda: BeraterEnv()])
monitored_env = VecMonitor(wrapped_env, log_dir)

# https://github.com/openai/baselines/blob/master/baselines/ppo2/ppo2.py
# https://github.com/openai/baselines/blob/master/baselines/common/models.py#L30
# https://arxiv.org/abs/1607.06450 for layer_norm

# lr linear from lr=1e-2 to lr=1e-4 (default lr=3e-4)
def lr_range(frac):
  """Learning rate interpolated linearly between 1e-4 and 1e-2.

  `frac` is the fraction of updates still remaining, running from 1 down to 0,
  so the returned rate is 1e-2 at frac=1 and 1e-4 at frac=0.
  """
  lr_at_start, lr_at_end = 1e-2, 1e-4
  return lr_at_end + (lr_at_start - lr_at_end) * frac

def mlp(num_layers=2, num_hidden=64, activation=tf.nn.relu, layer_norm=False):
    """Return a network-building function for a stack of dense layers.

    The returned `network_fn(X)` flattens `X` and then applies `num_layers`
    times: a dense layer with `num_hidden` units (Glorot-uniform initializer,
    the same seed 17 for every layer), optional layer normalization when
    `layer_norm` is set, and finally `activation`.
    """
    def network_fn(X):
        out = tf.layers.flatten(X)
        for _ in range(num_layers):
            out = tf.layers.dense(
                out,
                units=num_hidden,
                kernel_initializer=tf.initializers.glorot_uniform(seed=17),
            )
            if layer_norm:
                out = tf.contrib.layers.layer_norm(out, center=True, scale=True)
            out = activation(out)
        return out

    return network_fn
  
# Three hidden layers of 500 units each, with layer normalization.
network = mlp(num_hidden=500, num_layers=3, layer_norm=True)

# Parameters
# https://github.com/openai/baselines/blob/master/baselines/a2c/a2c.py
# NOTE: lr_range defined above is not passed in here, so a2c.learn runs with
# its own default learning-rate settings.
model = a2c.learn(
    env=monitored_env,
    network=network,
    gamma=1.0,  # discount factor of 1.0, i.e. future rewards are not discounted
    ent_coef=0.05,  # weight of the entropy term in the loss
    log_interval=50000,  # log every 50000 updates (5 timesteps per update in the output below)
    total_timesteps=1000000)


# model.save('berater-ppo-v12.pkl')
# Close the monitored training env once learning has finished.
monitored_env.close()


Logging to /tmp/openai-2019-01-31-16-37-25-273381
---------------------------------
| explained_variance | -0.85    |
| fps                | 17       |
| nupdates           | 1        |
| policy_entropy     | 1.39     |
| total_timesteps    | 5        |
| value_loss         | 0.0133   |
---------------------------------
---------------------------------
| explained_variance | 0        |
| fps                | 277      |
| nupdates           | 50000    |
| policy_entropy     | 4.05e-22 |
| total_timesteps    | 250000   |
| value_loss         | 0.307    |
---------------------------------
---------------------------------
| explained_variance | 0        |
| fps                | 277      |
| nupdates           | 100000   |
| policy_entropy     | 0        |
| total_timesteps    | 500000   |
| value_loss         | 0.299    |
---------------------------------
---------------------------------
| explained_variance | 0        |
| fps                | 278      |
| nupdates           | 150000   |
| policy_entropy     | 7.85e-34 |
| total_timesteps    | 750000   |
| value_loss         | 0.0625   |
---------------------------------
---------------------------------
| explained_variance | 0        |
| fps                | 278      |
| nupdates           | 200000   |
| policy_entropy     | 7.85e-34 |
| total_timesteps    | 1000000  |
| value_loss         | 0.0625   |
---------------------------------
CPU times: user 1h 13min 31s, sys: 12min 4s, total: 1h 25min 35s
Wall time: 59min 55s

In [0]:
# !ls -l $log_dir

In [0]:
from baselines.common import plot_util as pu
results = pu.load_results(log_dir)

import matplotlib.pyplot as plt
import numpy as np
# Only the first result found under log_dir is plotted.
r = results[0]
# NOTE(review): this fixed y-range hides episodes whose total reward is negative.
plt.ylim(0, .75)
plt.xlabel('timesteps')
plt.ylabel('episode reward (smoothed, radius=100)')
plt.title('Training progress')
# plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
# x: cumulative episode lengths, y: smoothed episode rewards.
# The trailing semicolon keeps the Line2D repr out of the cell output.
plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=100));


/usr/local/lib/python3.6/dist-packages/baselines/bench/monitor.py:164: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  df.headers = headers # HACK to preserve backwards compatibility
Out[0]:
[<matplotlib.lines.Line2D at 0x7f699835f860>]

Enjoy model


In [0]:
import numpy as np 

# Start a fresh episode on the standalone env; the output below lists the reward at each node.
observation = env.reset()
env.render()
# Baseline is constructed on the same env instance
# (presumably the class providing find_optimum / benchmark / score; verify against its definition).
baseline = Baseline(env)


{'S': 0, 'A': 1000, 'B': 1000, 'C': 0, 'D': 1000, 'E': 1000, 'F': 0, 'G': 1000, 'H': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0, 'O': 1000}

In [0]:
# Initial state and done mask handed to model.step
# (assumed to match what a recurrent policy expects; TODO confirm).
state = np.zeros((1, 2*128))
dones = np.zeros((1))

# Print every step of this single rollout, but not the end-of-episode summary.
BeraterEnv.showStep = True
BeraterEnv.showDone = False

# Upper bound on steps so a policy that never reaches `done` cannot loop forever.
max_steps = 1000

for t in range(max_steps):
    actions, _, state, _ = model.step(observation, S=state, M=dones)
    observation, reward, done, info = env.step(actions[0])
    if done:
        print("Episode finished after {} timesteps, reward={}".format(t+1, env.totalReward))
        break
else:
    # The loop ran out of steps without `done`: report it instead of ending silently.
    print("Episode did not finish within {} timesteps, reward={}".format(max_steps, env.totalReward))
env.close()


Episode:    0   Step:    1  S --3-> S R=-0.17 totalR=-0.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    2  S --3-> S R=-0.17 totalR=-0.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    3  S --3-> S R=-0.17 totalR=-0.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    4  S --3-> S R=-0.17 totalR=-0.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    5  S --3-> S R=-0.17 totalR=-0.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    6  S --3-> S R=-0.17 totalR=-1.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    7  S --3-> S R=-0.17 totalR=-1.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    8  S --3-> S R=-0.17 totalR=-1.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:    9  S --3-> S R=-0.17 totalR=-1.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   10  S --3-> S R=-0.17 totalR=-1.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   11  S --3-> S R=-0.17 totalR=-1.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   12  S --3-> S R=-0.17 totalR=-2.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   13  S --3-> S R=-0.17 totalR=-2.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   14  S --3-> S R=-0.17 totalR=-2.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   15  S --3-> S R=-0.17 totalR=-2.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   16  S --3-> S R=-0.17 totalR=-2.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   17  S --3-> S R=-0.17 totalR=-2.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   18  S --3-> S R=-0.17 totalR=-3.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   19  S --3-> S R=-0.17 totalR=-3.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   20  S --3-> S R=-0.17 totalR=-3.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   21  S --3-> S R=-0.17 totalR=-3.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   22  S --3-> S R=-0.17 totalR=-3.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   23  S --3-> S R=-0.17 totalR=-3.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   24  S --3-> S R=-0.17 totalR=-4.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   25  S --3-> S R=-0.17 totalR=-4.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   26  S --3-> S R=-0.17 totalR=-4.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   27  S --3-> S R=-0.17 totalR=-4.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   28  S --3-> S R=-0.17 totalR=-4.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   29  S --3-> S R=-0.17 totalR=-4.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   30  S --3-> S R=-0.17 totalR=-5.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   31  S --3-> S R=-0.17 totalR=-5.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   32  S --3-> S R=-0.17 totalR=-5.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   33  S --3-> S R=-0.17 totalR=-5.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   34  S --3-> S R=-0.17 totalR=-5.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   35  S --3-> S R=-0.17 totalR=-5.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   36  S --3-> S R=-0.17 totalR=-6.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   37  S --3-> S R=-0.17 totalR=-6.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   38  S --3-> S R=-0.17 totalR=-6.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   39  S --3-> S R=-0.17 totalR=-6.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   40  S --3-> S R=-0.17 totalR=-6.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   41  S --3-> S R=-0.17 totalR=-6.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   42  S --3-> S R=-0.17 totalR=-7.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   43  S --3-> S R=-0.17 totalR=-7.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   44  S --3-> S R=-0.17 totalR=-7.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   45  S --3-> S R=-0.17 totalR=-7.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   46  S --3-> S R=-0.17 totalR=-7.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   47  S --3-> S R=-0.17 totalR=-7.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   48  S --3-> S R=-0.17 totalR=-8.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   49  S --3-> S R=-0.17 totalR=-8.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   50  S --3-> S R=-0.17 totalR=-8.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   51  S --3-> S R=-0.17 totalR=-8.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   52  S --3-> S R=-0.17 totalR=-8.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   53  S --3-> S R=-0.17 totalR=-8.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   54  S --3-> S R=-0.17 totalR=-9.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   55  S --3-> S R=-0.17 totalR=-9.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   56  S --3-> S R=-0.17 totalR=-9.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   57  S --3-> S R=-0.17 totalR=-9.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   58  S --3-> S R=-0.17 totalR=-9.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   59  S --3-> S R=-0.17 totalR=-9.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   60  S --3-> S R=-0.17 totalR=-10.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   61  S --3-> S R=-0.17 totalR=-10.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   62  S --3-> S R=-0.17 totalR=-10.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   63  S --3-> S R=-0.17 totalR=-10.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   64  S --3-> S R=-0.17 totalR=-10.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   65  S --3-> S R=-0.17 totalR=-10.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   66  S --3-> S R=-0.17 totalR=-11.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   67  S --3-> S R=-0.17 totalR=-11.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   68  S --3-> S R=-0.17 totalR=-11.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   69  S --3-> S R=-0.17 totalR=-11.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   70  S --3-> S R=-0.17 totalR=-11.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   71  S --3-> S R=-0.17 totalR=-11.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   72  S --3-> S R=-0.17 totalR=-12.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   73  S --3-> S R=-0.17 totalR=-12.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   74  S --3-> S R=-0.17 totalR=-12.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   75  S --3-> S R=-0.17 totalR=-12.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   76  S --3-> S R=-0.17 totalR=-12.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   77  S --3-> S R=-0.17 totalR=-12.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   78  S --3-> S R=-0.17 totalR=-13.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   79  S --3-> S R=-0.17 totalR=-13.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   80  S --3-> S R=-0.17 totalR=-13.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   81  S --3-> S R=-0.17 totalR=-13.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   82  S --3-> S R=-0.17 totalR=-13.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   83  S --3-> S R=-0.17 totalR=-13.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   84  S --3-> S R=-0.17 totalR=-14.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   85  S --3-> S R=-0.17 totalR=-14.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   86  S --3-> S R=-0.17 totalR=-14.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   87  S --3-> S R=-0.17 totalR=-14.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   88  S --3-> S R=-0.17 totalR=-14.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   89  S --3-> S R=-0.17 totalR=-14.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   90  S --3-> S R=-0.17 totalR=-15.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   91  S --3-> S R=-0.17 totalR=-15.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   92  S --3-> S R=-0.17 totalR=-15.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   93  S --3-> S R=-0.17 totalR=-15.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   94  S --3-> S R=-0.17 totalR=-15.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   95  S --3-> S R=-0.17 totalR=-15.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   96  S --3-> S R=-0.17 totalR=-16.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   97  S --3-> S R=-0.17 totalR=-16.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   98  S --3-> S R=-0.17 totalR=-16.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:   99  S --3-> S R=-0.17 totalR=-16.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  100  S --3-> S R=-0.17 totalR=-16.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  101  S --3-> S R=-0.17 totalR=-16.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  102  S --3-> S R=-0.17 totalR=-17.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  103  S --3-> S R=-0.17 totalR=-17.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  104  S --3-> S R=-0.17 totalR=-17.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  105  S --3-> S R=-0.17 totalR=-17.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  106  S --3-> S R=-0.17 totalR=-17.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  107  S --3-> S R=-0.17 totalR=-17.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  108  S --3-> S R=-0.17 totalR=-18.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  109  S --3-> S R=-0.17 totalR=-18.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  110  S --3-> S R=-0.17 totalR=-18.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  111  S --3-> S R=-0.17 totalR=-18.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  112  S --3-> S R=-0.17 totalR=-18.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  113  S --3-> S R=-0.17 totalR=-18.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  114  S --3-> S R=-0.17 totalR=-19.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  115  S --3-> S R=-0.17 totalR=-19.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  116  S --3-> S R=-0.17 totalR=-19.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  117  S --3-> S R=-0.17 totalR=-19.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  118  S --3-> S R=-0.17 totalR=-19.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  119  S --3-> S R=-0.17 totalR=-19.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  120  S --3-> S R=-0.17 totalR=-20.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  121  S --3-> S R=-0.17 totalR=-20.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  122  S --3-> S R=-0.17 totalR=-20.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  123  S --3-> S R=-0.17 totalR=-20.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  124  S --3-> S R=-0.17 totalR=-20.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  125  S --3-> S R=-0.17 totalR=-20.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  126  S --3-> S R=-0.17 totalR=-21.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  127  S --3-> S R=-0.17 totalR=-21.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  128  S --3-> S R=-0.17 totalR=-21.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  129  S --3-> S R=-0.17 totalR=-21.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  130  S --3-> S R=-0.17 totalR=-21.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  131  S --3-> S R=-0.17 totalR=-21.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  132  S --3-> S R=-0.17 totalR=-22.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  133  S --3-> S R=-0.17 totalR=-22.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  134  S --3-> S R=-0.17 totalR=-22.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  135  S --3-> S R=-0.17 totalR=-22.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  136  S --3-> S R=-0.17 totalR=-22.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  137  S --3-> S R=-0.17 totalR=-22.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  138  S --3-> S R=-0.17 totalR=-23.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  139  S --3-> S R=-0.17 totalR=-23.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  140  S --3-> S R=-0.17 totalR=-23.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  141  S --3-> S R=-0.17 totalR=-23.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  142  S --3-> S R=-0.17 totalR=-23.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  143  S --3-> S R=-0.17 totalR=-23.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  144  S --3-> S R=-0.17 totalR=-24.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  145  S --3-> S R=-0.17 totalR=-24.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  146  S --3-> S R=-0.17 totalR=-24.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  147  S --3-> S R=-0.17 totalR=-24.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  148  S --3-> S R=-0.17 totalR=-24.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  149  S --3-> S R=-0.17 totalR=-24.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  150  S --3-> S R=-0.17 totalR=-25.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  151  S --3-> S R=-0.17 totalR=-25.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  152  S --3-> S R=-0.17 totalR=-25.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  153  S --3-> S R=-0.17 totalR=-25.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  154  S --3-> S R=-0.17 totalR=-25.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  155  S --3-> S R=-0.17 totalR=-25.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  156  S --3-> S R=-0.17 totalR=-26.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  157  S --3-> S R=-0.17 totalR=-26.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  158  S --3-> S R=-0.17 totalR=-26.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  159  S --3-> S R=-0.17 totalR=-26.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  160  S --3-> S R=-0.17 totalR=-26.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  161  S --3-> S R=-0.17 totalR=-26.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  162  S --3-> S R=-0.17 totalR=-27.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  163  S --3-> S R=-0.17 totalR=-27.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  164  S --3-> S R=-0.17 totalR=-27.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  165  S --3-> S R=-0.17 totalR=-27.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  166  S --3-> S R=-0.17 totalR=-27.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  167  S --3-> S R=-0.17 totalR=-27.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  168  S --3-> S R=-0.17 totalR=-28.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  169  S --3-> S R=-0.17 totalR=-28.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  170  S --3-> S R=-0.17 totalR=-28.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  171  S --3-> S R=-0.17 totalR=-28.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  172  S --3-> S R=-0.17 totalR=-28.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  173  S --3-> S R=-0.17 totalR=-28.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  174  S --3-> S R=-0.17 totalR=-29.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  175  S --3-> S R=-0.17 totalR=-29.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  176  S --3-> S R=-0.17 totalR=-29.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  177  S --3-> S R=-0.17 totalR=-29.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  178  S --3-> S R=-0.17 totalR=-29.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  179  S --3-> S R=-0.17 totalR=-29.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  180  S --3-> S R=-0.17 totalR=-30.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  181  S --3-> S R=-0.17 totalR=-30.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  182  S --3-> S R=-0.17 totalR=-30.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  183  S --3-> S R=-0.17 totalR=-30.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  184  S --3-> S R=-0.17 totalR=-30.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  185  S --3-> S R=-0.17 totalR=-30.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  186  S --3-> S R=-0.17 totalR=-31.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  187  S --3-> S R=-0.17 totalR=-31.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  188  S --3-> S R=-0.17 totalR=-31.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  189  S --3-> S R=-0.17 totalR=-31.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  190  S --3-> S R=-0.17 totalR=-31.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  191  S --3-> S R=-0.17 totalR=-31.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  192  S --3-> S R=-0.17 totalR=-32.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  193  S --3-> S R=-0.17 totalR=-32.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  194  S --3-> S R=-0.17 totalR=-32.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  195  S --3-> S R=-0.17 totalR=-32.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  196  S --3-> S R=-0.17 totalR=-32.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  197  S --3-> S R=-0.17 totalR=-32.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  198  S --3-> S R=-0.17 totalR=-33.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  199  S --3-> S R=-0.17 totalR=-33.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  200  S --3-> S R=-0.17 totalR=-33.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  201  S --3-> S R=-0.17 totalR=-33.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  202  S --3-> S R=-0.17 totalR=-33.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  203  S --3-> S R=-0.17 totalR=-33.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  204  S --3-> S R=-0.17 totalR=-34.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  205  S --3-> S R=-0.17 totalR=-34.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  206  S --3-> S R=-0.17 totalR=-34.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  207  S --3-> S R=-0.17 totalR=-34.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  208  S --3-> S R=-0.17 totalR=-34.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  209  S --3-> S R=-0.17 totalR=-34.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  210  S --3-> S R=-0.17 totalR=-35.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  211  S --3-> S R=-0.17 totalR=-35.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  212  S --3-> S R=-0.17 totalR=-35.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  213  S --3-> S R=-0.17 totalR=-35.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  214  S --3-> S R=-0.17 totalR=-35.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  215  S --3-> S R=-0.17 totalR=-35.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  216  S --3-> S R=-0.17 totalR=-36.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  217  S --3-> S R=-0.17 totalR=-36.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  218  S --3-> S R=-0.17 totalR=-36.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  219  S --3-> S R=-0.17 totalR=-36.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  220  S --3-> S R=-0.17 totalR=-36.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  221  S --3-> S R=-0.17 totalR=-36.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  222  S --3-> S R=-0.17 totalR=-37.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  223  S --3-> S R=-0.17 totalR=-37.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  224  S --3-> S R=-0.17 totalR=-37.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  225  S --3-> S R=-0.17 totalR=-37.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  226  S --3-> S R=-0.17 totalR=-37.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  227  S --3-> S R=-0.17 totalR=-37.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  228  S --3-> S R=-0.17 totalR=-38.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  229  S --3-> S R=-0.17 totalR=-38.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  230  S --3-> S R=-0.17 totalR=-38.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  231  S --3-> S R=-0.17 totalR=-38.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  232  S --3-> S R=-0.17 totalR=-38.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  233  S --3-> S R=-0.17 totalR=-38.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  234  S --3-> S R=-0.17 totalR=-39.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  235  S --3-> S R=-0.17 totalR=-39.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  236  S --3-> S R=-0.17 totalR=-39.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  237  S --3-> S R=-0.17 totalR=-39.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  238  S --3-> S R=-0.17 totalR=-39.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  239  S --3-> S R=-0.17 totalR=-39.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  240  S --3-> S R=-0.17 totalR=-40.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  241  S --3-> S R=-0.17 totalR=-40.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  242  S --3-> S R=-0.17 totalR=-40.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  243  S --3-> S R=-0.17 totalR=-40.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  244  S --3-> S R=-0.17 totalR=-40.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  245  S --3-> S R=-0.17 totalR=-40.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  246  S --3-> S R=-0.17 totalR=-41.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  247  S --3-> S R=-0.17 totalR=-41.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  248  S --3-> S R=-0.17 totalR=-41.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  249  S --3-> S R=-0.17 totalR=-41.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  250  S --3-> S R=-0.17 totalR=-41.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  251  S --3-> S R=-0.17 totalR=-41.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  252  S --3-> S R=-0.17 totalR=-42.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  253  S --3-> S R=-0.17 totalR=-42.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  254  S --3-> S R=-0.17 totalR=-42.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  255  S --3-> S R=-0.17 totalR=-42.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  256  S --3-> S R=-0.17 totalR=-42.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  257  S --3-> S R=-0.17 totalR=-42.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  258  S --3-> S R=-0.17 totalR=-43.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  259  S --3-> S R=-0.17 totalR=-43.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  260  S --3-> S R=-0.17 totalR=-43.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  261  S --3-> S R=-0.17 totalR=-43.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  262  S --3-> S R=-0.17 totalR=-43.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  263  S --3-> S R=-0.17 totalR=-43.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  264  S --3-> S R=-0.17 totalR=-44.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  265  S --3-> S R=-0.17 totalR=-44.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  266  S --3-> S R=-0.17 totalR=-44.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  267  S --3-> S R=-0.17 totalR=-44.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  268  S --3-> S R=-0.17 totalR=-44.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  269  S --3-> S R=-0.17 totalR=-44.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  270  S --3-> S R=-0.17 totalR=-45.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  271  S --3-> S R=-0.17 totalR=-45.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  272  S --3-> S R=-0.17 totalR=-45.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  273  S --3-> S R=-0.17 totalR=-45.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  274  S --3-> S R=-0.17 totalR=-45.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  275  S --3-> S R=-0.17 totalR=-45.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  276  S --3-> S R=-0.17 totalR=-46.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  277  S --3-> S R=-0.17 totalR=-46.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  278  S --3-> S R=-0.17 totalR=-46.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  279  S --3-> S R=-0.17 totalR=-46.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  280  S --3-> S R=-0.17 totalR=-46.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  281  S --3-> S R=-0.17 totalR=-46.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  282  S --3-> S R=-0.17 totalR=-47.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  283  S --3-> S R=-0.17 totalR=-47.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  284  S --3-> S R=-0.17 totalR=-47.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  285  S --3-> S R=-0.17 totalR=-47.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  286  S --3-> S R=-0.17 totalR=-47.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  287  S --3-> S R=-0.17 totalR=-47.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  288  S --3-> S R=-0.17 totalR=-48.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  289  S --3-> S R=-0.17 totalR=-48.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  290  S --3-> S R=-0.17 totalR=-48.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  291  S --3-> S R=-0.17 totalR=-48.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  292  S --3-> S R=-0.17 totalR=-48.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  293  S --3-> S R=-0.17 totalR=-48.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  294  S --3-> S R=-0.17 totalR=-49.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  295  S --3-> S R=-0.17 totalR=-49.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  296  S --3-> S R=-0.17 totalR=-49.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  297  S --3-> S R=-0.17 totalR=-49.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  298  S --3-> S R=-0.17 totalR=-49.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  299  S --3-> S R=-0.17 totalR=-49.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  300  S --3-> S R=-0.17 totalR=-50.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  301  S --3-> S R=-0.17 totalR=-50.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  302  S --3-> S R=-0.17 totalR=-50.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  303  S --3-> S R=-0.17 totalR=-50.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  304  S --3-> S R=-0.17 totalR=-50.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  305  S --3-> S R=-0.17 totalR=-50.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  306  S --3-> S R=-0.17 totalR=-51.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  307  S --3-> S R=-0.17 totalR=-51.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  308  S --3-> S R=-0.17 totalR=-51.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  309  S --3-> S R=-0.17 totalR=-51.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  310  S --3-> S R=-0.17 totalR=-51.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  311  S --3-> S R=-0.17 totalR=-51.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  312  S --3-> S R=-0.17 totalR=-52.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  313  S --3-> S R=-0.17 totalR=-52.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  314  S --3-> S R=-0.17 totalR=-52.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  315  S --3-> S R=-0.17 totalR=-52.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  316  S --3-> S R=-0.17 totalR=-52.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  317  S --3-> S R=-0.17 totalR=-52.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  318  S --3-> S R=-0.17 totalR=-53.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  319  S --3-> S R=-0.17 totalR=-53.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  320  S --3-> S R=-0.17 totalR=-53.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  321  S --3-> S R=-0.17 totalR=-53.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  322  S --3-> S R=-0.17 totalR=-53.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  323  S --3-> S R=-0.17 totalR=-53.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  324  S --3-> S R=-0.17 totalR=-54.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  325  S --3-> S R=-0.17 totalR=-54.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  326  S --3-> S R=-0.17 totalR=-54.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  327  S --3-> S R=-0.17 totalR=-54.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  328  S --3-> S R=-0.17 totalR=-54.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  329  S --3-> S R=-0.17 totalR=-54.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  330  S --3-> S R=-0.17 totalR=-55.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  331  S --3-> S R=-0.17 totalR=-55.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  332  S --3-> S R=-0.17 totalR=-55.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  333  S --3-> S R=-0.17 totalR=-55.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  334  S --3-> S R=-0.17 totalR=-55.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  335  S --3-> S R=-0.17 totalR=-55.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  336  S --3-> S R=-0.17 totalR=-56.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  337  S --3-> S R=-0.17 totalR=-56.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  338  S --3-> S R=-0.17 totalR=-56.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  339  S --3-> S R=-0.17 totalR=-56.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  340  S --3-> S R=-0.17 totalR=-56.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  341  S --3-> S R=-0.17 totalR=-56.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  342  S --3-> S R=-0.17 totalR=-57.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  343  S --3-> S R=-0.17 totalR=-57.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  344  S --3-> S R=-0.17 totalR=-57.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  345  S --3-> S R=-0.17 totalR=-57.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  346  S --3-> S R=-0.17 totalR=-57.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  347  S --3-> S R=-0.17 totalR=-57.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  348  S --3-> S R=-0.17 totalR=-58.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  349  S --3-> S R=-0.17 totalR=-58.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  350  S --3-> S R=-0.17 totalR=-58.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  351  S --3-> S R=-0.17 totalR=-58.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  352  S --3-> S R=-0.17 totalR=-58.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  353  S --3-> S R=-0.17 totalR=-58.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  354  S --3-> S R=-0.17 totalR=-59.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  355  S --3-> S R=-0.17 totalR=-59.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  356  S --3-> S R=-0.17 totalR=-59.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  357  S --3-> S R=-0.17 totalR=-59.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  358  S --3-> S R=-0.17 totalR=-59.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  359  S --3-> S R=-0.17 totalR=-59.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  360  S --3-> S R=-0.17 totalR=-60.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  361  S --3-> S R=-0.17 totalR=-60.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  362  S --3-> S R=-0.17 totalR=-60.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  363  S --3-> S R=-0.17 totalR=-60.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  364  S --3-> S R=-0.17 totalR=-60.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  365  S --3-> S R=-0.17 totalR=-60.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  366  S --3-> S R=-0.17 totalR=-61.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  367  S --3-> S R=-0.17 totalR=-61.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  368  S --3-> S R=-0.17 totalR=-61.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  369  S --3-> S R=-0.17 totalR=-61.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  370  S --3-> S R=-0.17 totalR=-61.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  371  S --3-> S R=-0.17 totalR=-61.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  372  S --3-> S R=-0.17 totalR=-62.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  373  S --3-> S R=-0.17 totalR=-62.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  374  S --3-> S R=-0.17 totalR=-62.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  375  S --3-> S R=-0.17 totalR=-62.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  376  S --3-> S R=-0.17 totalR=-62.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  377  S --3-> S R=-0.17 totalR=-62.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  378  S --3-> S R=-0.17 totalR=-63.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  379  S --3-> S R=-0.17 totalR=-63.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  380  S --3-> S R=-0.17 totalR=-63.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  381  S --3-> S R=-0.17 totalR=-63.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  382  S --3-> S R=-0.17 totalR=-63.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  383  S --3-> S R=-0.17 totalR=-63.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  384  S --3-> S R=-0.17 totalR=-64.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  385  S --3-> S R=-0.17 totalR=-64.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  386  S --3-> S R=-0.17 totalR=-64.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  387  S --3-> S R=-0.17 totalR=-64.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  388  S --3-> S R=-0.17 totalR=-64.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  389  S --3-> S R=-0.17 totalR=-64.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  390  S --3-> S R=-0.17 totalR=-65.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  391  S --3-> S R=-0.17 totalR=-65.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  392  S --3-> S R=-0.17 totalR=-65.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  393  S --3-> S R=-0.17 totalR=-65.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  394  S --3-> S R=-0.17 totalR=-65.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  395  S --3-> S R=-0.17 totalR=-65.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  396  S --3-> S R=-0.17 totalR=-66.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  397  S --3-> S R=-0.17 totalR=-66.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  398  S --3-> S R=-0.17 totalR=-66.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  399  S --3-> S R=-0.17 totalR=-66.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  400  S --3-> S R=-0.17 totalR=-66.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  401  S --3-> S R=-0.17 totalR=-66.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  402  S --3-> S R=-0.17 totalR=-67.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  403  S --3-> S R=-0.17 totalR=-67.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  404  S --3-> S R=-0.17 totalR=-67.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  405  S --3-> S R=-0.17 totalR=-67.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  406  S --3-> S R=-0.17 totalR=-67.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  407  S --3-> S R=-0.17 totalR=-67.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  408  S --3-> S R=-0.17 totalR=-68.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  409  S --3-> S R=-0.17 totalR=-68.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  410  S --3-> S R=-0.17 totalR=-68.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  411  S --3-> S R=-0.17 totalR=-68.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  412  S --3-> S R=-0.17 totalR=-68.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  413  S --3-> S R=-0.17 totalR=-68.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  414  S --3-> S R=-0.17 totalR=-69.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  415  S --3-> S R=-0.17 totalR=-69.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  416  S --3-> S R=-0.17 totalR=-69.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  417  S --3-> S R=-0.17 totalR=-69.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  418  S --3-> S R=-0.17 totalR=-69.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  419  S --3-> S R=-0.17 totalR=-69.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  420  S --3-> S R=-0.17 totalR=-70.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  421  S --3-> S R=-0.17 totalR=-70.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  422  S --3-> S R=-0.17 totalR=-70.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  423  S --3-> S R=-0.17 totalR=-70.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  424  S --3-> S R=-0.17 totalR=-70.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  425  S --3-> S R=-0.17 totalR=-70.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  426  S --3-> S R=-0.17 totalR=-71.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  427  S --3-> S R=-0.17 totalR=-71.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  428  S --3-> S R=-0.17 totalR=-71.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  429  S --3-> S R=-0.17 totalR=-71.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  430  S --3-> S R=-0.17 totalR=-71.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  431  S --3-> S R=-0.17 totalR=-71.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  432  S --3-> S R=-0.17 totalR=-72.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  433  S --3-> S R=-0.17 totalR=-72.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  434  S --3-> S R=-0.17 totalR=-72.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  435  S --3-> S R=-0.17 totalR=-72.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  436  S --3-> S R=-0.17 totalR=-72.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  437  S --3-> S R=-0.17 totalR=-72.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  438  S --3-> S R=-0.17 totalR=-73.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  439  S --3-> S R=-0.17 totalR=-73.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  440  S --3-> S R=-0.17 totalR=-73.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  441  S --3-> S R=-0.17 totalR=-73.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  442  S --3-> S R=-0.17 totalR=-73.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  443  S --3-> S R=-0.17 totalR=-73.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  444  S --3-> S R=-0.17 totalR=-74.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  445  S --3-> S R=-0.17 totalR=-74.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  446  S --3-> S R=-0.17 totalR=-74.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  447  S --3-> S R=-0.17 totalR=-74.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  448  S --3-> S R=-0.17 totalR=-74.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  449  S --3-> S R=-0.17 totalR=-74.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  450  S --3-> S R=-0.17 totalR=-75.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  451  S --3-> S R=-0.17 totalR=-75.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  452  S --3-> S R=-0.17 totalR=-75.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  453  S --3-> S R=-0.17 totalR=-75.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  454  S --3-> S R=-0.17 totalR=-75.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  455  S --3-> S R=-0.17 totalR=-75.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  456  S --3-> S R=-0.17 totalR=-76.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  457  S --3-> S R=-0.17 totalR=-76.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  458  S --3-> S R=-0.17 totalR=-76.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  459  S --3-> S R=-0.17 totalR=-76.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  460  S --3-> S R=-0.17 totalR=-76.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  461  S --3-> S R=-0.17 totalR=-76.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  462  S --3-> S R=-0.17 totalR=-77.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  463  S --3-> S R=-0.17 totalR=-77.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  464  S --3-> S R=-0.17 totalR=-77.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  465  S --3-> S R=-0.17 totalR=-77.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  466  S --3-> S R=-0.17 totalR=-77.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  467  S --3-> S R=-0.17 totalR=-77.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  468  S --3-> S R=-0.17 totalR=-78.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  469  S --3-> S R=-0.17 totalR=-78.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  470  S --3-> S R=-0.17 totalR=-78.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  471  S --3-> S R=-0.17 totalR=-78.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  472  S --3-> S R=-0.17 totalR=-78.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  473  S --3-> S R=-0.17 totalR=-78.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  474  S --3-> S R=-0.17 totalR=-79.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  475  S --3-> S R=-0.17 totalR=-79.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  476  S --3-> S R=-0.17 totalR=-79.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  477  S --3-> S R=-0.17 totalR=-79.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  478  S --3-> S R=-0.17 totalR=-79.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  479  S --3-> S R=-0.17 totalR=-79.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  480  S --3-> S R=-0.17 totalR=-80.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  481  S --3-> S R=-0.17 totalR=-80.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  482  S --3-> S R=-0.17 totalR=-80.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  483  S --3-> S R=-0.17 totalR=-80.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  484  S --3-> S R=-0.17 totalR=-80.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  485  S --3-> S R=-0.17 totalR=-80.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  486  S --3-> S R=-0.17 totalR=-81.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  487  S --3-> S R=-0.17 totalR=-81.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  488  S --3-> S R=-0.17 totalR=-81.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  489  S --3-> S R=-0.17 totalR=-81.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  490  S --3-> S R=-0.17 totalR=-81.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  491  S --3-> S R=-0.17 totalR=-81.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  492  S --3-> S R=-0.17 totalR=-82.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  493  S --3-> S R=-0.17 totalR=-82.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  494  S --3-> S R=-0.17 totalR=-82.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  495  S --3-> S R=-0.17 totalR=-82.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  496  S --3-> S R=-0.17 totalR=-82.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  497  S --3-> S R=-0.17 totalR=-82.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  498  S --3-> S R=-0.17 totalR=-83.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  499  S --3-> S R=-0.17 totalR=-83.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  500  S --3-> S R=-0.17 totalR=-83.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  501  S --3-> S R=-0.17 totalR=-83.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  502  S --3-> S R=-0.17 totalR=-83.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  503  S --3-> S R=-0.17 totalR=-83.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  504  S --3-> S R=-0.17 totalR=-84.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  505  S --3-> S R=-0.17 totalR=-84.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  506  S --3-> S R=-0.17 totalR=-84.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  507  S --3-> S R=-0.17 totalR=-84.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  508  S --3-> S R=-0.17 totalR=-84.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  509  S --3-> S R=-0.17 totalR=-84.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  510  S --3-> S R=-0.17 totalR=-85.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  511  S --3-> S R=-0.17 totalR=-85.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  512  S --3-> S R=-0.17 totalR=-85.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  513  S --3-> S R=-0.17 totalR=-85.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  514  S --3-> S R=-0.17 totalR=-85.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  515  S --3-> S R=-0.17 totalR=-85.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  516  S --3-> S R=-0.17 totalR=-86.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  517  S --3-> S R=-0.17 totalR=-86.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  518  S --3-> S R=-0.17 totalR=-86.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  519  S --3-> S R=-0.17 totalR=-86.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  520  S --3-> S R=-0.17 totalR=-86.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  521  S --3-> S R=-0.17 totalR=-86.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  522  S --3-> S R=-0.17 totalR=-87.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  523  S --3-> S R=-0.17 totalR=-87.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  524  S --3-> S R=-0.17 totalR=-87.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  525  S --3-> S R=-0.17 totalR=-87.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  526  S --3-> S R=-0.17 totalR=-87.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  527  S --3-> S R=-0.17 totalR=-87.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  528  S --3-> S R=-0.17 totalR=-88.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  529  S --3-> S R=-0.17 totalR=-88.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  530  S --3-> S R=-0.17 totalR=-88.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  531  S --3-> S R=-0.17 totalR=-88.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  532  S --3-> S R=-0.17 totalR=-88.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  533  S --3-> S R=-0.17 totalR=-88.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  534  S --3-> S R=-0.17 totalR=-89.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  535  S --3-> S R=-0.17 totalR=-89.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  536  S --3-> S R=-0.17 totalR=-89.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  537  S --3-> S R=-0.17 totalR=-89.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  538  S --3-> S R=-0.17 totalR=-89.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  539  S --3-> S R=-0.17 totalR=-89.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  540  S --3-> S R=-0.17 totalR=-90.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  541  S --3-> S R=-0.17 totalR=-90.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  542  S --3-> S R=-0.17 totalR=-90.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  543  S --3-> S R=-0.17 totalR=-90.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  544  S --3-> S R=-0.17 totalR=-90.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  545  S --3-> S R=-0.17 totalR=-90.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  546  S --3-> S R=-0.17 totalR=-91.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  547  S --3-> S R=-0.17 totalR=-91.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  548  S --3-> S R=-0.17 totalR=-91.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  549  S --3-> S R=-0.17 totalR=-91.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  550  S --3-> S R=-0.17 totalR=-91.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  551  S --3-> S R=-0.17 totalR=-91.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  552  S --3-> S R=-0.17 totalR=-92.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  553  S --3-> S R=-0.17 totalR=-92.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  554  S --3-> S R=-0.17 totalR=-92.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  555  S --3-> S R=-0.17 totalR=-92.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  556  S --3-> S R=-0.17 totalR=-92.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  557  S --3-> S R=-0.17 totalR=-92.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  558  S --3-> S R=-0.17 totalR=-93.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  559  S --3-> S R=-0.17 totalR=-93.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  560  S --3-> S R=-0.17 totalR=-93.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  561  S --3-> S R=-0.17 totalR=-93.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  562  S --3-> S R=-0.17 totalR=-93.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  563  S --3-> S R=-0.17 totalR=-93.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  564  S --3-> S R=-0.17 totalR=-94.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  565  S --3-> S R=-0.17 totalR=-94.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  566  S --3-> S R=-0.17 totalR=-94.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  567  S --3-> S R=-0.17 totalR=-94.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  568  S --3-> S R=-0.17 totalR=-94.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  569  S --3-> S R=-0.17 totalR=-94.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  570  S --3-> S R=-0.17 totalR=-95.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  571  S --3-> S R=-0.17 totalR=-95.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  572  S --3-> S R=-0.17 totalR=-95.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  573  S --3-> S R=-0.17 totalR=-95.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  574  S --3-> S R=-0.17 totalR=-95.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  575  S --3-> S R=-0.17 totalR=-95.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  576  S --3-> S R=-0.17 totalR=-96.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  577  S --3-> S R=-0.17 totalR=-96.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  578  S --3-> S R=-0.17 totalR=-96.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  579  S --3-> S R=-0.17 totalR=-96.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  580  S --3-> S R=-0.17 totalR=-96.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  581  S --3-> S R=-0.17 totalR=-96.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  582  S --3-> S R=-0.17 totalR=-97.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  583  S --3-> S R=-0.17 totalR=-97.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  584  S --3-> S R=-0.17 totalR=-97.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  585  S --3-> S R=-0.17 totalR=-97.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  586  S --3-> S R=-0.17 totalR=-97.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  587  S --3-> S R=-0.17 totalR=-97.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  588  S --3-> S R=-0.17 totalR=-98.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  589  S --3-> S R=-0.17 totalR=-98.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  590  S --3-> S R=-0.17 totalR=-98.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  591  S --3-> S R=-0.17 totalR=-98.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  592  S --3-> S R=-0.17 totalR=-98.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  593  S --3-> S R=-0.17 totalR=-98.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  594  S --3-> S R=-0.17 totalR=-99.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  595  S --3-> S R=-0.17 totalR=-99.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  596  S --3-> S R=-0.17 totalR=-99.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  597  S --3-> S R=-0.17 totalR=-99.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  598  S --3-> S R=-0.17 totalR=-99.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  599  S --3-> S R=-0.17 totalR=-99.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  600  S --3-> S R=-0.17 totalR=-100.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  601  S --3-> S R=-0.17 totalR=-100.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  602  S --3-> S R=-0.17 totalR=-100.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  603  S --3-> S R=-0.17 totalR=-100.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  604  S --3-> S R=-0.17 totalR=-100.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  605  S --3-> S R=-0.17 totalR=-100.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  606  S --3-> S R=-0.17 totalR=-101.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  607  S --3-> S R=-0.17 totalR=-101.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  608  S --3-> S R=-0.17 totalR=-101.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  609  S --3-> S R=-0.17 totalR=-101.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  610  S --3-> S R=-0.17 totalR=-101.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  611  S --3-> S R=-0.17 totalR=-101.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  612  S --3-> S R=-0.17 totalR=-102.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  613  S --3-> S R=-0.17 totalR=-102.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  614  S --3-> S R=-0.17 totalR=-102.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  615  S --3-> S R=-0.17 totalR=-102.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  616  S --3-> S R=-0.17 totalR=-102.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  617  S --3-> S R=-0.17 totalR=-102.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  618  S --3-> S R=-0.17 totalR=-103.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  619  S --3-> S R=-0.17 totalR=-103.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  620  S --3-> S R=-0.17 totalR=-103.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  621  S --3-> S R=-0.17 totalR=-103.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  622  S --3-> S R=-0.17 totalR=-103.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  623  S --3-> S R=-0.17 totalR=-103.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  624  S --3-> S R=-0.17 totalR=-104.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  625  S --3-> S R=-0.17 totalR=-104.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  626  S --3-> S R=-0.17 totalR=-104.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  627  S --3-> S R=-0.17 totalR=-104.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  628  S --3-> S R=-0.17 totalR=-104.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  629  S --3-> S R=-0.17 totalR=-104.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  630  S --3-> S R=-0.17 totalR=-105.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  631  S --3-> S R=-0.17 totalR=-105.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  632  S --3-> S R=-0.17 totalR=-105.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  633  S --3-> S R=-0.17 totalR=-105.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  634  S --3-> S R=-0.17 totalR=-105.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  635  S --3-> S R=-0.17 totalR=-105.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  636  S --3-> S R=-0.17 totalR=-106.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  637  S --3-> S R=-0.17 totalR=-106.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  638  S --3-> S R=-0.17 totalR=-106.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  639  S --3-> S R=-0.17 totalR=-106.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  640  S --3-> S R=-0.17 totalR=-106.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  641  S --3-> S R=-0.17 totalR=-106.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  642  S --3-> S R=-0.17 totalR=-107.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  643  S --3-> S R=-0.17 totalR=-107.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  644  S --3-> S R=-0.17 totalR=-107.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  645  S --3-> S R=-0.17 totalR=-107.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  646  S --3-> S R=-0.17 totalR=-107.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  647  S --3-> S R=-0.17 totalR=-107.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  648  S --3-> S R=-0.17 totalR=-108.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  649  S --3-> S R=-0.17 totalR=-108.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  650  S --3-> S R=-0.17 totalR=-108.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  651  S --3-> S R=-0.17 totalR=-108.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  652  S --3-> S R=-0.17 totalR=-108.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  653  S --3-> S R=-0.17 totalR=-108.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  654  S --3-> S R=-0.17 totalR=-109.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  655  S --3-> S R=-0.17 totalR=-109.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  656  S --3-> S R=-0.17 totalR=-109.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  657  S --3-> S R=-0.17 totalR=-109.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  658  S --3-> S R=-0.17 totalR=-109.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  659  S --3-> S R=-0.17 totalR=-109.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  660  S --3-> S R=-0.17 totalR=-110.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  661  S --3-> S R=-0.17 totalR=-110.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  662  S --3-> S R=-0.17 totalR=-110.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  663  S --3-> S R=-0.17 totalR=-110.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  664  S --3-> S R=-0.17 totalR=-110.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  665  S --3-> S R=-0.17 totalR=-110.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  666  S --3-> S R=-0.17 totalR=-111.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  667  S --3-> S R=-0.17 totalR=-111.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  668  S --3-> S R=-0.17 totalR=-111.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  669  S --3-> S R=-0.17 totalR=-111.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  670  S --3-> S R=-0.17 totalR=-111.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  671  S --3-> S R=-0.17 totalR=-111.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  672  S --3-> S R=-0.17 totalR=-112.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  673  S --3-> S R=-0.17 totalR=-112.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  674  S --3-> S R=-0.17 totalR=-112.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  675  S --3-> S R=-0.17 totalR=-112.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  676  S --3-> S R=-0.17 totalR=-112.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  677  S --3-> S R=-0.17 totalR=-112.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  678  S --3-> S R=-0.17 totalR=-113.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  679  S --3-> S R=-0.17 totalR=-113.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  680  S --3-> S R=-0.17 totalR=-113.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  681  S --3-> S R=-0.17 totalR=-113.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  682  S --3-> S R=-0.17 totalR=-113.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  683  S --3-> S R=-0.17 totalR=-113.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  684  S --3-> S R=-0.17 totalR=-114.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  685  S --3-> S R=-0.17 totalR=-114.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  686  S --3-> S R=-0.17 totalR=-114.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  687  S --3-> S R=-0.17 totalR=-114.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  688  S --3-> S R=-0.17 totalR=-114.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  689  S --3-> S R=-0.17 totalR=-114.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  690  S --3-> S R=-0.17 totalR=-115.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  691  S --3-> S R=-0.17 totalR=-115.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  692  S --3-> S R=-0.17 totalR=-115.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  693  S --3-> S R=-0.17 totalR=-115.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  694  S --3-> S R=-0.17 totalR=-115.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  695  S --3-> S R=-0.17 totalR=-115.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  696  S --3-> S R=-0.17 totalR=-116.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  697  S --3-> S R=-0.17 totalR=-116.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  698  S --3-> S R=-0.17 totalR=-116.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  699  S --3-> S R=-0.17 totalR=-116.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  700  S --3-> S R=-0.17 totalR=-116.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  701  S --3-> S R=-0.17 totalR=-116.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  702  S --3-> S R=-0.17 totalR=-117.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  703  S --3-> S R=-0.17 totalR=-117.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  704  S --3-> S R=-0.17 totalR=-117.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  705  S --3-> S R=-0.17 totalR=-117.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  706  S --3-> S R=-0.17 totalR=-117.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  707  S --3-> S R=-0.17 totalR=-117.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  708  S --3-> S R=-0.17 totalR=-118.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  709  S --3-> S R=-0.17 totalR=-118.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  710  S --3-> S R=-0.17 totalR=-118.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  711  S --3-> S R=-0.17 totalR=-118.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  712  S --3-> S R=-0.17 totalR=-118.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  713  S --3-> S R=-0.17 totalR=-118.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  714  S --3-> S R=-0.17 totalR=-119.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  715  S --3-> S R=-0.17 totalR=-119.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  716  S --3-> S R=-0.17 totalR=-119.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  717  S --3-> S R=-0.17 totalR=-119.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  718  S --3-> S R=-0.17 totalR=-119.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  719  S --3-> S R=-0.17 totalR=-119.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  720  S --3-> S R=-0.17 totalR=-120.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  721  S --3-> S R=-0.17 totalR=-120.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  722  S --3-> S R=-0.17 totalR=-120.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  723  S --3-> S R=-0.17 totalR=-120.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  724  S --3-> S R=-0.17 totalR=-120.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  725  S --3-> S R=-0.17 totalR=-120.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  726  S --3-> S R=-0.17 totalR=-121.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  727  S --3-> S R=-0.17 totalR=-121.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  728  S --3-> S R=-0.17 totalR=-121.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  729  S --3-> S R=-0.17 totalR=-121.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  730  S --3-> S R=-0.17 totalR=-121.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  731  S --3-> S R=-0.17 totalR=-121.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  732  S --3-> S R=-0.17 totalR=-122.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  733  S --3-> S R=-0.17 totalR=-122.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  734  S --3-> S R=-0.17 totalR=-122.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  735  S --3-> S R=-0.17 totalR=-122.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  736  S --3-> S R=-0.17 totalR=-122.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  737  S --3-> S R=-0.17 totalR=-122.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  738  S --3-> S R=-0.17 totalR=-123.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  739  S --3-> S R=-0.17 totalR=-123.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  740  S --3-> S R=-0.17 totalR=-123.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  741  S --3-> S R=-0.17 totalR=-123.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  742  S --3-> S R=-0.17 totalR=-123.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  743  S --3-> S R=-0.17 totalR=-123.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  744  S --3-> S R=-0.17 totalR=-124.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  745  S --3-> S R=-0.17 totalR=-124.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  746  S --3-> S R=-0.17 totalR=-124.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  747  S --3-> S R=-0.17 totalR=-124.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  748  S --3-> S R=-0.17 totalR=-124.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  749  S --3-> S R=-0.17 totalR=-124.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  750  S --3-> S R=-0.17 totalR=-125.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  751  S --3-> S R=-0.17 totalR=-125.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  752  S --3-> S R=-0.17 totalR=-125.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  753  S --3-> S R=-0.17 totalR=-125.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  754  S --3-> S R=-0.17 totalR=-125.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  755  S --3-> S R=-0.17 totalR=-125.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  756  S --3-> S R=-0.17 totalR=-126.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  757  S --3-> S R=-0.17 totalR=-126.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  758  S --3-> S R=-0.17 totalR=-126.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  759  S --3-> S R=-0.17 totalR=-126.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  760  S --3-> S R=-0.17 totalR=-126.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  761  S --3-> S R=-0.17 totalR=-126.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  762  S --3-> S R=-0.17 totalR=-127.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  763  S --3-> S R=-0.17 totalR=-127.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  764  S --3-> S R=-0.17 totalR=-127.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  765  S --3-> S R=-0.17 totalR=-127.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  766  S --3-> S R=-0.17 totalR=-127.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  767  S --3-> S R=-0.17 totalR=-127.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  768  S --3-> S R=-0.17 totalR=-128.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  769  S --3-> S R=-0.17 totalR=-128.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  770  S --3-> S R=-0.17 totalR=-128.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  771  S --3-> S R=-0.17 totalR=-128.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  772  S --3-> S R=-0.17 totalR=-128.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  773  S --3-> S R=-0.17 totalR=-128.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  774  S --3-> S R=-0.17 totalR=-129.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  775  S --3-> S R=-0.17 totalR=-129.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  776  S --3-> S R=-0.17 totalR=-129.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  777  S --3-> S R=-0.17 totalR=-129.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  778  S --3-> S R=-0.17 totalR=-129.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  779  S --3-> S R=-0.17 totalR=-129.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  780  S --3-> S R=-0.17 totalR=-130.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  781  S --3-> S R=-0.17 totalR=-130.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  782  S --3-> S R=-0.17 totalR=-130.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  783  S --3-> S R=-0.17 totalR=-130.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  784  S --3-> S R=-0.17 totalR=-130.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  785  S --3-> S R=-0.17 totalR=-130.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  786  S --3-> S R=-0.17 totalR=-131.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  787  S --3-> S R=-0.17 totalR=-131.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  788  S --3-> S R=-0.17 totalR=-131.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  789  S --3-> S R=-0.17 totalR=-131.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  790  S --3-> S R=-0.17 totalR=-131.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  791  S --3-> S R=-0.17 totalR=-131.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  792  S --3-> S R=-0.17 totalR=-132.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  793  S --3-> S R=-0.17 totalR=-132.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  794  S --3-> S R=-0.17 totalR=-132.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  795  S --3-> S R=-0.17 totalR=-132.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  796  S --3-> S R=-0.17 totalR=-132.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  797  S --3-> S R=-0.17 totalR=-132.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  798  S --3-> S R=-0.17 totalR=-133.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  799  S --3-> S R=-0.17 totalR=-133.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  800  S --3-> S R=-0.17 totalR=-133.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  801  S --3-> S R=-0.17 totalR=-133.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  802  S --3-> S R=-0.17 totalR=-133.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  803  S --3-> S R=-0.17 totalR=-133.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  804  S --3-> S R=-0.17 totalR=-134.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  805  S --3-> S R=-0.17 totalR=-134.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  806  S --3-> S R=-0.17 totalR=-134.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  807  S --3-> S R=-0.17 totalR=-134.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  808  S --3-> S R=-0.17 totalR=-134.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  809  S --3-> S R=-0.17 totalR=-134.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  810  S --3-> S R=-0.17 totalR=-135.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  811  S --3-> S R=-0.17 totalR=-135.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  812  S --3-> S R=-0.17 totalR=-135.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  813  S --3-> S R=-0.17 totalR=-135.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  814  S --3-> S R=-0.17 totalR=-135.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  815  S --3-> S R=-0.17 totalR=-135.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  816  S --3-> S R=-0.17 totalR=-136.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  817  S --3-> S R=-0.17 totalR=-136.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  818  S --3-> S R=-0.17 totalR=-136.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  819  S --3-> S R=-0.17 totalR=-136.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  820  S --3-> S R=-0.17 totalR=-136.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  821  S --3-> S R=-0.17 totalR=-136.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  822  S --3-> S R=-0.17 totalR=-137.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  823  S --3-> S R=-0.17 totalR=-137.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  824  S --3-> S R=-0.17 totalR=-137.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  825  S --3-> S R=-0.17 totalR=-137.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  826  S --3-> S R=-0.17 totalR=-137.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  827  S --3-> S R=-0.17 totalR=-137.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  828  S --3-> S R=-0.17 totalR=-138.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  829  S --3-> S R=-0.17 totalR=-138.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  830  S --3-> S R=-0.17 totalR=-138.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  831  S --3-> S R=-0.17 totalR=-138.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  832  S --3-> S R=-0.17 totalR=-138.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  833  S --3-> S R=-0.17 totalR=-138.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  834  S --3-> S R=-0.17 totalR=-139.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  835  S --3-> S R=-0.17 totalR=-139.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  836  S --3-> S R=-0.17 totalR=-139.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  837  S --3-> S R=-0.17 totalR=-139.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  838  S --3-> S R=-0.17 totalR=-139.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  839  S --3-> S R=-0.17 totalR=-139.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  840  S --3-> S R=-0.17 totalR=-140.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  841  S --3-> S R=-0.17 totalR=-140.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  842  S --3-> S R=-0.17 totalR=-140.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  843  S --3-> S R=-0.17 totalR=-140.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  844  S --3-> S R=-0.17 totalR=-140.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  845  S --3-> S R=-0.17 totalR=-140.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  846  S --3-> S R=-0.17 totalR=-141.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  847  S --3-> S R=-0.17 totalR=-141.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  848  S --3-> S R=-0.17 totalR=-141.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  849  S --3-> S R=-0.17 totalR=-141.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  850  S --3-> S R=-0.17 totalR=-141.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  851  S --3-> S R=-0.17 totalR=-141.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  852  S --3-> S R=-0.17 totalR=-142.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  853  S --3-> S R=-0.17 totalR=-142.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  854  S --3-> S R=-0.17 totalR=-142.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  855  S --3-> S R=-0.17 totalR=-142.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  856  S --3-> S R=-0.17 totalR=-142.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  857  S --3-> S R=-0.17 totalR=-142.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  858  S --3-> S R=-0.17 totalR=-143.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  859  S --3-> S R=-0.17 totalR=-143.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  860  S --3-> S R=-0.17 totalR=-143.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  861  S --3-> S R=-0.17 totalR=-143.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  862  S --3-> S R=-0.17 totalR=-143.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  863  S --3-> S R=-0.17 totalR=-143.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  864  S --3-> S R=-0.17 totalR=-144.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  865  S --3-> S R=-0.17 totalR=-144.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  866  S --3-> S R=-0.17 totalR=-144.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  867  S --3-> S R=-0.17 totalR=-144.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  868  S --3-> S R=-0.17 totalR=-144.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  869  S --3-> S R=-0.17 totalR=-144.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  870  S --3-> S R=-0.17 totalR=-145.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  871  S --3-> S R=-0.17 totalR=-145.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  872  S --3-> S R=-0.17 totalR=-145.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  873  S --3-> S R=-0.17 totalR=-145.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  874  S --3-> S R=-0.17 totalR=-145.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  875  S --3-> S R=-0.17 totalR=-145.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  876  S --3-> S R=-0.17 totalR=-146.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  877  S --3-> S R=-0.17 totalR=-146.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  878  S --3-> S R=-0.17 totalR=-146.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  879  S --3-> S R=-0.17 totalR=-146.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  880  S --3-> S R=-0.17 totalR=-146.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  881  S --3-> S R=-0.17 totalR=-146.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  882  S --3-> S R=-0.17 totalR=-147.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  883  S --3-> S R=-0.17 totalR=-147.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  884  S --3-> S R=-0.17 totalR=-147.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  885  S --3-> S R=-0.17 totalR=-147.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  886  S --3-> S R=-0.17 totalR=-147.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  887  S --3-> S R=-0.17 totalR=-147.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  888  S --3-> S R=-0.17 totalR=-148.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  889  S --3-> S R=-0.17 totalR=-148.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  890  S --3-> S R=-0.17 totalR=-148.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  891  S --3-> S R=-0.17 totalR=-148.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  892  S --3-> S R=-0.17 totalR=-148.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  893  S --3-> S R=-0.17 totalR=-148.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  894  S --3-> S R=-0.17 totalR=-149.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  895  S --3-> S R=-0.17 totalR=-149.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  896  S --3-> S R=-0.17 totalR=-149.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  897  S --3-> S R=-0.17 totalR=-149.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  898  S --3-> S R=-0.17 totalR=-149.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  899  S --3-> S R=-0.17 totalR=-149.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  900  S --3-> S R=-0.17 totalR=-150.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  901  S --3-> S R=-0.17 totalR=-150.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  902  S --3-> S R=-0.17 totalR=-150.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  903  S --3-> S R=-0.17 totalR=-150.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  904  S --3-> S R=-0.17 totalR=-150.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  905  S --3-> S R=-0.17 totalR=-150.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  906  S --3-> S R=-0.17 totalR=-151.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  907  S --3-> S R=-0.17 totalR=-151.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  908  S --3-> S R=-0.17 totalR=-151.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  909  S --3-> S R=-0.17 totalR=-151.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  910  S --3-> S R=-0.17 totalR=-151.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  911  S --3-> S R=-0.17 totalR=-151.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  912  S --3-> S R=-0.17 totalR=-152.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  913  S --3-> S R=-0.17 totalR=-152.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  914  S --3-> S R=-0.17 totalR=-152.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  915  S --3-> S R=-0.17 totalR=-152.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  916  S --3-> S R=-0.17 totalR=-152.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  917  S --3-> S R=-0.17 totalR=-152.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  918  S --3-> S R=-0.17 totalR=-153.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  919  S --3-> S R=-0.17 totalR=-153.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  920  S --3-> S R=-0.17 totalR=-153.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  921  S --3-> S R=-0.17 totalR=-153.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  922  S --3-> S R=-0.17 totalR=-153.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  923  S --3-> S R=-0.17 totalR=-153.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  924  S --3-> S R=-0.17 totalR=-154.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  925  S --3-> S R=-0.17 totalR=-154.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  926  S --3-> S R=-0.17 totalR=-154.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  927  S --3-> S R=-0.17 totalR=-154.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  928  S --3-> S R=-0.17 totalR=-154.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  929  S --3-> S R=-0.17 totalR=-154.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  930  S --3-> S R=-0.17 totalR=-155.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  931  S --3-> S R=-0.17 totalR=-155.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  932  S --3-> S R=-0.17 totalR=-155.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  933  S --3-> S R=-0.17 totalR=-155.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  934  S --3-> S R=-0.17 totalR=-155.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  935  S --3-> S R=-0.17 totalR=-155.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  936  S --3-> S R=-0.17 totalR=-156.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  937  S --3-> S R=-0.17 totalR=-156.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  938  S --3-> S R=-0.17 totalR=-156.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  939  S --3-> S R=-0.17 totalR=-156.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  940  S --3-> S R=-0.17 totalR=-156.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  941  S --3-> S R=-0.17 totalR=-156.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  942  S --3-> S R=-0.17 totalR=-157.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  943  S --3-> S R=-0.17 totalR=-157.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  944  S --3-> S R=-0.17 totalR=-157.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  945  S --3-> S R=-0.17 totalR=-157.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  946  S --3-> S R=-0.17 totalR=-157.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  947  S --3-> S R=-0.17 totalR=-157.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  948  S --3-> S R=-0.17 totalR=-158.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  949  S --3-> S R=-0.17 totalR=-158.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  950  S --3-> S R=-0.17 totalR=-158.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  951  S --3-> S R=-0.17 totalR=-158.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  952  S --3-> S R=-0.17 totalR=-158.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  953  S --3-> S R=-0.17 totalR=-158.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  954  S --3-> S R=-0.17 totalR=-159.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  955  S --3-> S R=-0.17 totalR=-159.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  956  S --3-> S R=-0.17 totalR=-159.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  957  S --3-> S R=-0.17 totalR=-159.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  958  S --3-> S R=-0.17 totalR=-159.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  959  S --3-> S R=-0.17 totalR=-159.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  960  S --3-> S R=-0.17 totalR=-160.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  961  S --3-> S R=-0.17 totalR=-160.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  962  S --3-> S R=-0.17 totalR=-160.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  963  S --3-> S R=-0.17 totalR=-160.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  964  S --3-> S R=-0.17 totalR=-160.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  965  S --3-> S R=-0.17 totalR=-160.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  966  S --3-> S R=-0.17 totalR=-161.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  967  S --3-> S R=-0.17 totalR=-161.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  968  S --3-> S R=-0.17 totalR=-161.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  969  S --3-> S R=-0.17 totalR=-161.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  970  S --3-> S R=-0.17 totalR=-161.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  971  S --3-> S R=-0.17 totalR=-161.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  972  S --3-> S R=-0.17 totalR=-162.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  973  S --3-> S R=-0.17 totalR=-162.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  974  S --3-> S R=-0.17 totalR=-162.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  975  S --3-> S R=-0.17 totalR=-162.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  976  S --3-> S R=-0.17 totalR=-162.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  977  S --3-> S R=-0.17 totalR=-162.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  978  S --3-> S R=-0.17 totalR=-163.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  979  S --3-> S R=-0.17 totalR=-163.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  980  S --3-> S R=-0.17 totalR=-163.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  981  S --3-> S R=-0.17 totalR=-163.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  982  S --3-> S R=-0.17 totalR=-163.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  983  S --3-> S R=-0.17 totalR=-163.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  984  S --3-> S R=-0.17 totalR=-164.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  985  S --3-> S R=-0.17 totalR=-164.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  986  S --3-> S R=-0.17 totalR=-164.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  987  S --3-> S R=-0.17 totalR=-164.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  988  S --3-> S R=-0.17 totalR=-164.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  989  S --3-> S R=-0.17 totalR=-164.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  990  S --3-> S R=-0.17 totalR=-165.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  991  S --3-> S R=-0.17 totalR=-165.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  992  S --3-> S R=-0.17 totalR=-165.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  993  S --3-> S R=-0.17 totalR=-165.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  994  S --3-> S R=-0.17 totalR=-165.67 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  995  S --3-> S R=-0.17 totalR=-165.83 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  996  S --3-> S R=-0.17 totalR=-166.00 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  997  S --3-> S R=-0.17 totalR=-166.17 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  998  S --3-> S R=-0.17 totalR=-166.33 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step:  999  S --3-> S R=-0.17 totalR=-166.50 cost=1000 customerR=   0 optimum=6000
Episode:    0   Step: 1000  S --3-> S R=-0.17 totalR=-166.67 cost=1000 customerR=   0 optimum=6000

In [0]:
# Time a single search for the optimal route. In the saved run the call prints
# the scaled reward and the perfect path, and returns a dict describing the
# solution (cost, path, position, reward, rewards, scaled_reward).
%time baseline.find_optimum()


Scaled reward: 0.7583333333333333
Perfect path ['S', 'B', 'C', 'M', 'N', 'O', 'G', 'F', 'D', 'F', 'E', 'A', 'B', 'S']
CPU times: user 78.5 ms, sys: 542 µs, total: 79 ms
Wall time: 80.4 ms
Out[0]:
{'cost': 1450,
 'path': ['S',
  'B',
  'C',
  'M',
  'N',
  'O',
  'G',
  'F',
  'D',
  'F',
  'E',
  'A',
  'B',
  'S'],
 'position': 'S',
 'reward': 6000,
 'rewards': {'A': 0,
  'B': 0,
  'C': 0,
  'D': 0,
  'E': 0,
  'F': 0,
  'G': 0,
  'H': 0,
  'K': 0,
  'L': 0,
  'M': 0,
  'N': 0,
  'O': 0,
  'S': 0},
 'scaled_reward': 0.7583333333333333}

Evaluation


In [0]:
# Build a fresh Baseline around the environment, then score the model over
# 100 sample runs. The call hands back four numbers: mean/std of the perfect
# scores followed by mean/std of the model's test scores.
baseline = Baseline(env)
(
    perfect_score_mean,
    perfect_score_std,
    test_score_mean,
    test_score_std,
) = baseline.score(model, sample_runs=100)

In [0]:
# Perfect scores: mean and standard deviation returned by baseline.score for
# the baseline's own solutions over the sample runs.
# NOTE(review): presumably on the same scale as find_optimum's
# 'scaled_reward' -- confirm against Baseline.score.
perfect_score_mean, perfect_score_std


Out[0]:
(0.7325833333333331, 0.034117749535005)

In [0]:
# Test scores for our model: mean and standard deviation returned by the same
# baseline.score call.
# NOTE(review): in the saved run the mean (about -166.67, std ~0) equals the
# totalR of the logged 1000-step episode in which the agent repeats action 3
# at 'S' -- i.e. the model appears stuck rather than solving the task.
test_score_mean, test_score_std


Out[0]:
(-166.6666666666659, 2.842170943040401e-14)

In [0]: