异步调参-checkpoint



In [1]:
import numpy as np
from ACNet import ACNet
import gym

STATE_SIZE = 4
ACTION_SIZE = 2
MAX_EPISODE_LENGTH = 100
MAX_EPISODES = 100
GAMMA = .99
GAME = 'CartPole-v0'
GD = {}

class ExplorerFramework(object):
    def __init__(self, access, name, state_size, action_size):
        self.Access = access
        self.AC = ACNet(self.Access, state_size, action_size, name)
        self.env = gym.make(GAME).unwrapped
        self.name = name

    def get_bootstrap(self, done, sess, next_state):
        if done:
            terminal = 0
        else:
            terminal = self.AC.get_value(
                sess, np.expand_dims(next_state, axis=0))[0][0]
        return terminal

    def get_output(self, sess, inputs, actions, targets):
        return self.AC.get_losses(sess, inputs, actions, targets)

    def run(self, sess, max_episodes, t_max= 32):
        episode_score_list = []
        episode = 0
        while episode < max_episodes:
            episode += 1
            episode_socre = self.run_episode(sess, t_max)
            episode_score_list.append(episode_socre)
        GD[str(self.name)] = episode_score_list

    def run_episode(self, sess, t_max= 32):
        t_start = t = 0
        episode_score = 0
        buffer_state = []
        buffer_action = []
        buffer_reward = []

        self.AC.init_network(sess)
        state = self.env.reset()
        while True:
            t += 1
            action = self.AC.action_choose(sess, state)
            next_state, reward, done, info = self.env.step(action)
            # buffer for loop
            episode_score += reward
            buffer_state.append(state)
            buffer_action.append(action)
            buffer_reward.append(reward)
            state = next_state

            if t - t_start == t_max or done:
                t_start = t
                terminal = self.get_bootstrap(done, sess, next_state)

                buffer_target = []
                for r in buffer_reward[::-1]:
                    terminal = r + GAMMA * terminal
                    buffer_target.append(terminal)
                buffer_target.reverse()

                inputs = np.vstack(buffer_state)
                actions = np.squeeze(np.vstack(buffer_action), axis=1)
                targets = np.squeeze(np.vstack(buffer_target), axis=1)
                buffer_state = []
                buffer_action = []
                buffer_reward = []
                # update Access gradients
                self.AC.train_step(sess, inputs, actions, targets)
                # if self.name == 'W0':
                #    print(t, self.get_output(sess, inputs, actions, targets))

                # update local network
                self.AC.init_network(sess)
                #if self.name == 'W0':
                #    print(t, self.get_output(sess, inputs, actions, targets))

            if done or t > MAX_EPISODE_LENGTH:
                if self.name == 'W0':
                    outputs = tuple(self.get_output(sess, inputs, actions, targets))
                    print(t)
                    print('actor: %f, actor_grad: %f, policy mean: %f, policy: %f, entropy: %f, actor_norm: %f, critic: %f, critic_grad: %f, value: %f, critic_norm: %f, value_mean: %f, advantage: %f'%outputs)
                return episode_score

In [2]:
import multiprocessing
import threading
import tensorflow as tf
from Access import Access


NUMS_CPU = multiprocessing.cpu_count()
state_size = 4
action_size = 2
max_episodes = 1000


tf.reset_default_graph()
sess = tf.Session()
with tf.device("/cpu:0"):
    A = Access(state_size, action_size)
    F_list = []
    for i in range(NUMS_CPU):
        F_list.append(ExplorerFramework(A, 'W%i' % i, state_size, action_size))

    COORD = tf.train.Coordinator()
    sess.run(tf.global_variables_initializer())
    sess.graph.finalize()

    threads_list = []
    for ac in F_list:
        job = lambda: ac.run(sess, max_episodes)
        t = threading.Thread(target=job)
        t.start()
        threads_list.append(t)
    COORD.join(threads_list)


graph W0
graph W1
graph W2
graph W3
graph W4
graph W5
graph W6
graph W7
graph W8
graph W9
graph W10
graph W11
graph W12
graph W13
graph W14
graph W15
graph W16
graph W17
graph W18
graph W19
graph W20
graph W21
graph W22
graph W23
graph W24
graph W25
graph W26
graph W27
graph W28
graph W29
graph W30
graph W31
graph W32
graph W33
graph W34
graph W35
graph W36
graph W37
graph W38
graph W39
24
actor: 7.812781, actor_grad: 1.346462, policy mean: 0.693037, policy: 7.791586, entropy: -0.069283, actor_norm: 0.090477, critic: 164.012405, critic_grad: 26.890173, value: 163.078079, critic_norm: 0.934333, value_mean: 0.398911, advantage: 11.193321
17
actor: 5.358743, actor_grad: 1.952462, policy mean: 0.689872, policy: 5.337696, entropy: -0.069310, actor_norm: 0.090356, critic: 81.150177, critic_grad: 20.393137, value: 80.189346, critic_norm: 0.960828, value_mean: 0.795888, advantage: 7.741619
21
actor: 6.225276, actor_grad: 1.391839, policy mean: 0.697941, policy: 6.204336, entropy: -0.069293, actor_norm: 0.090232, critic: 110.285431, critic_grad: 28.457619, value: 109.252708, critic_norm: 1.032722, value_mean: 1.389001, advantage: 8.911281
19
actor: 4.915297, actor_grad: 1.139397, policy mean: 0.690929, policy: 4.894560, entropy: -0.069312, actor_norm: 0.090049, critic: 75.969261, critic_grad: 29.763460, value: 74.803452, critic_norm: 1.165811, value_mean: 2.345189, advantage: 7.079515
23
actor: 4.660617, actor_grad: 0.553329, policy mean: 0.692100, policy: 4.640096, entropy: -0.069314, actor_norm: 0.089835, critic: 81.729195, critic_grad: 45.297878, value: 80.305580, critic_norm: 1.423616, value_mean: 4.459117, advantage: 6.705292
48
actor: -1.511194, actor_grad: 0.590060, policy mean: 0.691684, policy: -1.531125, entropy: -0.069314, actor_norm: 0.089245, critic: 23.653564, critic_grad: 40.296818, value: 21.584896, critic_norm: 2.068668, value_mean: 10.300561, advantage: -2.211065
16
actor: -1.999608, actor_grad: 1.043831, policy mean: 0.691722, policy: -2.019187, entropy: -0.069249, actor_norm: 0.088827, critic: 26.503191, critic_grad: 51.897133, value: 24.406845, critic_norm: 2.096345, value_mean: 10.948929, advantage: -2.859433
30
actor: 3.224799, actor_grad: 0.482422, policy mean: 0.688559, policy: 3.205638, entropy: -0.069282, actor_norm: 0.088442, critic: 69.729065, critic_grad: 68.710335, value: 67.796608, critic_norm: 1.932461, value_mean: 9.482555, advantage: 4.618568
19
actor: 0.086568, actor_grad: 1.105798, policy mean: 0.690078, policy: 0.067965, entropy: -0.069295, actor_norm: 0.087898, critic: 22.205856, critic_grad: 29.638811, value: 20.324312, critic_norm: 1.881544, value_mean: 9.313327, advantage: 0.111377
13
actor: -1.991609, actor_grad: 1.151755, policy mean: 0.707656, policy: -2.009916, entropy: -0.069224, actor_norm: 0.087531, critic: 23.047329, critic_grad: 48.561829, value: 21.227751, critic_norm: 1.819579, value_mean: 9.523515, advantage: -2.795966
12
actor: -2.120147, actor_grad: 2.498731, policy mean: 0.745767, policy: -2.138214, entropy: -0.069104, actor_norm: 0.087171, critic: 20.374075, critic_grad: 66.575325, value: 18.603949, critic_norm: 1.770126, value_mean: 8.995826, advantage: -2.728307
14
actor: -0.649769, actor_grad: 0.380495, policy mean: 0.686784, policy: -0.667024, entropy: -0.069297, actor_norm: 0.086552, critic: 15.282134, critic_grad: 20.435003, value: 13.579741, critic_norm: 1.702393, value_mean: 8.143862, advantage: -0.959323
42
actor: -5.504827, actor_grad: 2.790707, policy mean: 0.704293, policy: -5.520951, entropy: -0.069169, actor_norm: 0.085293, critic: 70.187737, critic_grad: 151.516739, value: 68.178635, critic_norm: 2.009100, value_mean: 13.032537, advantage: -7.694283
54
actor: 0.891378, actor_grad: 0.824840, policy mean: 0.689396, policy: 0.873054, entropy: -0.068614, actor_norm: 0.086939, critic: 37.364876, critic_grad: 18.741976, value: 35.728073, critic_norm: 1.636801, value_mean: 9.221982, advantage: 1.511783
19
actor: -0.765281, actor_grad: 1.940711, policy mean: 0.754301, policy: -0.789680, entropy: -0.065697, actor_norm: 0.090095, critic: 24.716658, critic_grad: 31.164259, value: 22.987083, critic_norm: 1.729574, value_mean: 9.914083, advantage: -0.489378
39
actor: -6.640297, actor_grad: 3.398089, policy mean: 0.694341, policy: -6.686653, entropy: -0.053337, actor_norm: 0.099693, critic: 115.263474, critic_grad: 224.662018, value: 113.144669, critic_norm: 2.118801, value_mean: 14.362844, advantage: -10.441851
32
actor: 1.299745, actor_grad: 0.434126, policy mean: 0.606041, policy: 1.251158, entropy: -0.061267, actor_norm: 0.109855, critic: 64.469398, critic_grad: 36.190548, value: 62.398376, critic_norm: 2.071018, value_mean: 12.958664, advantage: 1.957128
65
actor: -5.440955, actor_grad: 13.293650, policy mean: 0.362473, policy: -5.492315, entropy: -0.061425, actor_norm: 0.112785, critic: 232.019699, critic_grad: 352.194031, value: 229.593292, critic_norm: 2.426406, value_mean: 16.152336, advantage: -15.152336
37
actor: -9.361421, actor_grad: 5.523485, policy mean: 0.513017, policy: -9.426874, entropy: -0.056712, actor_norm: 0.122164, critic: 342.173767, critic_grad: 527.996948, value: 339.408142, critic_norm: 2.765616, value_mean: 21.319099, advantage: -18.358801
64
actor: -6.683136, actor_grad: 4.781673, policy mean: 0.601413, policy: -6.750669, entropy: -0.053093, actor_norm: 0.120627, critic: 173.718277, critic_grad: 350.905243, value: 170.461639, critic_norm: 3.256633, value_mean: 25.340000, advantage: -10.424209
93
actor: -15.028400, actor_grad: 22.254862, policy mean: 0.494350, policy: -15.124894, entropy: -0.051176, actor_norm: 0.147669, critic: 896.603882, critic_grad: 1968.634766, value: 891.362427, critic_norm: 5.241434, value_mean: 43.073727, advantage: -29.383944
101
actor: -11.140105, actor_grad: 13.454467, policy mean: 0.663364, policy: -11.263744, entropy: -0.040578, actor_norm: 0.164217, critic: 334.527710, critic_grad: 2143.628174, value: 325.031250, critic_norm: 9.496468, value_mean: 108.483635, advantage: -17.474363
101
actor: -2.181667, actor_grad: 3.062660, policy mean: 0.603220, policy: -2.310618, entropy: -0.044911, actor_norm: 0.173863, critic: 32.178146, critic_grad: 346.224365, value: 22.887770, critic_norm: 9.290376, value_mean: 91.860855, advantage: -3.736918
101
actor: -8.147100, actor_grad: 5.362246, policy mean: 0.495220, policy: -8.246673, entropy: -0.057601, actor_norm: 0.157173, critic: 349.713074, critic_grad: 4819.658691, value: 337.382324, critic_norm: 12.330753, value_mean: 271.171265, advantage: -16.761089
101
actor: 14.895636, actor_grad: 30.091040, policy mean: 0.463492, policy: 14.713367, entropy: -0.042274, actor_norm: 0.224542, critic: 1113.742065, critic_grad: 5470.120117, value: 1103.035156, critic_norm: 10.706897, value_mean: 182.005524, advantage: 32.105675
101
actor: -2.528033, actor_grad: 6.117862, policy mean: 0.532049, policy: -2.698013, entropy: -0.044676, actor_norm: 0.214656, critic: 60.402481, critic_grad: 530.833618, value: 51.085960, critic_norm: 9.316523, value_mean: 70.039230, advantage: -5.146972
37
actor: -251.926987, actor_grad: 480.449646, policy mean: 1.706982, policy: -252.139359, entropy: -0.018675, actor_norm: 0.231049, critic: 22176.734375, critic_grad: 18464.255859, value: 22165.628906, critic_norm: 11.105744, value_mean: 151.689270, advantage: -148.728973
101
actor: 9.377790, actor_grad: 17.000336, policy mean: 0.539178, policy: 9.185712, entropy: -0.039703, actor_norm: 0.231781, critic: 316.978729, critic_grad: 3501.799072, value: 303.692261, critic_norm: 13.286461, value_mean: 245.840668, advantage: 17.003674
101
actor: 33.627205, actor_grad: 128.842667, policy mean: 0.715130, policy: 33.378693, entropy: -0.027188, actor_norm: 0.275701, critic: 2249.147217, critic_grad: 9346.453125, value: 2235.463623, critic_norm: 13.683690, value_mean: 222.198608, advantage: 45.885303
52
actor: -299.416809, actor_grad: 618.490356, policy mean: 1.329576, policy: -299.707458, entropy: -0.023213, actor_norm: 0.313880, critic: 45865.597656, critic_grad: 36725.652344, value: 45851.882812, critic_norm: 13.714481, value_mean: 223.420822, advantage: -213.556885
101
actor: 5.682774, actor_grad: 20.274513, policy mean: 0.551240, policy: 5.410029, entropy: -0.031718, actor_norm: 0.304462, critic: 109.014381, critic_grad: 2009.194824, value: 94.018616, critic_norm: 14.995769, value_mean: 255.884979, advantage: 9.302170
101
actor: 43.289288, actor_grad: 287.316284, policy mean: 0.727144, policy: 42.992371, entropy: -0.029011, actor_norm: 0.325928, critic: 2573.041504, critic_grad: 11358.971680, value: 2561.749512, critic_norm: 11.292064, value_mean: 166.449173, advantage: 49.079918
101
actor: 15.738564, actor_grad: 91.434113, policy mean: 0.661996, policy: 15.387138, entropy: -0.021652, actor_norm: 0.373077, critic: 632.651611, critic_grad: 3492.625488, value: 625.324890, critic_norm: 7.326715, value_mean: 72.623169, advantage: 24.475563
101
actor: 5.328609, actor_grad: 21.651194, policy mean: 0.487083, policy: 5.001754, entropy: -0.024983, actor_norm: 0.351838, critic: 111.081131, critic_grad: 772.335022, value: 104.860199, critic_norm: 6.220935, value_mean: 44.270462, advantage: 10.205029
101
actor: 2.889487, actor_grad: 17.949484, policy mean: 0.542589, policy: 2.602755, entropy: -0.026024, actor_norm: 0.312757, critic: 27.009766, critic_grad: 299.540619, value: 20.825941, critic_norm: 6.183824, value_mean: 28.820539, advantage: 3.972557
101
actor: 1.292862, actor_grad: 4.757530, policy mean: 0.478003, policy: 1.064226, entropy: -0.030446, actor_norm: 0.259083, critic: 21.573938, critic_grad: 145.443161, value: 14.582340, critic_norm: 6.991598, value_mean: 69.704239, advantage: 1.827988
101
actor: -6.540208, actor_grad: 12.789588, policy mean: 0.351111, policy: -6.673600, entropy: -0.052060, actor_norm: 0.185452, critic: 362.318176, critic_grad: 1873.603149, value: 352.658020, critic_norm: 9.660172, value_mean: 104.061295, advantage: -18.502632
46
actor: -125.503769, actor_grad: 247.221558, policy mean: 0.764649, policy: -125.639946, entropy: -0.029927, actor_norm: 0.166105, critic: 27157.861328, critic_grad: 22869.201172, value: 27145.673828, critic_norm: 12.188141, value_mean: 171.693283, advantage: -164.508743
101
actor: 24.822548, actor_grad: 80.375526, policy mean: 0.847882, policy: 24.643406, entropy: -0.031454, actor_norm: 0.210595, critic: 1037.743896, critic_grad: 7514.974609, value: 1024.395264, critic_norm: 13.348658, value_mean: 311.237976, advantage: 29.099873
90
actor: -334.491486, actor_grad: 1273.755981, policy mean: 0.690660, policy: -334.686005, entropy: -0.033650, actor_norm: 0.228194, critic: 213306.296875, critic_grad: 172205.828125, value: 213289.531250, critic_norm: 16.767403, value_mean: 470.595703, advantage: -458.156189
101
actor: 29.576153, actor_grad: 70.894104, policy mean: 0.502390, policy: 29.351763, entropy: -0.037978, actor_norm: 0.262370, critic: 3698.489990, critic_grad: 18656.449219, value: 3681.331055, critic_norm: 17.158838, value_mean: 407.147980, advantage: 56.547375
101
actor: 2.925088, actor_grad: 8.609086, policy mean: 0.318830, policy: 2.683558, entropy: -0.038423, actor_norm: 0.279953, critic: 85.303383, critic_grad: 1824.618896, value: 71.519592, critic_norm: 13.783789, value_mean: 273.790405, advantage: 8.419330
98
actor: -145.307159, actor_grad: 2497.243652, policy mean: 0.407617, policy: -145.537491, entropy: -0.039928, actor_norm: 0.270269, critic: 130230.296875, critic_grad: 186736.171875, value: 130214.843750, critic_norm: 15.454504, value_mean: 362.325562, advantage: -360.830566
101
actor: 14.592379, actor_grad: 82.755814, policy mean: 0.365112, policy: 14.300418, entropy: -0.033460, actor_norm: 0.325421, critic: 1378.003662, critic_grad: 6082.130859, value: 1367.678345, critic_norm: 10.325326, value_mean: 152.868744, advantage: 35.008324
101
actor: 20.048805, actor_grad: 64.137489, policy mean: 1.168328, policy: 19.719013, entropy: -0.018724, actor_norm: 0.348516, critic: 342.609070, critic_grad: 2247.991943, value: 335.632721, critic_norm: 6.976361, value_mean: 64.012146, advantage: 17.647713
101
actor: 4.834393, actor_grad: 24.955347, policy mean: 0.632969, policy: 4.517967, entropy: -0.023386, actor_norm: 0.339813, critic: 57.753132, critic_grad: 604.977600, value: 52.487320, critic_norm: 5.265812, value_mean: 22.487236, advantage: 7.148786
101
actor: -4.901268, actor_grad: 14.146327, policy mean: 0.441809, policy: -5.144350, entropy: -0.031727, actor_norm: 0.274809, critic: 145.270767, critic_grad: 1204.030151, value: 138.807343, critic_norm: 6.463420, value_mean: 60.942047, advantage: -11.732233
101
actor: -4.911406, actor_grad: 14.362350, policy mean: 0.723048, policy: -5.108978, entropy: -0.031538, actor_norm: 0.229110, critic: 49.838406, critic_grad: 482.827423, value: 43.743797, critic_norm: 6.094609, value_mean: 64.975098, advantage: -6.349014
101
actor: 3.194121, actor_grad: 5.669356, policy mean: 0.347734, policy: 3.035689, entropy: -0.042079, actor_norm: 0.200511, critic: 90.721313, critic_grad: 799.025574, value: 82.846245, critic_norm: 7.875066, value_mean: 112.713531, advantage: 8.571662
101
actor: -18.065001, actor_grad: 28.557026, policy mean: 0.581971, policy: -18.232731, entropy: -0.031091, actor_norm: 0.198820, critic: 1270.450562, critic_grad: 5065.912598, value: 1260.572388, critic_norm: 9.878135, value_mean: 166.169037, advantage: -34.635639
101
actor: -11.335540, actor_grad: 57.167461, policy mean: 0.541538, policy: -11.507842, entropy: -0.035206, actor_norm: 0.207508, critic: 449.803925, critic_grad: 4087.808838, value: 438.884552, critic_norm: 10.919381, value_mean: 208.683197, advantage: -20.818398
101
actor: -14.327879, actor_grad: 61.663734, policy mean: 0.538053, policy: -14.502487, entropy: -0.032172, actor_norm: 0.206780, critic: 770.640076, critic_grad: 7592.288574, value: 758.821411, critic_norm: 11.818670, value_mean: 265.887604, advantage: -27.476151
101
actor: 9.524418, actor_grad: 6.811948, policy mean: 0.358381, policy: 9.360910, entropy: -0.040705, actor_norm: 0.204212, critic: 655.822083, critic_grad: 3452.494873, value: 649.077637, critic_norm: 6.744449, value_mean: 109.034073, advantage: 25.375725
100
actor: -137.325104, actor_grad: 924.438538, policy mean: 0.958383, policy: -137.491089, entropy: -0.026854, actor_norm: 0.192835, critic: 20660.535156, critic_grad: 41656.824219, value: 20651.632812, critic_norm: 8.902457, value_mean: 146.152496, advantage: -143.677368
101
actor: 4.227572, actor_grad: 9.994851, policy mean: 0.350239, policy: 4.043837, entropy: -0.038400, actor_norm: 0.222136, critic: 134.542969, critic_grad: 925.663147, value: 129.152435, critic_norm: 5.390539, value_mean: 64.362396, advantage: 11.333213
101
actor: 3.146581, actor_grad: 3.281435, policy mean: 0.541635, policy: 2.979790, entropy: -0.033668, actor_norm: 0.200459, critic: 37.535339, critic_grad: 511.358276, value: 30.338768, critic_norm: 7.196570, value_mean: 97.821465, advantage: 5.368616
101
actor: -14.139565, actor_grad: 10.832235, policy mean: 0.465065, policy: -14.274714, entropy: -0.040052, actor_norm: 0.175201, critic: 932.889038, critic_grad: 2992.334473, value: 925.671997, critic_norm: 7.217018, value_mean: 104.934006, advantage: -30.358231
101
actor: 6.000432, actor_grad: 4.479026, policy mean: 0.467781, policy: 5.879625, entropy: -0.042154, actor_norm: 0.162961, critic: 169.428436, critic_grad: 851.868835, value: 164.158508, critic_norm: 5.269932, value_mean: 72.687996, advantage: 12.774054
101
actor: 17.494703, actor_grad: 25.722057, policy mean: 0.767003, policy: 17.392345, entropy: -0.037136, actor_norm: 0.139495, critic: 517.411011, critic_grad: 1813.581055, value: 510.926270, critic_norm: 6.484764, value_mean: 93.434677, advantage: 22.439522
101
actor: -7.233962, actor_grad: 6.058930, policy mean: 0.519538, policy: -7.317831, entropy: -0.047993, actor_norm: 0.131862, critic: 204.796783, critic_grad: 1325.079468, value: 196.981476, critic_norm: 7.815301, value_mean: 113.401146, advantage: -14.022327
101
actor: -13.958909, actor_grad: 25.852861, policy mean: 0.679002, policy: -14.046583, entropy: -0.040427, actor_norm: 0.128100, critic: 443.190460, critic_grad: 1956.371338, value: 436.070312, critic_norm: 7.120162, value_mean: 101.605019, advantage: -20.722265
101
actor: -4.925205, actor_grad: 5.542366, policy mean: 0.648012, policy: -4.971440, entropy: -0.054544, actor_norm: 0.100780, critic: 66.736397, critic_grad: 637.573059, value: 60.011547, critic_norm: 6.724846, value_mean: 97.941772, advantage: -7.679244
24
actor: -76.417381, actor_grad: 80.509811, policy mean: 0.695529, policy: -76.467407, entropy: -0.051019, actor_norm: 0.101042, critic: 11938.517578, critic_grad: 11336.269531, value: 11930.546875, critic_norm: 7.970849, value_mean: 120.590599, advantage: -108.998360
101
actor: -2.969442, actor_grad: 0.534489, policy mean: 0.615711, policy: -3.049716, entropy: -0.048647, actor_norm: 0.128922, critic: 37.398308, critic_grad: 494.143616, value: 31.140106, critic_norm: 6.258201, value_mean: 90.367149, advantage: -4.809926
101
actor: 11.470684, actor_grad: 1.088998, policy mean: 0.542030, policy: 11.397457, entropy: -0.054170, actor_norm: 0.127398, critic: 442.948029, critic_grad: 1555.166504, value: 437.091248, critic_norm: 5.856791, value_mean: 83.941574, advantage: 20.899612
101
actor: 13.778056, actor_grad: 3.442505, policy mean: 0.547646, policy: 13.684822, entropy: -0.047072, actor_norm: 0.140306, critic: 629.837402, critic_grad: 2210.176514, value: 623.327087, critic_norm: 6.510344, value_mean: 97.628799, advantage: 24.862055
101
actor: 1.814640, actor_grad: 0.115733, policy mean: 0.496811, policy: 1.736346, entropy: -0.050441, actor_norm: 0.128736, critic: 22.493610, critic_grad: 326.344482, value: 14.973007, critic_norm: 7.520603, value_mean: 110.161156, advantage: 3.575436
101
actor: -12.646859, actor_grad: 2.708481, policy mean: 0.459546, policy: -12.708522, entropy: -0.055704, actor_norm: 0.117367, critic: 759.445679, critic_grad: 2738.723389, value: 751.643738, critic_norm: 7.801962, value_mean: 117.901794, advantage: -27.342705
101
actor: 3.857090, actor_grad: 0.577110, policy mean: 0.576008, policy: 3.800551, entropy: -0.054304, actor_norm: 0.110843, critic: 54.610142, critic_grad: 579.494263, value: 49.230972, critic_norm: 5.379168, value_mean: 68.982384, advantage: 6.895732
101
actor: 8.284264, actor_grad: 2.533720, policy mean: 0.492716, policy: 8.220589, entropy: -0.052290, actor_norm: 0.115966, critic: 276.503723, critic_grad: 1437.346558, value: 269.671082, critic_norm: 6.832655, value_mean: 99.259544, advantage: 16.101101
101
actor: -12.044428, actor_grad: 5.833821, policy mean: 0.532571, policy: -12.093834, entropy: -0.053946, actor_norm: 0.103352, critic: 523.426025, critic_grad: 3229.798096, value: 515.347473, critic_norm: 8.078565, value_mean: 123.255371, advantage: -22.700367
101
actor: -11.134548, actor_grad: 16.325230, policy mean: 0.590380, policy: -11.183643, entropy: -0.053654, actor_norm: 0.102749, critic: 366.733673, critic_grad: 2297.280029, value: 360.150787, critic_norm: 6.582885, value_mean: 98.901993, advantage: -18.877592
101
actor: 7.685292, actor_grad: 1.118368, policy mean: 0.513702, policy: 7.639536, entropy: -0.056306, actor_norm: 0.102062, critic: 228.079468, critic_grad: 1173.761719, value: 222.435333, critic_norm: 5.644137, value_mean: 76.082153, advantage: 14.893232
101
actor: 18.854664, actor_grad: 19.446459, policy mean: 0.669225, policy: 18.796013, entropy: -0.046962, actor_norm: 0.105613, critic: 828.255249, critic_grad: 3280.710938, value: 821.498535, critic_norm: 6.756725, value_mean: 98.896667, advantage: 28.533794
101
actor: 6.416882, actor_grad: 4.561181, policy mean: 0.494340, policy: 6.358679, entropy: -0.056179, actor_norm: 0.114381, critic: 172.188416, critic_grad: 1747.629883, value: 164.808014, critic_norm: 7.380401, value_mean: 105.985680, advantage: 12.694092
101
actor: -11.779328, actor_grad: 6.193174, policy mean: 0.476775, policy: -11.838835, entropy: -0.056418, actor_norm: 0.115924, critic: 631.502869, critic_grad: 2435.469238, value: 623.356995, critic_norm: 8.145854, value_mean: 119.489326, advantage: -24.941040
101
actor: -1.589390, actor_grad: 1.221340, policy mean: 0.514950, policy: -1.647459, entropy: -0.053410, actor_norm: 0.111480, critic: 20.436390, critic_grad: 275.940338, value: 14.017635, critic_norm: 6.418755, value_mean: 90.943733, advantage: -3.309583
101
actor: 7.295537, actor_grad: 3.330683, policy mean: 0.608697, policy: 7.246338, entropy: -0.052022, actor_norm: 0.101221, critic: 145.363297, critic_grad: 881.957825, value: 139.276550, critic_norm: 6.086751, value_mean: 85.604927, advantage: 11.763164
101
actor: 13.902994, actor_grad: 16.129887, policy mean: 0.560807, policy: 13.833380, entropy: -0.048015, actor_norm: 0.117629, critic: 611.043091, critic_grad: 1827.608887, value: 605.023071, critic_norm: 6.020040, value_mean: 84.306770, advantage: 24.573997
101
actor: 10.843301, actor_grad: 3.232835, policy mean: 0.703506, policy: 10.783133, entropy: -0.044673, actor_norm: 0.104841, critic: 328.469818, critic_grad: 2579.519043, value: 321.135529, critic_norm: 7.334296, value_mean: 105.602333, advantage: 17.106972
101
actor: -15.444454, actor_grad: 22.982359, policy mean: 0.568180, policy: -15.514215, entropy: -0.047991, actor_norm: 0.117752, critic: 771.297119, critic_grad: 3384.585938, value: 763.349121, critic_norm: 7.948009, value_mean: 124.893990, advantage: -27.483919
101
actor: -4.519625, actor_grad: 4.112849, policy mean: 0.915506, policy: -4.621637, entropy: -0.037170, actor_norm: 0.139183, critic: 52.587017, critic_grad: 678.503662, value: 46.367264, critic_norm: 6.219754, value_mean: 92.756554, advantage: -6.337010
101
actor: 15.500774, actor_grad: 19.646729, policy mean: 0.579823, policy: 15.411772, entropy: -0.044430, actor_norm: 0.133433, critic: 696.771301, critic_grad: 2035.084717, value: 690.591980, critic_norm: 6.179306, value_mean: 88.149025, advantage: 26.247442
101
actor: 3.088613, actor_grad: 5.042894, policy mean: 0.607518, policy: 3.018174, entropy: -0.047070, actor_norm: 0.117508, critic: 45.071991, critic_grad: 672.500549, value: 37.134598, critic_norm: 7.937392, value_mean: 121.282631, advantage: 5.049022
101
actor: -21.184559, actor_grad: 35.187908, policy mean: 0.700992, policy: -21.271446, entropy: -0.041388, actor_norm: 0.128275, critic: 1011.138000, critic_grad: 4405.261230, value: 1003.923218, critic_norm: 7.214798, value_mean: 118.559906, advantage: -31.440886
101
actor: -2.326661, actor_grad: 0.606633, policy mean: 0.511302, policy: -2.394228, entropy: -0.052171, actor_norm: 0.119738, critic: 29.212280, critic_grad: 384.160858, value: 22.936226, critic_norm: 6.276053, value_mean: 90.487015, advantage: -4.680019
101
actor: 13.287412, actor_grad: 8.830060, policy mean: 0.588022, policy: 13.221094, entropy: -0.050714, actor_norm: 0.117032, critic: 502.136993, critic_grad: 1637.837402, value: 496.107544, critic_norm: 6.029436, value_mean: 84.940605, advantage: 22.241600
101
actor: 2.953306, actor_grad: 3.922835, policy mean: 0.625091, policy: 2.886487, entropy: -0.045792, actor_norm: 0.112612, critic: 47.149376, critic_grad: 522.862854, value: 39.564720, critic_norm: 7.584657, value_mean: 107.481903, advantage: 3.971745
101
actor: -9.073421, actor_grad: 3.402885, policy mean: 0.374006, policy: -9.160170, entropy: -0.053153, actor_norm: 0.139902, critic: 614.046997, critic_grad: 2169.434570, value: 606.689087, critic_norm: 7.357902, value_mean: 107.403290, advantage: -24.597530
101
actor: 1.927132, actor_grad: 2.199693, policy mean: 0.483230, policy: 1.842982, entropy: -0.048608, actor_norm: 0.132758, critic: 23.157459, critic_grad: 300.696899, value: 17.037577, critic_norm: 6.119883, value_mean: 85.167358, advantage: 3.995589
101
actor: 9.369136, actor_grad: 4.554491, policy mean: 0.522614, policy: 9.292817, entropy: -0.050993, actor_norm: 0.127312, critic: 306.229706, critic_grad: 1390.903442, value: 299.943756, critic_norm: 6.285964, value_mean: 91.841194, advantage: 17.162901
101
actor: -2.989709, actor_grad: 1.276840, policy mean: 0.547377, policy: -3.050538, entropy: -0.050820, actor_norm: 0.111649, critic: 42.150440, critic_grad: 509.466309, value: 34.583336, critic_norm: 7.567105, value_mean: 110.579269, advantage: -5.698996
101
actor: -10.084883, actor_grad: 2.558270, policy mean: 0.508159, policy: -10.119926, entropy: -0.060121, actor_norm: 0.095164, critic: 408.201508, critic_grad: 1714.179565, value: 401.080658, critic_norm: 7.120837, value_mean: 105.462959, advantage: -19.916857
84
actor: -54.748959, actor_grad: 59.655991, policy mean: 0.787813, policy: -54.806622, entropy: -0.038145, actor_norm: 0.095806, critic: 4850.136230, critic_grad: 5332.727051, value: 4844.245117, critic_norm: 5.890936, value_mean: 79.287827, advantage: -69.423889
101
actor: 10.612687, actor_grad: 12.474714, policy mean: 0.523182, policy: 10.544489, entropy: -0.050479, actor_norm: 0.118677, critic: 428.583771, critic_grad: 1700.505371, value: 421.727509, critic_norm: 6.856274, value_mean: 97.191139, advantage: 20.460480
101
actor: -8.518098, actor_grad: 1.539656, policy mean: 0.476321, policy: -8.578734, entropy: -0.053002, actor_norm: 0.113639, critic: 333.627777, critic_grad: 1760.020630, value: 325.543945, critic_norm: 8.083838, value_mean: 119.750504, advantage: -18.035376
101
actor: -10.845603, actor_grad: 3.667545, policy mean: 0.505678, policy: -10.896858, entropy: -0.055899, actor_norm: 0.107154, critic: 482.948181, critic_grad: 1924.546265, value: 475.630341, critic_norm: 7.317845, value_mean: 109.985931, advantage: -21.677937
101
actor: -5.555051, actor_grad: 5.245319, policy mean: 0.785049, policy: -5.646580, entropy: -0.039377, actor_norm: 0.130906, critic: 56.847332, critic_grad: 612.853027, value: 49.695435, critic_norm: 7.151898, value_mean: 105.702789, advantage: -6.802956
101
actor: 1.137466, actor_grad: 0.446851, policy mean: 0.470137, policy: 1.056939, entropy: -0.050967, actor_norm: 0.131493, critic: 11.578805, critic_grad: 164.380707, value: 5.021988, critic_norm: 6.556817, value_mean: 97.213593, advantage: 2.056046
101
actor: 5.608208, actor_grad: 2.200439, policy mean: 0.519752, policy: 5.527670, entropy: -0.049494, actor_norm: 0.130032, critic: 119.462860, critic_grad: 968.042725, value: 112.886703, critic_norm: 6.576160, value_mean: 95.110466, advantage: 10.258795
101
actor: 7.604738, actor_grad: 0.755959, policy mean: 0.537291, policy: 7.526218, entropy: -0.050492, actor_norm: 0.129013, critic: 202.471222, critic_grad: 1153.278809, value: 196.558487, critic_norm: 5.912734, value_mean: 84.628159, advantage: 14.013075
101
actor: 14.393963, actor_grad: 9.816408, policy mean: 0.543108, policy: 14.319158, entropy: -0.046911, actor_norm: 0.121715, critic: 700.895020, critic_grad: 2083.176025, value: 694.843384, critic_norm: 6.051665, value_mean: 81.543213, advantage: 26.344124
101
actor: -5.915400, actor_grad: 12.417237, policy mean: 0.660429, policy: -5.996642, entropy: -0.042278, actor_norm: 0.123520, critic: 86.695412, critic_grad: 1142.379639, value: 80.333588, critic_norm: 6.361825, value_mean: 91.502625, advantage: -8.777953
101
actor: 7.370377, actor_grad: 5.660359, policy mean: 0.568295, policy: 7.293448, entropy: -0.047586, actor_norm: 0.124514, critic: 167.907562, critic_grad: 1092.205444, value: 162.037048, critic_norm: 5.870518, value_mean: 84.914398, advantage: 12.696703
101
actor: 13.145831, actor_grad: 6.803848, policy mean: 0.474632, policy: 13.087093, entropy: -0.054915, actor_norm: 0.113653, critic: 768.177856, critic_grad: 2794.299072, value: 761.883789, critic_norm: 6.294077, value_mean: 91.577454, advantage: 27.579781
101
actor: 6.926409, actor_grad: 8.334666, policy mean: 0.466211, policy: 6.855676, entropy: -0.052876, actor_norm: 0.123609, critic: 231.932846, critic_grad: 1812.632935, value: 225.080292, critic_norm: 6.852550, value_mean: 102.826904, advantage: 14.776276
101
actor: -8.079592, actor_grad: 0.928890, policy mean: 0.521021, policy: -8.135646, entropy: -0.054304, actor_norm: 0.110358, critic: 255.348831, critic_grad: 1449.977295, value: 247.886536, critic_norm: 7.462291, value_mean: 115.054108, advantage: -15.669806
101
actor: -8.561745, actor_grad: 6.540316, policy mean: 0.585046, policy: -8.609243, entropy: -0.053137, actor_norm: 0.100635, critic: 222.724976, critic_grad: 1302.651855, value: 215.700836, critic_norm: 7.024140, value_mean: 105.646744, advantage: -14.675853
101
actor: -11.649750, actor_grad: 3.434127, policy mean: 0.579517, policy: -11.714778, entropy: -0.049470, actor_norm: 0.114498, critic: 416.190216, critic_grad: 2095.471924, value: 409.300018, critic_norm: 6.890209, value_mean: 112.819672, advantage: -20.029312
101
actor: 8.121783, actor_grad: 5.964924, policy mean: 0.497318, policy: 8.055862, entropy: -0.053041, actor_norm: 0.118962, critic: 269.775177, critic_grad: 1101.516113, value: 264.194427, critic_norm: 5.580747, value_mean: 79.045311, advantage: 16.242229
101
actor: 9.554634, actor_grad: 2.594802, policy mean: 0.496052, policy: 9.500545, entropy: -0.057280, actor_norm: 0.111370, critic: 377.720520, critic_grad: 1498.685059, value: 371.268066, critic_norm: 6.452445, value_mean: 95.939667, advantage: 19.182344
68
actor: -134.197586, actor_grad: 130.382645, policy mean: 1.153526, policy: -134.316086, entropy: -0.004409, actor_norm: 0.122903, critic: 14042.522461, critic_grad: 12693.000000, value: 14035.138672, critic_norm: 7.383727, value_mean: 120.917610, advantage: -118.442490
101
actor: 11.176369, actor_grad: 16.449583, policy mean: 0.493412, policy: 11.083757, entropy: -0.046757, actor_norm: 0.139368, critic: 512.542969, critic_grad: 1882.940552, value: 506.402649, critic_norm: 6.140327, value_mean: 93.273743, advantage: 22.486523
101
actor: 4.255529, actor_grad: 2.711860, policy mean: 0.540315, policy: 4.163970, entropy: -0.044247, actor_norm: 0.135807, critic: 70.588638, critic_grad: 677.717834, value: 63.572960, critic_norm: 7.015676, value_mean: 108.771690, advantage: 7.753043
101
actor: -17.935148, actor_grad: 8.669572, policy mean: 0.608894, policy: -18.011909, entropy: -0.046398, actor_norm: 0.123159, critic: 879.522766, critic_grad: 3219.306152, value: 871.366821, critic_norm: 8.155918, value_mean: 137.471130, advantage: -29.480377
56
actor: -167.615814, actor_grad: 288.568878, policy mean: 1.332970, policy: -167.756699, entropy: -0.020398, actor_norm: 0.161279, critic: 14953.023438, critic_grad: 14608.069336, value: 14945.364258, critic_norm: 7.659191, value_mean: 133.425308, advantage: -121.833069
101
actor: -5.157104, actor_grad: 2.293319, policy mean: 0.377930, policy: -5.285197, entropy: -0.044453, actor_norm: 0.172546, critic: 205.043365, critic_grad: 1119.701782, value: 198.676605, critic_norm: 6.366757, value_mean: 96.047226, advantage: -14.000060
101
actor: 6.139395, actor_grad: 0.518813, policy mean: 0.410951, policy: 6.013040, entropy: -0.043537, actor_norm: 0.169892, critic: 220.076019, critic_grad: 1081.528076, value: 214.113495, critic_norm: 5.962523, value_mean: 86.424797, advantage: 14.619746
101
actor: 11.920433, actor_grad: 4.020624, policy mean: 0.549635, policy: 11.808056, entropy: -0.042221, actor_norm: 0.154598, critic: 467.307281, critic_grad: 1852.515991, value: 461.135803, critic_norm: 6.171482, value_mean: 90.293625, advantage: 21.432751
101
actor: 10.254665, actor_grad: 14.815598, policy mean: 0.621526, policy: 10.160495, entropy: -0.042787, actor_norm: 0.136958, critic: 273.240204, critic_grad: 1479.931152, value: 267.412354, critic_norm: 5.827864, value_mean: 84.184601, advantage: 16.285753
101
actor: 7.638039, actor_grad: 7.283358, policy mean: 0.539182, policy: 7.577249, entropy: -0.050444, actor_norm: 0.111235, critic: 200.519012, critic_grad: 1279.973755, value: 194.113312, critic_norm: 6.405695, value_mean: 92.885986, advantage: 13.860624
101
actor: -4.160393, actor_grad: 2.764182, policy mean: 0.543395, policy: -4.208385, entropy: -0.054036, actor_norm: 0.102028, critic: 68.833923, critic_grad: 794.527832, value: 61.980049, critic_norm: 6.853873, value_mean: 100.970558, advantage: -7.704998
101
actor: -11.148699, actor_grad: 11.077123, policy mean: 0.644915, policy: -11.177213, entropy: -0.058754, actor_norm: 0.087268, critic: 303.988007, critic_grad: 1917.535156, value: 296.588135, critic_norm: 7.399861, value_mean: 110.339905, advantage: -17.193058
22
actor: -156.835098, actor_grad: 171.079895, policy mean: 1.454797, policy: -156.894775, entropy: -0.039128, actor_norm: 0.098805, critic: 10453.938477, critic_grad: 9542.404297, value: 10446.876953, critic_norm: 7.061657, value_mean: 112.690559, advantage: -101.956795
101
actor: 1.322742, actor_grad: 0.523957, policy mean: 0.609906, policy: 1.248068, entropy: -0.046171, actor_norm: 0.120845, critic: 12.091866, critic_grad: 138.560699, value: 6.407634, critic_norm: 5.684232, value_mean: 85.672310, advantage: 1.809768
101
actor: 13.028037, actor_grad: 4.686617, policy mean: 0.512604, policy: 12.955660, entropy: -0.049399, actor_norm: 0.121775, critic: 639.945190, critic_grad: 2034.863159, value: 634.259399, critic_norm: 5.685796, value_mean: 76.734085, advantage: 25.147339
101
actor: 3.843465, actor_grad: 0.692521, policy mean: 0.514522, policy: 3.776031, entropy: -0.051628, actor_norm: 0.119062, critic: 70.409454, critic_grad: 660.337219, value: 63.524387, critic_norm: 6.885065, value_mean: 105.608208, advantage: 7.642348
101
actor: -10.521088, actor_grad: 1.398206, policy mean: 0.515882, policy: -10.571573, entropy: -0.056407, actor_norm: 0.106892, critic: 434.548706, critic_grad: 1978.679810, value: 427.276337, critic_norm: 7.272361, value_mean: 115.425194, advantage: -20.636322
101
actor: -10.067869, actor_grad: 5.679053, policy mean: 0.568328, policy: -10.124904, entropy: -0.052685, actor_norm: 0.109719, critic: 321.169434, critic_grad: 1651.916626, value: 314.487488, critic_norm: 6.681958, value_mean: 105.618057, advantage: -17.616186
101
actor: -1.382292, actor_grad: 0.950964, policy mean: 0.497376, policy: -1.441751, entropy: -0.054224, actor_norm: 0.113684, critic: 16.193361, critic_grad: 223.936737, value: 10.179399, critic_norm: 6.013962, value_mean: 91.785149, advantage: -2.919754
101
actor: 7.751640, actor_grad: 2.353851, policy mean: 0.523436, policy: 7.678938, entropy: -0.048498, actor_norm: 0.121200, critic: 230.251038, critic_grad: 1267.786621, value: 224.359619, critic_norm: 5.891420, value_mean: 88.792046, advantage: 14.681807
101
actor: 9.381310, actor_grad: 4.038398, policy mean: 0.480277, policy: 9.303835, entropy: -0.050489, actor_norm: 0.127965, critic: 381.235260, critic_grad: 1362.179688, value: 375.618225, critic_norm: 5.617027, value_mean: 84.509247, advantage: 19.374971
101
actor: 11.174320, actor_grad: 15.100919, policy mean: 0.568202, policy: 11.100206, entropy: -0.047011, actor_norm: 0.121125, critic: 392.163361, critic_grad: 1497.612061, value: 386.120544, critic_norm: 6.042803, value_mean: 88.367081, advantage: 19.597466
101
actor: 2.645531, actor_grad: 0.824884, policy mean: 0.539457, policy: 2.597068, entropy: -0.055057, actor_norm: 0.103520, critic: 31.898689, critic_grad: 394.324310, value: 25.096542, critic_norm: 6.802146, value_mean: 107.092728, advantage: 4.666127
101
actor: -11.713176, actor_grad: 9.089781, policy mean: 0.619206, policy: -11.799662, entropy: -0.045112, actor_norm: 0.131597, critic: 383.525299, critic_grad: 3089.261475, value: 376.158936, critic_norm: 7.366362, value_mean: 144.564102, advantage: -19.358009
101
actor: -7.438654, actor_grad: 0.932287, policy mean: 0.492929, policy: -7.517106, entropy: -0.049643, actor_norm: 0.128096, critic: 240.502762, critic_grad: 1279.466431, value: 233.884369, critic_norm: 6.618391, value_mean: 105.986328, advantage: -15.228480
101
actor: 8.451836, actor_grad: 11.273545, policy mean: 0.576023, policy: 8.346816, entropy: -0.039958, actor_norm: 0.144978, critic: 211.716934, critic_grad: 1672.658081, value: 205.195740, critic_norm: 6.521194, value_mean: 119.298340, advantage: 14.314087
101
actor: 7.867239, actor_grad: 4.897468, policy mean: 0.438295, policy: 7.755420, entropy: -0.041915, actor_norm: 0.153733, critic: 332.580658, critic_grad: 1543.547607, value: 326.105103, critic_norm: 6.475567, value_mean: 104.564537, advantage: 17.943445
101
actor: 0.050389, actor_grad: 0.307753, policy mean: 0.409243, policy: -0.044915, entropy: -0.047631, actor_norm: 0.142934, critic: 7.936910, critic_grad: 20.053167, value: 1.020028, critic_norm: 6.916882, value_mean: 116.767426, advantage: -0.129596
101
actor: 1.518694, actor_grad: 1.188978, policy mean: 0.501931, policy: 1.394714, entropy: -0.038565, actor_norm: 0.162544, critic: 15.906857, critic_grad: 349.577820, value: 9.602234, critic_norm: 6.304623, value_mean: 103.599945, advantage: 2.837510
101
actor: 5.670303, actor_grad: 8.765533, policy mean: 0.417864, policy: 5.555515, entropy: -0.043705, actor_norm: 0.158493, critic: 181.333984, critic_grad: 1252.312256, value: 176.104111, critic_norm: 5.229874, value_mean: 63.390800, advantage: 13.247385
101
actor: 6.910417, actor_grad: 3.360161, policy mean: 0.482186, policy: 6.774520, entropy: -0.037946, actor_norm: 0.173843, critic: 209.013733, critic_grad: 1279.627319, value: 203.641083, critic_norm: 5.372654, value_mean: 53.655140, advantage: 14.190611
101
actor: 1.941982, actor_grad: 1.841493, policy mean: 0.402088, policy: 1.830529, entropy: -0.045827, actor_norm: 0.157280, critic: 27.838491, critic_grad: 406.401215, value: 21.956188, critic_norm: 5.882303, value_mean: 63.572990, advantage: 4.580195
101
actor: -9.990165, actor_grad: 18.419533, policy mean: 1.085456, policy: -10.092653, entropy: -0.031091, actor_norm: 0.133579, critic: 91.125648, critic_grad: 1137.344482, value: 83.988533, critic_norm: 7.137115, value_mean: 145.148849, advantage: -9.097347
26
actor: -212.474091, actor_grad: 257.862061, policy mean: 1.852761, policy: -212.659454, entropy: -0.009948, actor_norm: 0.195306, critic: 12823.352539, critic_grad: 10916.948242, value: 12816.084961, critic_norm: 7.267488, value_mean: 125.186699, advantage: -112.747192
101
actor: 7.730784, actor_grad: 4.521143, policy mean: 0.468354, policy: 7.564795, entropy: -0.037750, actor_norm: 0.203739, critic: 270.892059, critic_grad: 1112.369507, value: 265.498138, critic_norm: 5.393910, value_mean: 79.220802, advantage: 16.279221
101
actor: 5.349543, actor_grad: 13.542208, policy mean: 0.369607, policy: 5.197728, entropy: -0.040534, actor_norm: 0.192349, critic: 222.164902, critic_grad: 1191.445557, value: 216.101013, critic_norm: 6.063888, value_mean: 87.138290, advantage: 14.656010
101
actor: 4.529747, actor_grad: 3.380878, policy mean: 0.627919, policy: 4.384396, entropy: -0.036371, actor_norm: 0.181722, critic: 67.597412, critic_grad: 809.172119, value: 60.628296, critic_norm: 6.969113, value_mean: 128.911423, advantage: 7.515070
101
actor: -10.766501, actor_grad: 2.512940, policy mean: 0.429913, policy: -10.894003, entropy: -0.044345, actor_norm: 0.171847, critic: 644.954956, critic_grad: 2785.474854, value: 637.462280, critic_norm: 7.492676, value_mean: 142.819183, advantage: -25.004745
101
actor: 8.071906, actor_grad: 14.499196, policy mean: 1.178608, policy: 7.877574, entropy: -0.015798, actor_norm: 0.210130, critic: 49.034210, critic_grad: 847.139099, value: 41.621716, critic_norm: 7.412495, value_mean: 155.806610, advantage: 6.365365
101
actor: -9.252944, actor_grad: 8.725147, policy mean: 0.967890, policy: -9.452788, entropy: -0.022522, actor_norm: 0.222366, critic: 117.769821, critic_grad: 980.371704, value: 111.348419, critic_norm: 6.421402, value_mean: 116.900780, advantage: -10.300220
101
actor: 7.266537, actor_grad: 22.368286, policy mean: 0.396200, policy: 7.090316, entropy: -0.036597, actor_norm: 0.212818, critic: 330.756561, critic_grad: 1614.284546, value: 324.796936, critic_norm: 5.959618, value_mean: 99.029167, advantage: 17.877663
101
actor: 9.266943, actor_grad: 9.430471, policy mean: 0.444870, policy: 9.088509, entropy: -0.031741, actor_norm: 0.210175, critic: 431.829834, critic_grad: 1802.051758, value: 426.055603, critic_norm: 5.774216, value_mean: 84.988739, advantage: 20.438095
101
actor: 6.034077, actor_grad: 12.214528, policy mean: 0.402645, policy: 5.871717, entropy: -0.036415, actor_norm: 0.198774, critic: 230.540085, critic_grad: 1173.667603, value: 225.278717, critic_norm: 5.261369, value_mean: 38.821457, advantage: 14.898726
101
actor: -0.964690, actor_grad: 0.567113, policy mean: 0.434993, policy: -1.090825, entropy: -0.041742, actor_norm: 0.167877, critic: 11.934999, critic_grad: 179.634277, value: 6.157172, critic_norm: 5.777827, value_mean: 84.727180, advantage: -2.465029
101
actor: -6.236328, actor_grad: 6.057154, policy mean: 0.512860, policy: -6.318970, entropy: -0.053320, actor_norm: 0.135963, critic: 173.647339, critic_grad: 1100.748535, value: 166.775101, critic_norm: 6.872235, value_mean: 112.975082, advantage: -12.535379
101
actor: 13.411456, actor_grad: 4.086308, policy mean: 0.510117, policy: 13.303159, entropy: -0.043529, actor_norm: 0.151827, critic: 697.859741, critic_grad: 6164.582520, value: 689.757324, critic_norm: 8.102435, value_mean: 216.029510, advantage: 26.192017
101
actor: -1.960513, actor_grad: 0.880277, policy mean: 0.474902, policy: -2.081838, entropy: -0.042591, actor_norm: 0.163916, critic: 41.392632, critic_grad: 668.733154, value: 33.063309, critic_norm: 8.329324, value_mean: 172.419022, advantage: -4.512814
101
actor: 15.165056, actor_grad: 53.263359, policy mean: 0.492351, policy: 15.024778, entropy: -0.039790, actor_norm: 0.180068, critic: 1066.962646, critic_grad: 6552.103027, value: 1057.465088, critic_norm: 9.497602, value_mean: 228.034637, advantage: 32.021111
101
actor: 17.247561, actor_grad: 20.655962, policy mean: 0.722049, policy: 17.057697, entropy: -0.025353, actor_norm: 0.215215, critic: 633.502380, critic_grad: 4753.888184, value: 624.468628, critic_norm: 9.033742, value_mean: 212.104034, advantage: 23.988590
101
actor: -10.350842, actor_grad: 38.962929, policy mean: 0.505456, policy: -10.524597, entropy: -0.035197, actor_norm: 0.208953, critic: 431.807983, critic_grad: 3648.378662, value: 423.506714, critic_norm: 8.301281, value_mean: 194.644165, advantage: -20.517117
101
actor: 35.902809, actor_grad: 172.335159, policy mean: 0.589898, policy: 35.696198, entropy: -0.025636, actor_norm: 0.232245, critic: 3854.581787, critic_grad: 14611.822266, value: 3846.491699, critic_norm: 8.090192, value_mean: 189.585754, advantage: 61.268143
93
actor: -127.167641, actor_grad: 494.677856, policy mean: 1.309348, policy: -127.407608, entropy: -0.018704, actor_norm: 0.258671, critic: 8790.041992, critic_grad: 13196.110352, value: 8783.798828, critic_norm: 6.242680, value_mean: 107.207977, advantage: -93.518196
101
actor: 8.537657, actor_grad: 16.779984, policy mean: 1.026186, policy: 8.270178, entropy: -0.019130, actor_norm: 0.286610, critic: 56.733326, critic_grad: 539.469666, value: 52.051796, critic_norm: 4.681531, value_mean: 52.864716, advantage: 6.977909
101
actor: 2.045037, actor_grad: 0.890144, policy mean: 0.311562, policy: 1.808462, entropy: -0.026480, actor_norm: 0.263054, critic: 44.702354, critic_grad: 405.279419, value: 40.005653, critic_norm: 4.696699, value_mean: 51.566330, advantage: 6.133776
101
actor: -1.932760, actor_grad: 7.832189, policy mean: 0.525874, policy: -2.130569, entropy: -0.026778, actor_norm: 0.224587, critic: 25.922272, critic_grad: 302.202759, value: 20.930861, critic_norm: 4.991411, value_mean: 63.305485, advantage: -4.431235
101
actor: 2.831011, actor_grad: 5.394264, policy mean: 0.432849, policy: 2.686667, entropy: -0.039177, actor_norm: 0.183520, critic: 47.778122, critic_grad: 527.858704, value: 41.547440, critic_norm: 6.230682, value_mean: 91.636444, advantage: 6.379258
101
actor: -24.404646, actor_grad: 17.561211, policy mean: 0.505634, policy: -24.520176, entropy: -0.042998, actor_norm: 0.158527, critic: 2378.725830, critic_grad: 8788.184570, value: 2369.870605, critic_norm: 8.855179, value_mean: 230.990173, advantage: -48.030045
101
actor: -35.224277, actor_grad: 105.614166, policy mean: 0.844992, policy: -35.353546, entropy: -0.035755, actor_norm: 0.165022, critic: 1522.192749, critic_grad: 14262.095703, value: 1509.617676, critic_norm: 12.575046, value_mean: 449.460327, advantage: -38.513672
101
actor: -47.038651, actor_grad: 52.002872, policy mean: 0.529586, policy: -47.190628, entropy: -0.038049, actor_norm: 0.190026, critic: 7977.262695, critic_grad: 46391.519531, value: 7960.013672, critic_norm: 17.248884, value_mean: 683.771606, advantage: -89.079720
101
actor: -47.685551, actor_grad: 184.415222, policy mean: 0.770733, policy: -47.869762, entropy: -0.028416, actor_norm: 0.212629, critic: 4133.193848, critic_grad: 38050.941406, value: 4111.860352, critic_norm: 21.333441, value_mean: 842.913086, advantage: -61.426186
101
actor: -11.324305, actor_grad: 41.728291, policy mean: 0.680165, policy: -11.522753, entropy: -0.030737, actor_norm: 0.229185, critic: 296.850250, critic_grad: 5938.187500, value: 276.435547, critic_norm: 20.414709, value_mean: 587.786377, advantage: -16.245998
101
actor: 48.798054, actor_grad: 258.772400, policy mean: 1.073938, policy: 48.584530, entropy: -0.026754, actor_norm: 0.240276, critic: 1917.186890, critic_grad: 26281.255859, value: 1892.549683, critic_norm: 24.637201, value_mean: 884.601196, advantage: 41.245010
101
actor: 29.552790, actor_grad: 107.666946, policy mean: 0.430774, policy: 29.291458, entropy: -0.028484, actor_norm: 0.289815, critic: 4373.495605, critic_grad: 38461.082031, value: 4346.997070, critic_norm: 26.498541, value_mean: 879.448669, advantage: 61.336098
98
actor: -155.874130, actor_grad: 4487.477051, policy mean: 0.188768, policy: -156.182831, entropy: -0.032038, actor_norm: 0.340746, critic: 684485.062500, critic_grad: 689855.562500, value: 684461.875000, critic_norm: 23.174309, value_mean: 828.817139, advantage: -827.322144
101
actor: 40.173027, actor_grad: 199.466171, policy mean: 0.573257, policy: 39.689331, entropy: -0.015308, actor_norm: 0.499004, critic: 4606.369629, critic_grad: 19054.900391, value: 4592.500000, critic_norm: 13.869871, value_mean: 342.841248, advantage: 65.135071
101
actor: 18.315384, actor_grad: 75.143417, policy mean: 0.963648, policy: 17.681229, entropy: -0.005850, actor_norm: 0.640004, critic: 872.680054, critic_grad: 5033.169922, value: 865.537720, critic_norm: 7.142319, value_mean: 120.470871, advantage: 26.781914
100
actor: -7.348021, actor_grad: 314.354462, policy mean: 0.477178, policy: -8.007670, entropy: -0.013736, actor_norm: 0.673386, critic: 297.252258, critic_grad: 1525.778809, value: 292.680115, critic_norm: 4.572150, value_mean: 19.561016, advantage: -17.085892
101
actor: 1.519661, actor_grad: 1.251357, policy mean: 0.544759, policy: 0.949774, entropy: -0.009377, actor_norm: 0.579264, critic: 8.739002, critic_grad: 90.013512, value: 4.410244, critic_norm: 4.328759, value_mean: 36.728523, advantage: 1.258209
101
actor: 0.196969, actor_grad: 3.473528, policy mean: 0.137829, policy: -0.265686, entropy: -0.011702, actor_norm: 0.474356, critic: 9.849410, critic_grad: 148.812744, value: 5.503541, critic_norm: 4.345869, value_mean: 34.913620, advantage: -2.270570
101
actor: 0.918037, actor_grad: 4.275895, policy mean: 0.161588, policy: 0.550063, entropy: -0.006912, actor_norm: 0.374885, critic: 30.260963, critic_grad: 365.314240, value: 25.562071, critic_norm: 4.698892, value_mean: 44.414223, advantage: 4.940701
101
actor: 5.881338, actor_grad: 22.291435, policy mean: 0.389038, policy: 5.606958, entropy: -0.024929, actor_norm: 0.299309, critic: 212.127121, critic_grad: 999.448547, value: 207.071411, critic_norm: 5.055717, value_mean: 68.800491, advantage: 14.359414
101
actor: -16.348705, actor_grad: 45.053516, policy mean: 0.610138, policy: -16.546516, entropy: -0.041169, actor_norm: 0.238979, critic: 761.154297, critic_grad: 3125.674561, value: 753.707153, critic_norm: 7.447156, value_mean: 158.740341, advantage: -27.372934
101
actor: 24.282078, actor_grad: 90.491287, policy mean: 0.818122, policy: 24.071136, entropy: -0.029795, actor_norm: 0.240736, critic: 983.782654, critic_grad: 5374.104980, value: 975.601257, critic_norm: 8.181419, value_mean: 208.563156, advantage: 30.189251
101
actor: 19.514076, actor_grad: 168.405701, policy mean: 0.318930, policy: 19.256367, entropy: -0.026618, actor_norm: 0.284327, critic: 3271.281494, critic_grad: 12295.654297, value: 3264.308105, critic_norm: 6.973301, value_mean: 182.317322, advantage: 56.729774
101
actor: -18.845818, actor_grad: 37.456478, policy mean: 0.447965, policy: -19.102041, entropy: -0.024468, actor_norm: 0.280691, critic: 1904.835693, critic_grad: 7130.233398, value: 1897.673096, critic_norm: 7.162599, value_mean: 171.760178, advantage: -43.432594
101
actor: -4.295365, actor_grad: 5.295444, policy mean: 0.288806, policy: -4.542912, entropy: -0.028517, actor_norm: 0.276063, critic: 251.838715, critic_grad: 1862.054077, value: 245.758316, critic_norm: 6.080400, value_mean: 131.474426, advantage: -15.506470
101
actor: 8.219511, actor_grad: 40.348888, policy mean: 0.729706, policy: 7.972175, entropy: -0.025196, actor_norm: 0.272533, critic: 158.065765, critic_grad: 2554.464111, value: 149.758759, critic_norm: 8.307011, value_mean: 215.236343, advantage: 10.881304
101
actor: 6.272334, actor_grad: 6.890944, policy mean: 0.308144, policy: 6.007754, entropy: -0.023263, actor_norm: 0.287842, critic: 417.413513, critic_grad: 2471.991699, value: 411.896332, critic_norm: 5.517180, value_mean: 113.939247, advantage: 20.243727
101
actor: 11.552007, actor_grad: 83.891586, policy mean: 0.530112, policy: 11.308649, entropy: -0.029390, actor_norm: 0.272748, critic: 463.426910, critic_grad: 3336.444336, value: 456.834656, critic_norm: 6.592254, value_mean: 139.286606, advantage: 20.889141
100
actor: -5.305072, actor_grad: 134.863647, policy mean: 0.231704, policy: -5.592265, entropy: -0.030423, actor_norm: 0.317616, critic: 585.201111, critic_grad: 3424.730713, value: 580.401306, critic_norm: 4.799812, value_mean: 26.560898, advantage: -24.085772
101
actor: 2.916164, actor_grad: 2.266739, policy mean: 0.444533, policy: 2.650984, entropy: -0.020665, actor_norm: 0.285845, critic: 41.852058, critic_grad: 514.464722, value: 36.835045, critic_norm: 5.017014, value_mean: 34.287659, advantage: 5.642846
101
actor: -11.681026, actor_grad: 44.210686, policy mean: 0.500721, policy: -11.886254, entropy: -0.028704, actor_norm: 0.233933, critic: 579.992920, critic_grad: 2077.594238, value: 574.085144, critic_norm: 5.907761, value_mean: 73.943771, advantage: -23.897121
101
actor: -10.182259, actor_grad: 28.554626, policy mean: 0.447722, policy: -10.335374, entropy: -0.040521, actor_norm: 0.193636, critic: 544.403809, critic_grad: 2084.600098, value: 537.533386, critic_norm: 6.870445, value_mean: 121.093765, advantage: -23.080845
101
actor: -27.785563, actor_grad: 11.244065, policy mean: 0.440894, policy: -27.966156, entropy: -0.033593, actor_norm: 0.214186, critic: 4314.433594, critic_grad: 17859.548828, value: 4304.513672, critic_norm: 9.920008, value_mean: 325.085327, advantage: -65.006760
101
actor: -41.554585, actor_grad: 166.093384, policy mean: 1.293071, policy: -41.797478, entropy: -0.011350, actor_norm: 0.254241, critic: 1198.372803, critic_grad: 9338.926758, value: 1185.777344, critic_norm: 12.595519, value_mean: 384.769531, advantage: -31.937607
101
actor: 9.568010, actor_grad: 55.016468, policy mean: 0.529954, policy: 9.327081, entropy: -0.026720, actor_norm: 0.267650, critic: 327.207458, critic_grad: 4755.134277, value: 315.552673, critic_norm: 11.654800, value_mean: 355.212524, advantage: 17.523163
36
actor: -230.124542, actor_grad: 732.660706, policy mean: 0.679556, policy: -230.379547, entropy: -0.007416, actor_norm: 0.262425, critic: 116484.250000, critic_grad: 70117.632812, value: 116470.781250, critic_norm: 13.466190, value_mean: 343.697388, advantage: -341.222260
101
actor: 1.208239, actor_grad: 11.176695, policy mean: 0.358919, policy: 0.956466, entropy: -0.026547, actor_norm: 0.278321, critic: 41.830608, critic_grad: 1769.703369, value: 24.557240, critic_norm: 17.273369, value_mean: 648.583069, advantage: 3.442936
38
actor: -1178.649292, actor_grad: 2414.684082, policy mean: 2.142542, policy: -1178.914307, entropy: -0.015299, actor_norm: 0.280266, critic: 303394.437500, critic_grad: 166299.078125, value: 303375.125000, critic_norm: 19.298252, value_mean: 554.011658, advantage: -550.569397
101
actor: 145.082581, actor_grad: 845.045105, policy mean: 1.638412, policy: 144.776215, entropy: -0.015678, actor_norm: 0.322030, critic: 7107.811035, critic_grad: 45057.050781, value: 7087.932129, critic_norm: 19.878756, value_mean: 699.795166, advantage: 79.842484
101
actor: 47.794559, actor_grad: 486.752930, policy mean: 0.503154, policy: 47.410244, entropy: -0.016109, actor_norm: 0.400425, critic: 15075.971680, critic_grad: 53496.492188, value: 15060.528320, critic_norm: 15.442948, value_mean: 478.558624, advantage: 118.976341
77
actor: -318.617249, actor_grad: 1490.043335, policy mean: 1.562407, policy: -319.107544, entropy: -0.009291, actor_norm: 0.499564, critic: 40585.695312, critic_grad: 53470.011719, value: 40576.097656, critic_norm: 9.595976, value_mean: 208.153336, advantage: -201.425797
101
actor: 2.508867, actor_grad: 31.727665, policy mean: 0.141173, policy: 1.903377, entropy: -0.008814, actor_norm: 0.614304, critic: 362.920441, critic_grad: 1901.175903, value: 356.426514, critic_norm: 6.493938, value_mean: 30.801952, advantage: 15.122816
101
actor: 2.705160, actor_grad: 34.607964, policy mean: 0.447684, policy: 2.082121, entropy: -0.005948, actor_norm: 0.628987, critic: 82.922607, critic_grad: 393.327789, value: 76.888763, critic_norm: 6.033843, value_mean: 2.139486, advantage: 8.264478
101
actor: 11.909101, actor_grad: 68.586868, policy mean: 1.184051, policy: 11.367348, entropy: -0.007525, actor_norm: 0.549277, critic: 120.799210, critic_grad: 446.629639, value: 114.439438, critic_norm: 6.359774, value_mean: 2.470936, advantage: 9.733643
81
actor: -43.563652, actor_grad: 31.759701, policy mean: 3.545747, policy: -44.050011, entropy: -0.001507, actor_norm: 0.487864, critic: 160.890198, critic_grad: 735.724915, value: 154.458832, critic_norm: 6.431371, value_mean: 20.898932, advantage: -12.361427
101
actor: 0.878615, actor_grad: 0.495421, policy mean: 0.157636, policy: 0.461100, entropy: -0.010078, actor_norm: 0.427593, critic: 13.696095, critic_grad: 76.261879, value: 7.302369, critic_norm: 6.393725, value_mean: 20.555103, advantage: 1.105872
101
actor: 15.294292, actor_grad: 71.932083, policy mean: 0.849912, policy: 14.895601, entropy: -0.009101, actor_norm: 0.407792, critic: 322.718628, critic_grad: 1314.748047, value: 316.044830, critic_norm: 6.673792, value_mean: 38.320850, advantage: 17.751190
101
actor: -23.756470, actor_grad: 74.927078, policy mean: 0.557410, policy: -24.043686, entropy: -0.024419, actor_norm: 0.311637, critic: 1973.220703, critic_grad: 6271.273438, value: 1964.185547, critic_norm: 9.035107, value_mean: 195.017914, advantage: -44.144402
101
actor: -28.394800, actor_grad: 181.288376, policy mean: 0.864001, policy: -28.684776, entropy: -0.021267, actor_norm: 0.311243, critic: 1429.413818, critic_grad: 10101.993164, value: 1417.491455, critic_norm: 11.922357, value_mean: 393.230225, advantage: -33.881760
101
actor: 0.137370, actor_grad: 0.935386, policy mean: 0.600121, policy: -0.145029, entropy: -0.028610, actor_norm: 0.311009, critic: 31.913670, critic_grad: 321.928558, value: 17.210411, critic_norm: 14.703259, value_mean: 503.644653, advantage: -1.179319
34
actor: -914.413574, actor_grad: 2159.388672, policy mean: 2.495106, policy: -914.747986, entropy: -0.002038, actor_norm: 0.336408, critic: 136962.375000, critic_grad: 79468.328125, value: 136947.171875, critic_norm: 15.196696, value_mean: 371.542847, advantage: -370.047852
101
actor: 50.940578, actor_grad: 442.767059, policy mean: 0.629248, policy: 50.600655, entropy: -0.019668, actor_norm: 0.359593, critic: 6686.418457, critic_grad: 33569.785156, value: 6671.733398, critic_norm: 14.684844, value_mean: 502.250214, advantage: 79.853416
60
actor: -464.048523, actor_grad: 873.133606, policy mean: 1.257993, policy: -464.418304, entropy: -0.017021, actor_norm: 0.386801, critic: 119155.140625, critic_grad: 79589.109375, value: 119141.851562, critic_norm: 13.289068, value_mean: 356.986969, advantage: -343.711212
99
actor: -1145.283936, actor_grad: 25.383684, policy mean: 4.601825, policy: -1145.723755, entropy: 0.000000, actor_norm: 0.439794, critic: 63963.414062, critic_grad: 83789.398438, value: 63952.722656, critic_norm: 10.691995, value_mean: 254.860184, advantage: -252.873489
101
actor: 12.354870, actor_grad: 207.124695, policy mean: 0.282300, policy: 11.829109, entropy: -0.010543, actor_norm: 0.536303, critic: 2049.336914, critic_grad: 6644.410645, value: 2042.325684, critic_norm: 7.011339, value_mean: 57.098518, advantage: 43.493935
101
actor: 1.582333, actor_grad: 8.217393, policy mean: 0.132946, policy: 1.057595, entropy: -0.009952, actor_norm: 0.534690, critic: 67.577293, critic_grad: 492.464996, value: 60.997089, critic_norm: 6.580207, value_mean: 58.425968, advantage: 7.643377
101
actor: -20.740877, actor_grad: 68.451698, policy mean: 0.921957, policy: -21.159781, entropy: -0.013066, actor_norm: 0.431968, critic: 546.960999, critic_grad: 1922.313477, value: 539.746338, critic_norm: 7.214678, value_mean: 84.305939, advantage: -23.221989
101
actor: 0.174804, actor_grad: 1.007904, policy mean: 0.417404, policy: -0.191878, entropy: -0.026096, actor_norm: 0.392778, critic: 7.244303, critic_grad: 42.048508, value: 0.294519, critic_norm: 6.949785, value_mean: 113.005287, advantage: -0.464646
101
actor: -7.772195, actor_grad: 38.539989, policy mean: 0.400295, policy: -8.075264, entropy: -0.024719, actor_norm: 0.327788, critic: 432.972809, critic_grad: 2627.635254, value: 425.165771, critic_norm: 7.807036, value_mean: 159.986862, advantage: -20.476040
101
actor: -52.576965, actor_grad: 315.255890, policy mean: 1.767824, policy: -52.914127, entropy: -0.010333, actor_norm: 0.347498, critic: 829.131409, critic_grad: 6941.581543, value: 820.486633, critic_norm: 8.644790, value_mean: 237.025406, advantage: -28.364468
101
actor: 15.117285, actor_grad: 12.231071, policy mean: 0.628721, policy: 14.773716, entropy: -0.017917, actor_norm: 0.361486, critic: 602.796143, critic_grad: 2915.042969, value: 595.969421, critic_norm: 6.826702, value_mean: 117.136978, advantage: 24.144154
100
actor: -56.229427, actor_grad: 1520.642090, policy mean: 0.348986, policy: -56.541836, entropy: -0.028000, actor_norm: 0.340409, critic: 26366.531250, critic_grad: 41177.914062, value: 26359.484375, critic_norm: 7.047041, value_mean: 164.816971, advantage: -162.341843
101
actor: -7.715148, actor_grad: 61.729420, policy mean: 0.464496, policy: -8.037957, entropy: -0.019305, actor_norm: 0.342113, critic: 325.904694, critic_grad: 3547.550049, value: 318.217224, critic_norm: 7.687477, value_mean: 209.156281, advantage: -16.957069
76
actor: -432.303314, actor_grad: 686.580139, policy mean: 1.080734, policy: -432.617310, entropy: -0.018125, actor_norm: 0.332122, critic: 148564.875000, critic_grad: 120985.695312, value: 148553.859375, critic_norm: 11.018824, value_mean: 391.330231, advantage: -385.062714
101
actor: 11.675788, actor_grad: 91.029427, policy mean: 0.116121, policy: 11.297587, entropy: -0.004508, actor_norm: 0.382708, critic: 7285.135254, critic_grad: 19918.970703, value: 7276.161133, critic_norm: 8.974103, value_mean: 242.018982, advantage: 84.279625
101
actor: 25.545187, actor_grad: 55.915943, policy mean: 1.095126, policy: 25.153158, entropy: -0.015493, actor_norm: 0.407522, critic: 457.266052, critic_grad: 2017.222290, value: 451.674377, critic_norm: 5.591661, value_mean: 85.130615, advantage: 21.204390
101
actor: 33.351814, actor_grad: 128.825256, policy mean: 0.650082, policy: 32.980331, entropy: -0.011848, actor_norm: 0.383331, critic: 2711.820068, critic_grad: 8116.626465, value: 2705.030762, critic_norm: 6.789213, value_mean: 136.201630, advantage: 51.706818
101
actor: -20.752354, actor_grad: 6.858220, policy mean: 0.968329, policy: -21.104418, entropy: -0.014143, actor_norm: 0.366206, critic: 552.638062, critic_grad: 2481.123535, value: 547.051025, critic_norm: 5.587040, value_mean: 93.131889, advantage: -23.109818
101
actor: -12.700447, actor_grad: 75.499855, policy mean: 0.528947, policy: -12.996269, entropy: -0.020074, actor_norm: 0.315896, critic: 620.737366, critic_grad: 4132.871582, value: 612.531799, critic_norm: 8.205581, value_mean: 205.728348, advantage: -24.674221
101
actor: 20.013113, actor_grad: 98.573921, policy mean: 0.660182, policy: 19.683249, entropy: -0.010566, actor_norm: 0.340431, critic: 821.614136, critic_grad: 3527.456055, value: 815.633667, critic_norm: 5.980494, value_mean: 111.454010, advantage: 28.389713
101
actor: 15.024961, actor_grad: 15.245311, policy mean: 0.341546, policy: 14.706962, entropy: -0.017145, actor_norm: 0.335144, critic: 1948.465210, critic_grad: 4965.052246, value: 1943.242065, critic_norm: 5.223111, value_mean: 70.143257, advantage: 43.984734
101
actor: 6.319257, actor_grad: 19.609240, policy mean: 0.145528, policy: 6.027658, entropy: -0.018980, actor_norm: 0.310579, critic: 1607.076172, critic_grad: 4811.696777, value: 1601.458374, critic_norm: 5.617835, value_mean: 78.942749, advantage: 39.896362
101
actor: -15.175412, actor_grad: 73.459122, policy mean: 0.515698, policy: -15.425418, entropy: -0.020478, actor_norm: 0.270484, critic: 955.078430, critic_grad: 3329.550537, value: 949.671082, critic_norm: 5.407351, value_mean: 89.286926, advantage: -30.576225
101
actor: 9.805799, actor_grad: 32.341827, policy mean: 0.278875, policy: 9.595108, entropy: -0.029075, actor_norm: 0.239766, critic: 1225.469849, critic_grad: 3781.996338, value: 1219.812256, critic_norm: 5.657554, value_mean: 97.946701, advantage: 34.826843
101
actor: -14.602669, actor_grad: 25.839373, policy mean: 0.341908, policy: -14.792490, entropy: -0.030484, actor_norm: 0.220304, critic: 1881.850830, critic_grad: 7016.017578, value: 1875.508789, critic_norm: 6.342016, value_mean: 150.823090, advantage: -42.869705
101
actor: -18.903694, actor_grad: 83.712883, policy mean: 0.604936, policy: -19.079891, entropy: -0.032195, actor_norm: 0.208390, critic: 830.069153, critic_grad: 5012.281250, value: 822.773132, critic_norm: 7.296003, value_mean: 200.928467, advantage: -28.078102
99
actor: -136.222076, actor_grad: 2300.035645, policy mean: 0.263176, policy: -136.424057, entropy: -0.024429, actor_norm: 0.226411, critic: 262046.984375, critic_grad: 234216.218750, value: 262036.546875, critic_norm: 10.437579, value_mean: 513.853455, advantage: -511.866699
101
actor: 10.843716, actor_grad: 26.744356, policy mean: 0.611052, policy: 10.629622, entropy: -0.025488, actor_norm: 0.239582, critic: 294.811951, critic_grad: 4173.015625, value: 286.177734, critic_norm: 8.634201, value_mean: 283.063477, advantage: 16.643627
101
actor: -20.947882, actor_grad: 122.316826, policy mean: 0.406192, policy: -21.171515, entropy: -0.025525, actor_norm: 0.249158, critic: 2682.244629, critic_grad: 15917.517578, value: 2670.778809, critic_norm: 11.465819, value_mean: 412.268982, advantage: -51.554131
97
actor: -60.587635, actor_grad: 2067.383057, policy mean: 0.086049, policy: -60.842823, entropy: -0.028471, actor_norm: 0.283661, critic: 499968.531250, critic_grad: 431289.531250, value: 499954.812500, critic_norm: 13.721149, value_mean: 708.074829, advantage: -707.074829
101
actor: -26.554649, actor_grad: 233.825241, policy mean: 0.407459, policy: -26.847340, entropy: -0.018982, actor_norm: 0.311672, critic: 3456.468262, critic_grad: 24377.527344, value: 3443.207520, critic_norm: 13.260623, value_mean: 540.874146, advantage: -58.021896
53
actor: -407.203644, actor_grad: 943.934265, policy mean: 0.866130, policy: -407.515625, entropy: -0.013585, actor_norm: 0.325572, critic: 214313.687500, critic_grad: 128792.437500, value: 214299.125000, critic_norm: 14.568867, value_mean: 472.042786, advantage: -461.742462
56
actor: -552.898926, actor_grad: 1350.296265, policy mean: 1.242262, policy: -553.247192, entropy: -0.012859, actor_norm: 0.361144, critic: 187748.234375, critic_grad: 118698.046875, value: 187734.203125, critic_norm: 14.033535, value_mean: 443.696045, advantage: -432.103790
101
actor: 35.142025, actor_grad: 252.084869, policy mean: 0.440399, policy: 34.737576, entropy: -0.010631, actor_norm: 0.415080, critic: 5580.939453, critic_grad: 18833.398438, value: 5570.152344, critic_norm: 10.786896, value_mean: 284.644714, advantage: 72.656052
101
actor: 43.738667, actor_grad: 200.474075, policy mean: 0.624143, policy: 43.254597, entropy: -0.009173, actor_norm: 0.493246, critic: 3606.872314, critic_grad: 8646.248047, value: 3599.939453, critic_norm: 6.932813, value_mean: 74.069611, advantage: 58.978485
101
actor: -0.479104, actor_grad: 9.179454, policy mean: 0.776840, policy: -0.991200, entropy: -0.009347, actor_norm: 0.521442, critic: 12.013147, critic_grad: 160.642761, value: 6.922374, critic_norm: 5.090774, value_mean: 21.292597, advantage: -2.214653
101
actor: 5.138139, actor_grad: 20.299093, policy mean: 0.739383, policy: 4.646141, entropy: -0.007333, actor_norm: 0.499331, critic: 32.586666, critic_grad: 361.334473, value: 27.547478, critic_norm: 5.039187, value_mean: 21.901703, advantage: 5.090637
101
actor: -0.347393, actor_grad: 5.947982, policy mean: 0.156008, policy: -0.802700, entropy: -0.009820, actor_norm: 0.465127, critic: 26.841183, critic_grad: 398.552094, value: 21.257257, critic_norm: 5.583925, value_mean: 55.782467, advantage: -4.546576
101
actor: -12.504599, actor_grad: 54.380600, policy mean: 0.488012, policy: -12.917828, entropy: -0.009417, actor_norm: 0.422646, critic: 1007.112061, critic_grad: 4127.656738, value: 1000.809204, critic_norm: 6.302835, value_mean: 100.750061, advantage: -31.472054
101
actor: -12.794372, actor_grad: 90.210236, policy mean: 0.366468, policy: -13.160181, entropy: -0.012906, actor_norm: 0.378716, critic: 1198.172607, critic_grad: 4221.684082, value: 1191.632446, critic_norm: 6.540147, value_mean: 128.262466, advantage: -34.361015
101
actor: -20.794720, actor_grad: 55.595070, policy mean: 0.500903, policy: -21.119551, entropy: -0.015587, actor_norm: 0.340418, critic: 1782.202637, critic_grad: 7179.933105, value: 1775.085205, critic_norm: 7.117378, value_mean: 184.899948, advantage: -41.683796
101
actor: -28.475082, actor_grad: 219.005646, policy mean: 0.363838, policy: -28.773523, entropy: -0.017311, actor_norm: 0.315752, critic: 5313.044434, critic_grad: 23497.990234, value: 5303.193359, critic_norm: 9.850927, value_mean: 390.522949, advantage: -72.140564
101
actor: -60.143238, actor_grad: 51.185829, policy mean: 0.595749, policy: -60.427223, entropy: -0.018047, actor_norm: 0.302033, critic: 11303.388672, critic_grad: 46780.972656, value: 11289.562500, critic_norm: 13.825931, value_mean: 611.162109, advantage: -105.271172
22
actor: -437.671631, actor_grad: 1022.863770, policy mean: 1.098115, policy: -437.966431, entropy: -0.014343, actor_norm: 0.309154, critic: 146133.718750, critic_grad: 83392.906250, value: 146119.062500, critic_norm: 14.653441, value_mean: 391.848053, advantage: -381.114349
31
actor: -540.102356, actor_grad: 1152.977661, policy mean: 1.317163, policy: -540.400024, entropy: -0.015404, actor_norm: 0.313025, critic: 157268.343750, critic_grad: 87457.750000, value: 157253.078125, critic_norm: 15.258300, value_mean: 409.009552, advantage: -394.499756
101
actor: 22.065411, actor_grad: 221.696533, policy mean: 0.341127, policy: 21.723404, entropy: -0.014401, actor_norm: 0.356407, critic: 4842.305176, critic_grad: 25639.669922, value: 4829.208496, critic_norm: 13.096838, value_mean: 469.193420, advantage: 67.666061
101
actor: 33.648987, actor_grad: 112.400284, policy mean: 0.549592, policy: 33.207783, entropy: -0.008743, actor_norm: 0.449947, critic: 5678.639648, critic_grad: 17749.681641, value: 5669.673828, critic_norm: 8.965665, value_mean: 206.191177, advantage: 73.387161
101
actor: 13.757187, actor_grad: 24.223965, policy mean: 0.469015, policy: 13.269398, entropy: -0.007445, actor_norm: 0.495234, critic: 906.482605, critic_grad: 2518.530029, value: 900.598145, critic_norm: 5.884484, value_mean: 48.342384, advantage: 29.851660
101
actor: 2.098786, actor_grad: 31.115110, policy mean: 0.283457, policy: 1.625093, entropy: -0.008255, actor_norm: 0.481949, critic: 47.411167, critic_grad: 406.548035, value: 42.072784, critic_norm: 5.338383, value_mean: 17.684013, advantage: 6.272684
101
actor: -8.408380, actor_grad: 6.989185, policy mean: 1.161138, policy: -8.829139, entropy: -0.008952, actor_norm: 0.429711, critic: 78.450142, critic_grad: 976.836487, value: 72.431900, critic_norm: 6.018243, value_mean: 61.718452, advantage: -8.410179
101
actor: -15.728289, actor_grad: 101.530579, policy mean: 0.618553, policy: -16.105000, entropy: -0.011546, actor_norm: 0.388258, critic: 699.560425, critic_grad: 2963.308105, value: 693.477783, critic_norm: 6.082632, value_mean: 77.544411, advantage: -26.244698
101
actor: -3.140986, actor_grad: 31.694109, policy mean: 0.340495, policy: -3.485538, entropy: -0.015003, actor_norm: 0.359554, critic: 112.654808, critic_grad: 819.761597, value: 106.953491, critic_norm: 5.701315, value_mean: 76.860291, advantage: -10.158926
101
actor: 30.737389, actor_grad: 104.735207, policy mean: 1.063242, policy: 30.425217, entropy: -0.013804, actor_norm: 0.325976, critic: 842.831238, critic_grad: 2132.610352, value: 837.160828, critic_norm: 5.670432, value_mean: 85.527809, advantage: 28.887806
101
actor: -23.953314, actor_grad: 194.504425, policy mean: 0.859657, policy: -24.215143, entropy: -0.014879, actor_norm: 0.276709, critic: 897.790527, critic_grad: 5617.084473, value: 890.602295, critic_norm: 7.188224, value_mean: 187.919373, advantage: -29.339825
101
actor: 10.542507, actor_grad: 28.334145, policy mean: 0.543738, policy: 10.270657, entropy: -0.018884, actor_norm: 0.290734, critic: 326.684052, critic_grad: 2071.489746, value: 320.481140, critic_norm: 6.202909, value_mean: 124.414322, advantage: 17.785442
101
actor: -16.902504, actor_grad: 118.965164, policy mean: 0.414351, policy: -17.163832, entropy: -0.019469, actor_norm: 0.280797, critic: 1879.372681, critic_grad: 7101.099609, value: 1872.212524, critic_norm: 7.160186, value_mean: 183.653336, advantage: -43.194378
68
actor: -584.917664, actor_grad: 0.074842, policy mean: 3.453876, policy: -585.205811, entropy: 0.000000, actor_norm: 0.288147, critic: 30062.556641, critic_grad: 24134.935547, value: 30055.642578, critic_norm: 6.914603, value_mean: 175.810333, advantage: -173.335205
101
actor: 13.978949, actor_grad: 12.564353, policy mean: 0.446348, policy: 13.685738, entropy: -0.016391, actor_norm: 0.309602, critic: 860.280090, critic_grad: 3774.085693, value: 854.132263, critic_norm: 6.147855, value_mean: 122.016739, advantage: 28.823887
101
actor: -9.451083, actor_grad: 9.157967, policy mean: 0.374367, policy: -9.745855, entropy: -0.014270, actor_norm: 0.309042, critic: 789.513977, critic_grad: 3169.627686, value: 784.055115, critic_norm: 5.458879, value_mean: 81.764954, advantage: -27.954966
101
actor: 13.602073, actor_grad: 46.256554, policy mean: 0.446818, policy: 13.318930, entropy: -0.014348, actor_norm: 0.297491, critic: 858.617554, critic_grad: 2529.286621, value: 853.507080, critic_norm: 5.110482, value_mean: 38.780125, advantage: 29.153240
101
actor: -14.290480, actor_grad: 23.582708, policy mean: 0.669960, policy: -14.538223, entropy: -0.023117, actor_norm: 0.270861, critic: 475.207794, critic_grad: 2464.465576, value: 469.637329, critic_norm: 5.570457, value_mean: 90.110428, advantage: -21.665026
101
actor: -9.720354, actor_grad: 40.433437, policy mean: 0.348209, policy: -9.943116, entropy: -0.023548, actor_norm: 0.246311, critic: 857.408508, critic_grad: 2977.431396, value: 851.874023, critic_norm: 5.534472, value_mean: 94.811104, advantage: -29.100147
101
actor: -4.769702, actor_grad: 16.639114, policy mean: 0.446125, policy: -4.978204, entropy: -0.025564, actor_norm: 0.234065, critic: 152.640533, critic_grad: 916.780029, value: 147.440186, critic_norm: 5.200343, value_mean: 87.964676, advantage: -12.062576
101
actor: 11.300014, actor_grad: 9.564530, policy mean: 0.445178, policy: 11.114319, entropy: -0.032822, actor_norm: 0.218516, critic: 628.078735, critic_grad: 1987.051880, value: 622.989136, critic_norm: 5.089595, value_mean: 94.880829, advantage: 24.902157
101
actor: 10.032935, actor_grad: 4.368821, policy mean: 0.456254, policy: 9.866776, entropy: -0.034073, actor_norm: 0.200232, critic: 482.330933, critic_grad: 1590.631836, value: 477.263794, critic_norm: 5.067125, value_mean: 92.399155, advantage: 21.824709
101
actor: 10.424805, actor_grad: 22.632265, policy mean: 0.539059, policy: 10.273687, entropy: -0.037061, actor_norm: 0.188178, critic: 382.566803, critic_grad: 1723.452515, value: 377.252106, critic_norm: 5.314703, value_mean: 107.334671, advantage: 19.368084
101
actor: 6.302427, actor_grad: 17.001083, policy mean: 0.456499, policy: 6.160992, entropy: -0.041049, actor_norm: 0.182484, critic: 182.932175, critic_grad: 965.677490, value: 178.541061, critic_norm: 4.391113, value_mean: 62.896683, advantage: 13.349053
101
actor: 7.427366, actor_grad: 8.684831, policy mean: 0.514480, policy: 7.300395, entropy: -0.039374, actor_norm: 0.166345, critic: 201.947876, critic_grad: 1149.086670, value: 196.830505, critic_norm: 5.117370, value_mean: 92.338242, advantage: 13.792954
101
actor: -12.169066, actor_grad: 15.902982, policy mean: 0.479305, policy: -12.253168, entropy: -0.053949, actor_norm: 0.138051, critic: 654.957886, critic_grad: 2454.495850, value: 649.235962, critic_norm: 5.721927, value_mean: 116.497894, advantage: -25.463238
101
actor: -9.695888, actor_grad: 1.122520, policy mean: 0.561939, policy: -9.775472, entropy: -0.051015, actor_norm: 0.130599, critic: 318.915680, critic_grad: 1384.015869, value: 313.817078, critic_norm: 5.098614, value_mean: 103.664040, advantage: -17.657635
101
actor: 2.829544, actor_grad: 1.707703, policy mean: 0.567291, policy: 2.743743, entropy: -0.046560, actor_norm: 0.132361, critic: 28.311157, critic_grad: 320.927307, value: 23.690989, critic_norm: 4.620169, value_mean: 85.912743, advantage: 4.684126
101
actor: 16.695097, actor_grad: 18.693504, policy mean: 0.799105, policy: 16.609272, entropy: -0.042691, actor_norm: 0.128516, critic: 394.919647, critic_grad: 1710.099243, value: 390.068420, critic_norm: 4.851232, value_mean: 99.104042, advantage: 19.584591
101
actor: 8.990426, actor_grad: 5.147857, policy mean: 0.448323, policy: 8.916941, entropy: -0.052100, actor_norm: 0.125586, critic: 394.979187, critic_grad: 1630.862671, value: 390.297729, critic_norm: 4.681468, value_mean: 95.221619, advantage: 19.707094
101
actor: 11.315797, actor_grad: 8.031973, policy mean: 0.774769, policy: 11.221777, entropy: -0.032875, actor_norm: 0.126895, critic: 224.814056, critic_grad: 1728.063354, value: 219.382278, critic_norm: 5.431773, value_mean: 116.719719, advantage: 14.643578
101
actor: -15.260838, actor_grad: 12.777861, policy mean: 0.540765, policy: -15.337757, entropy: -0.049221, actor_norm: 0.126140, critic: 822.369141, critic_grad: 2825.273193, value: 816.766968, critic_norm: 5.602198, value_mean: 122.755653, advantage: -28.556309
101
actor: 0.719130, actor_grad: 1.854391, policy mean: 0.908610, policy: 0.589721, entropy: -0.024467, actor_norm: 0.153876, critic: 6.996808, critic_grad: 96.916931, value: 2.435839, critic_norm: 4.560968, value_mean: 78.977722, advantage: 1.027579
101
actor: 8.687311, actor_grad: 2.769086, policy mean: 0.466085, policy: 8.579222, entropy: -0.044225, actor_norm: 0.152314, critic: 345.158905, critic_grad: 1318.343506, value: 340.516693, critic_norm: 4.642210, value_mean: 88.930435, advantage: 18.439636
101
actor: 3.968344, actor_grad: 1.217564, policy mean: 0.401365, policy: 3.867047, entropy: -0.046254, actor_norm: 0.147551, critic: 98.231613, critic_grad: 858.318237, value: 93.169807, critic_norm: 5.061804, value_mean: 105.134331, advantage: 9.546415
101
actor: -8.404016, actor_grad: 12.122342, policy mean: 0.489051, policy: -8.498817, entropy: -0.045732, actor_norm: 0.140533, critic: 304.584045, critic_grad: 1475.611694, value: 299.126984, critic_norm: 5.457062, value_mean: 117.150887, advantage: -17.288103
101
actor: -9.231917, actor_grad: 2.498319, policy mean: 0.494752, policy: -9.316784, entropy: -0.048607, actor_norm: 0.133474, critic: 371.626404, critic_grad: 1634.610718, value: 366.692108, critic_norm: 4.934289, value_mean: 105.158554, advantage: -19.007265
101
actor: 6.822972, actor_grad: 4.445591, policy mean: 0.610396, policy: 6.734065, entropy: -0.045464, actor_norm: 0.134371, critic: 127.069466, critic_grad: 768.881226, value: 122.673004, critic_norm: 4.396461, value_mean: 82.598083, advantage: 11.072670
101
actor: 7.382467, actor_grad: 7.828339, policy mean: 0.419147, policy: 7.302954, entropy: -0.049826, actor_norm: 0.129339, critic: 317.361450, critic_grad: 1615.186646, value: 312.413147, critic_norm: 4.948302, value_mean: 102.798996, advantage: 17.555191
101
actor: -10.165168, actor_grad: 4.252459, policy mean: 0.500468, policy: -10.232757, entropy: -0.052214, actor_norm: 0.119802, critic: 424.962799, critic_grad: 1852.411133, value: 419.497192, critic_norm: 5.465614, value_mean: 119.040787, advantage: -20.454081
101
actor: -7.148557, actor_grad: 13.519554, policy mean: 0.622410, policy: -7.255518, entropy: -0.035009, actor_norm: 0.141970, critic: 163.740509, critic_grad: 1230.185303, value: 158.990448, critic_norm: 4.750054, value_mean: 103.490479, advantage: -12.407654
101
actor: 6.874349, actor_grad: 5.632699, policy mean: 0.370329, policy: 6.780310, entropy: -0.048111, actor_norm: 0.142149, critic: 335.118835, critic_grad: 1442.816162, value: 330.586914, critic_norm: 4.531919, value_mean: 92.573471, advantage: 18.141624
101
actor: 19.003462, actor_grad: 66.404945, policy mean: 0.587303, policy: 18.903412, entropy: -0.037947, actor_norm: 0.137996, critic: 1010.997375, critic_grad: 3784.846191, value: 1006.262451, critic_norm: 4.734920, value_mean: 94.299973, advantage: 31.572783
101
actor: -5.825075, actor_grad: 2.178021, policy mean: 0.444830, policy: -5.913502, entropy: -0.044347, actor_norm: 0.132774, critic: 182.170837, critic_grad: 1292.208618, value: 176.808197, critic_norm: 5.362634, value_mean: 118.544174, advantage: -13.294914
101
actor: -5.860729, actor_grad: 12.368380, policy mean: 0.647532, policy: -5.967616, entropy: -0.037876, actor_norm: 0.144764, critic: 83.617325, critic_grad: 686.559998, value: 79.067612, critic_norm: 4.549710, value_mean: 95.337753, advantage: -8.820921
101
actor: 12.482152, actor_grad: 17.584959, policy mean: 0.684692, policy: 12.379450, entropy: -0.037544, actor_norm: 0.140247, critic: 324.896088, critic_grad: 1191.568848, value: 320.646301, critic_norm: 4.249799, value_mean: 82.742691, advantage: 17.891504
101
actor: 11.900661, actor_grad: 11.038363, policy mean: 0.881969, policy: 11.802338, entropy: -0.036490, actor_norm: 0.134814, critic: 190.405151, critic_grad: 1126.034058, value: 185.529694, critic_norm: 4.875453, value_mean: 101.374283, advantage: 13.510031
101
actor: -4.972505, actor_grad: 11.173178, policy mean: 0.663025, policy: -5.061651, entropy: -0.040853, actor_norm: 0.129999, critic: 67.379066, critic_grad: 726.998169, value: 61.989353, critic_norm: 5.389716, value_mean: 111.046173, advantage: -7.729160
101
actor: -11.710653, actor_grad: 1.875729, policy mean: 0.531795, policy: -11.784218, entropy: -0.048006, actor_norm: 0.121570, critic: 490.463501, critic_grad: 2082.519531, value: 485.470978, critic_norm: 4.992534, value_mean: 108.151031, advantage: -21.938776
101
actor: 2.321586, actor_grad: 2.137500, policy mean: 0.529686, policy: 2.247038, entropy: -0.047299, actor_norm: 0.121848, critic: 22.302383, critic_grad: 342.125946, value: 17.966219, critic_norm: 4.336164, value_mean: 84.657043, advantage: 4.211578
101
actor: 8.691428, actor_grad: 3.353464, policy mean: 0.474930, policy: 8.618256, entropy: -0.049815, actor_norm: 0.122988, critic: 336.926636, critic_grad: 1355.848389, value: 332.273254, critic_norm: 4.653380, value_mean: 94.868721, advantage: 18.166592
101
actor: -5.088741, actor_grad: 8.066332, policy mean: 0.611475, policy: -5.169854, entropy: -0.045175, actor_norm: 0.126289, critic: 79.049835, critic_grad: 953.221375, value: 73.723343, critic_norm: 5.326491, value_mean: 119.134338, advantage: -8.533030
101
actor: -15.807294, actor_grad: 27.557898, policy mean: 0.758652, policy: -15.895271, entropy: -0.036861, actor_norm: 0.124838, critic: 474.976166, critic_grad: 2359.385010, value: 469.935486, critic_norm: 5.040679, value_mean: 115.842041, advantage: -21.550495
101
actor: 1.345298, actor_grad: 2.991960, policy mean: 0.531655, policy: 1.261947, entropy: -0.040601, actor_norm: 0.123952, critic: 11.438974, critic_grad: 277.583374, value: 6.884708, critic_norm: 4.554266, value_mean: 97.645157, advantage: 2.512067
101
actor: 11.919450, actor_grad: 27.229219, policy mean: 0.564272, policy: 11.845144, entropy: -0.047324, actor_norm: 0.121629, critic: 453.779846, critic_grad: 2128.740234, value: 449.209869, critic_norm: 4.569980, value_mean: 91.981277, advantage: 21.126379
101
actor: -6.654911, actor_grad: 8.404669, policy mean: 0.491858, policy: -6.719070, entropy: -0.049711, actor_norm: 0.113872, critic: 191.747086, critic_grad: 1911.937744, value: 186.124619, critic_norm: 5.622463, value_mean: 134.706146, advantage: -13.637634
101
actor: -15.190693, actor_grad: 18.923916, policy mean: 0.583886, policy: -15.261807, entropy: -0.045068, actor_norm: 0.116183, critic: 709.571960, critic_grad: 3674.678467, value: 704.311523, critic_norm: 5.260424, value_mean: 121.459671, advantage: -26.497473
101
actor: 9.315869, actor_grad: 25.305998, policy mean: 0.741978, policy: 9.227240, entropy: -0.032195, actor_norm: 0.120825, critic: 160.952606, critic_grad: 1330.021118, value: 156.578247, critic_norm: 4.374367, value_mean: 74.063568, advantage: 12.462994
101
actor: 4.378722, actor_grad: 1.983220, policy mean: 0.450006, policy: 4.312123, entropy: -0.052733, actor_norm: 0.119333, critic: 98.147797, critic_grad: 725.702759, value: 93.426926, critic_norm: 4.720875, value_mean: 94.659103, advantage: 9.612188
101
actor: -7.997967, actor_grad: 13.346413, policy mean: 0.657163, policy: -8.078543, entropy: -0.041495, actor_norm: 0.122071, critic: 157.812607, critic_grad: 1113.138916, value: 152.406036, critic_norm: 5.406569, value_mean: 112.611649, advantage: -12.334375
101
actor: -7.301679, actor_grad: 1.911255, policy mean: 0.496207, policy: -7.364023, entropy: -0.054339, actor_norm: 0.116683, critic: 228.966293, critic_grad: 1216.237061, value: 223.871155, critic_norm: 5.095143, value_mean: 107.125549, advantage: -14.913502
101
actor: 0.631628, actor_grad: 0.105379, policy mean: 0.509490, policy: 0.566637, entropy: -0.053313, actor_norm: 0.118304, critic: 7.934120, critic_grad: 90.143982, value: 3.348818, critic_norm: 4.585303, value_mean: 95.979012, advantage: 1.032144
101
actor: 13.794489, actor_grad: 9.273847, policy mean: 0.687321, policy: 13.715608, entropy: -0.044054, actor_norm: 0.122935, critic: 409.604523, critic_grad: 1441.561890, value: 405.126221, critic_norm: 4.478314, value_mean: 89.282272, advantage: 20.112658
101
actor: 8.684244, actor_grad: 1.790618, policy mean: 0.510978, policy: 8.623693, entropy: -0.054630, actor_norm: 0.115182, critic: 290.828033, critic_grad: 1767.054688, value: 285.985321, critic_norm: 4.842713, value_mean: 106.088135, advantage: 16.758018
101
actor: -2.660032, actor_grad: 1.345795, policy mean: 0.532273, policy: -2.715121, entropy: -0.054862, actor_norm: 0.109951, critic: 32.138149, critic_grad: 609.847961, value: 26.823847, critic_norm: 5.314303, value_mean: 120.949158, advantage: -5.074374
101
actor: 1.524274, actor_grad: 0.438978, policy mean: 0.556234, policy: 1.460035, entropy: -0.050116, actor_norm: 0.114356, critic: 12.445165, critic_grad: 216.865646, value: 7.513114, critic_norm: 4.932051, value_mean: 102.979248, advantage: 2.679695
101
actor: -5.018832, actor_grad: 2.129527, policy mean: 0.570035, policy: -5.072963, entropy: -0.052978, actor_norm: 0.107109, critic: 85.186844, critic_grad: 945.202759, value: 79.772423, critic_norm: 5.414419, value_mean: 120.570580, advantage: -8.909348
101
actor: -16.341656, actor_grad: 25.704315, policy mean: 0.629144, policy: -16.412865, entropy: -0.043532, actor_norm: 0.114740, critic: 681.879700, critic_grad: 3706.012695, value: 676.766174, critic_norm: 5.113544, value_mean: 114.312241, advantage: -25.937977
73
actor: -79.524521, actor_grad: 125.013000, policy mean: 0.825611, policy: -79.630028, entropy: -0.017957, actor_norm: 0.123465, critic: 9422.858398, critic_grad: 8070.571289, value: 9417.980469, critic_norm: 4.877597, value_mean: 101.871300, advantage: -97.002335
101
actor: -7.212020, actor_grad: 2.079042, policy mean: 0.443331, policy: -7.272482, entropy: -0.053025, actor_norm: 0.113487, critic: 271.549988, critic_grad: 2441.307129, value: 266.255859, critic_norm: 5.294121, value_mean: 117.912262, advantage: -16.312115
101
actor: -6.446980, actor_grad: 1.283534, policy mean: 0.466806, policy: -6.503526, entropy: -0.054904, actor_norm: 0.111450, critic: 203.499496, critic_grad: 1193.932983, value: 198.604691, critic_norm: 4.894806, value_mean: 102.048851, advantage: -14.060255
101
actor: 5.717620, actor_grad: 0.836126, policy mean: 0.483944, policy: 5.659346, entropy: -0.053212, actor_norm: 0.111486, critic: 141.548538, critic_grad: 811.092957, value: 137.201172, critic_norm: 4.347372, value_mean: 85.409683, advantage: 11.712107
101
actor: 10.112402, actor_grad: 3.929650, policy mean: 0.547018, policy: 10.053308, entropy: -0.052871, actor_norm: 0.111964, critic: 330.758270, critic_grad: 1583.837646, value: 325.915161, critic_norm: 4.843124, value_mean: 92.607780, advantage: 17.962399
101
actor: 1.672513, actor_grad: 1.194865, policy mean: 0.569021, policy: 1.603575, entropy: -0.047485, actor_norm: 0.116424, critic: 14.219565, critic_grad: 273.656036, value: 9.000255, critic_norm: 5.219311, value_mean: 108.983971, advantage: 2.825906
101
actor: -9.232861, actor_grad: 3.841300, policy mean: 0.528390, policy: -9.294943, entropy: -0.049728, actor_norm: 0.111811, critic: 318.544617, critic_grad: 1511.340942, value: 313.114349, critic_norm: 5.430270, value_mean: 111.839592, advantage: -17.669933
101
actor: 0.600833, actor_grad: 0.322108, policy mean: 0.491566, policy: 0.548175, entropy: -0.055249, actor_norm: 0.107907, critic: 7.330021, critic_grad: 81.062973, value: 2.815267, critic_norm: 4.514754, value_mean: 88.595497, advantage: 1.125576
101
actor: 10.734995, actor_grad: 12.642069, policy mean: 0.591189, policy: 10.659560, entropy: -0.039592, actor_norm: 0.115027, critic: 318.455017, critic_grad: 1322.695923, value: 313.967377, critic_norm: 4.487634, value_mean: 85.896416, advantage: 17.556349
101
actor: 3.709330, actor_grad: 1.769659, policy mean: 0.441522, policy: 3.654195, entropy: -0.056162, actor_norm: 0.111297, critic: 73.932182, critic_grad: 668.188171, value: 68.957565, critic_norm: 4.974620, value_mean: 100.753532, advantage: 8.235382
101
actor: -8.135770, actor_grad: 5.210782, policy mean: 0.499993, policy: -8.188283, entropy: -0.055762, actor_norm: 0.108275, critic: 272.365936, critic_grad: 1971.269043, value: 266.845856, critic_norm: 5.520070, value_mean: 116.655075, advantage: -16.325788
101
actor: -9.023632, actor_grad: 1.576475, policy mean: 0.507802, policy: -9.064529, entropy: -0.057817, actor_norm: 0.098714, critic: 324.124817, critic_grad: 1425.973877, value: 319.023590, critic_norm: 5.101234, value_mean: 103.825813, advantage: -17.782606
101
actor: 3.956154, actor_grad: 0.555991, policy mean: 0.498110, policy: 3.916116, entropy: -0.056296, actor_norm: 0.096334, critic: 66.262650, critic_grad: 615.037659, value: 61.584290, critic_norm: 4.678361, value_mean: 92.518997, advantage: 7.838588
101
actor: 8.731118, actor_grad: 1.875101, policy mean: 0.512472, policy: 8.692942, entropy: -0.058501, actor_norm: 0.096677, critic: 292.435883, critic_grad: 1223.194458, value: 287.787170, critic_norm: 4.648727, value_mean: 90.084076, advantage: 16.959105
101
actor: 1.601392, actor_grad: 0.455326, policy mean: 0.554831, policy: 1.566839, entropy: -0.058054, actor_norm: 0.092606, critic: 16.695383, critic_grad: 231.901291, value: 11.534754, critic_norm: 5.160629, value_mean: 105.129959, advantage: 2.881481
101
actor: -7.720949, actor_grad: 2.935204, policy mean: 0.535573, policy: -7.771261, entropy: -0.052620, actor_norm: 0.102932, critic: 216.011551, critic_grad: 1293.067627, value: 210.677948, critic_norm: 5.333610, value_mean: 114.479416, advantage: -14.381641
101
actor: -7.265261, actor_grad: 2.854798, policy mean: 0.480210, policy: -7.310555, entropy: -0.058384, actor_norm: 0.103677, critic: 239.844666, critic_grad: 1430.307007, value: 234.783112, critic_norm: 5.061547, value_mean: 98.832367, advantage: -15.183001
101
actor: 0.685953, actor_grad: 0.140522, policy mean: 0.490483, policy: 0.630288, entropy: -0.054373, actor_norm: 0.110038, critic: 7.502091, critic_grad: 102.233162, value: 2.786472, critic_norm: 4.715619, value_mean: 94.262756, advantage: 1.395072
101
actor: 7.201052, actor_grad: 1.831089, policy mean: 0.521397, policy: 7.142731, entropy: -0.053370, actor_norm: 0.111691, critic: 191.141937, critic_grad: 1002.815674, value: 186.495117, critic_norm: 4.646813, value_mean: 92.196259, advantage: 13.610312
101
actor: 10.236794, actor_grad: 17.455469, policy mean: 0.758597, policy: 10.171088, entropy: -0.041863, actor_norm: 0.107570, critic: 177.664078, critic_grad: 1476.216187, value: 172.581879, critic_norm: 5.082192, value_mean: 103.946960, advantage: 12.778534
101
actor: -11.101645, actor_grad: 11.547631, policy mean: 0.487471, policy: -11.157341, entropy: -0.055804, actor_norm: 0.111500, critic: 533.664001, critic_grad: 2798.160889, value: 528.377319, critic_norm: 5.286681, value_mean: 115.641754, advantage: -22.968863
101
actor: 0.188708, actor_grad: 0.224671, policy mean: 0.553486, policy: 0.130977, entropy: -0.051792, actor_norm: 0.109523, critic: 4.861259, critic_grad: 19.713392, value: 0.376328, critic_norm: 4.484931, value_mean: 85.608841, advantage: 0.274606
101
actor: 9.095736, actor_grad: 0.647946, policy mean: 0.509800, policy: 9.043911, entropy: -0.055236, actor_norm: 0.107060, critic: 321.907501, critic_grad: 1308.534058, value: 317.509583, critic_norm: 4.397933, value_mean: 80.856064, advantage: 17.797516
101
actor: 2.984119, actor_grad: 1.532208, policy mean: 0.526505, policy: 2.931863, entropy: -0.052443, actor_norm: 0.104699, critic: 39.866013, critic_grad: 450.099152, value: 34.869419, critic_norm: 4.996595, value_mean: 98.982597, advantage: 5.717552
101
actor: -9.759002, actor_grad: 4.267609, policy mean: 0.552708, policy: -9.799633, entropy: -0.055710, actor_norm: 0.096341, critic: 319.725647, critic_grad: 1503.026733, value: 314.235596, critic_norm: 5.490058, value_mean: 114.317657, advantage: -17.689892
101
actor: -0.297818, actor_grad: 0.338169, policy mean: 0.550378, policy: -0.348567, entropy: -0.052047, actor_norm: 0.102796, critic: 6.415294, critic_grad: 53.387848, value: 1.609171, critic_norm: 4.806122, value_mean: 96.888039, advantage: -0.676516
101
actor: 9.843191, actor_grad: 1.172831, policy mean: 0.569208, policy: 9.792437, entropy: -0.051938, actor_norm: 0.102692, critic: 299.917297, critic_grad: 1217.028931, value: 295.439941, critic_norm: 4.477362, value_mean: 88.201828, advantage: 17.187994
101
actor: 9.083299, actor_grad: 5.628703, policy mean: 0.603494, policy: 9.038629, entropy: -0.052487, actor_norm: 0.097158, critic: 231.539429, critic_grad: 1373.108398, value: 226.380859, critic_norm: 5.158572, value_mean: 110.424438, advantage: 14.991093
101
actor: 4.624418, actor_grad: 0.375423, policy mean: 0.530226, policy: 4.578602, entropy: -0.053531, actor_norm: 0.099347, critic: 82.771179, critic_grad: 794.162048, value: 77.704086, critic_norm: 5.067091, value_mean: 110.083015, advantage: 8.547679
101
actor: 3.182430, actor_grad: 0.983039, policy mean: 0.444040, policy: 3.140233, entropy: -0.059478, actor_norm: 0.101674, critic: 55.690956, critic_grad: 732.140991, value: 50.647861, critic_norm: 5.043095, value_mean: 109.941589, advantage: 6.976587
101
actor: -9.070807, actor_grad: 3.594315, policy mean: 0.568153, policy: -9.123251, entropy: -0.050239, actor_norm: 0.102683, critic: 270.094696, critic_grad: 1947.323730, value: 264.693115, critic_norm: 5.401584, value_mean: 116.771881, advantage: -16.251753
101
actor: -6.518206, actor_grad: 3.570168, policy mean: 0.418470, policy: -6.571069, entropy: -0.055505, actor_norm: 0.108369, critic: 254.525375, critic_grad: 1345.441040, value: 249.142136, critic_norm: 5.383238, value_mean: 112.649849, advantage: -15.774463
101
actor: -8.264601, actor_grad: 2.411958, policy mean: 0.502010, policy: -8.312587, entropy: -0.055424, actor_norm: 0.103410, critic: 277.256104, critic_grad: 1341.460449, value: 272.290222, critic_norm: 4.965894, value_mean: 101.535873, advantage: -16.419920
101
actor: 9.058147, actor_grad: 2.586036, policy mean: 0.643657, policy: 9.005585, entropy: -0.048668, actor_norm: 0.101230, critic: 197.161896, critic_grad: 1070.108276, value: 192.618225, critic_norm: 4.543664, value_mean: 90.123856, advantage: 13.860382
101
actor: 9.706630, actor_grad: 6.419506, policy mean: 0.568652, policy: 9.657671, entropy: -0.051534, actor_norm: 0.100493, critic: 287.834534, critic_grad: 1364.870728, value: 283.106049, critic_norm: 4.728481, value_mean: 95.828827, advantage: 16.764156
101
actor: 0.644973, actor_grad: 0.733407, policy mean: 0.671116, policy: 0.589643, entropy: -0.043810, actor_norm: 0.099140, critic: 6.861007, critic_grad: 57.178963, value: 1.714375, critic_norm: 5.146632, value_mean: 107.064590, advantage: 0.723555
101
actor: -10.772922, actor_grad: 1.055518, policy mean: 0.577894, policy: -10.815399, entropy: -0.054036, actor_norm: 0.096514, critic: 356.724640, critic_grad: 1779.055908, value: 351.077576, critic_norm: 5.647062, value_mean: 121.565384, advantage: -18.735832
101
actor: -9.367721, actor_grad: 7.350483, policy mean: 0.522214, policy: -9.413858, entropy: -0.054229, actor_norm: 0.100366, critic: 337.990601, critic_grad: 1920.849731, value: 332.895355, critic_norm: 5.095260, value_mean: 106.995193, advantage: -18.115128
101
actor: -3.297878, actor_grad: 0.958938, policy mean: 0.517595, policy: -3.347779, entropy: -0.053130, actor_norm: 0.103031, critic: 48.111530, critic_grad: 671.721130, value: 43.060795, critic_norm: 5.050737, value_mean: 107.525360, advantage: -6.469666
101
actor: 1.837516, actor_grad: 0.588105, policy mean: 0.491356, policy: 1.789608, entropy: -0.053894, actor_norm: 0.101802, critic: 18.658087, critic_grad: 296.682312, value: 14.141497, critic_norm: 4.516590, value_mean: 92.484978, advantage: 3.697641
101
actor: 9.791929, actor_grad: 1.777530, policy mean: 0.577870, policy: 9.735371, entropy: -0.048608, actor_norm: 0.105166, critic: 286.491058, critic_grad: 1453.391479, value: 281.842102, critic_norm: 4.648943, value_mean: 93.727737, advantage: 16.762482
101
actor: 10.899398, actor_grad: 9.516816, policy mean: 0.625053, policy: 10.837717, entropy: -0.045349, actor_norm: 0.107030, critic: 317.731201, critic_grad: 1558.381348, value: 313.035278, critic_norm: 4.695918, value_mean: 92.867447, advantage: 17.644320
101
actor: 4.452626, actor_grad: 0.916545, policy mean: 0.455378, policy: 4.404934, entropy: -0.055492, actor_norm: 0.103184, critic: 102.030815, critic_grad: 779.466736, value: 97.076385, critic_norm: 4.954432, value_mean: 101.539665, advantage: 9.789660
101
actor: -5.843766, actor_grad: 0.503164, policy mean: 0.520505, policy: -5.885283, entropy: -0.054709, actor_norm: 0.096227, critic: 131.106873, critic_grad: 1195.266357, value: 125.688515, critic_norm: 5.418365, value_mean: 112.572205, advantage: -11.177339
101
actor: -11.661551, actor_grad: 15.977840, policy mean: 0.674394, policy: -11.701975, entropy: -0.049754, actor_norm: 0.090179, critic: 298.704712, critic_grad: 1830.359985, value: 293.491638, critic_norm: 5.213060, value_mean: 102.593201, advantage: -17.095863
101
actor: -3.117845, actor_grad: 2.031254, policy mean: 0.542983, policy: -3.158151, entropy: -0.053263, actor_norm: 0.093570, critic: 39.122490, critic_grad: 486.261444, value: 34.417908, critic_norm: 4.704580, value_mean: 89.835609, advantage: -5.729041
101
actor: 4.693339, actor_grad: 4.480987, policy mean: 0.596118, policy: 4.647034, entropy: -0.048422, actor_norm: 0.094727, critic: 65.834824, critic_grad: 557.805115, value: 61.398315, critic_norm: 4.436512, value_mean: 88.348808, advantage: 7.809125
101
actor: 9.263674, actor_grad: 1.207412, policy mean: 0.512497, policy: 9.226924, entropy: -0.056016, actor_norm: 0.092766, critic: 331.067566, critic_grad: 1352.167114, value: 326.534729, critic_norm: 4.532825, value_mean: 87.016739, advantage: 18.048634
101
actor: 4.578530, actor_grad: 3.948519, policy mean: 0.619462, policy: 4.539257, entropy: -0.054136, actor_norm: 0.093409, critic: 52.548561, critic_grad: 526.314575, value: 47.316139, critic_norm: 5.232422, value_mean: 106.778015, advantage: 6.508345
101
actor: -9.470442, actor_grad: 4.080075, policy mean: 0.549661, policy: -9.510005, entropy: -0.052991, actor_norm: 0.092554, critic: 304.500854, critic_grad: 1496.781494, value: 298.877380, critic_norm: 5.623471, value_mean: 116.567886, advantage: -17.282845
101
actor: -6.213052, actor_grad: 0.574172, policy mean: 0.531425, policy: -6.256931, entropy: -0.052840, actor_norm: 0.096719, critic: 148.137955, critic_grad: 1273.516724, value: 143.214813, critic_norm: 4.923135, value_mean: 108.291031, advantage: -11.843402
101
actor: 7.138269, actor_grad: 8.916135, policy mean: 0.656750, policy: 7.076962, entropy: -0.046157, actor_norm: 0.107465, critic: 120.676628, critic_grad: 1133.317627, value: 116.307266, critic_norm: 4.369361, value_mean: 88.622513, advantage: 10.782387
101
actor: 7.297424, actor_grad: 2.554187, policy mean: 0.503193, policy: 7.240722, entropy: -0.051059, actor_norm: 0.107761, critic: 215.450790, critic_grad: 1098.833008, value: 210.520782, critic_norm: 4.930012, value_mean: 99.348312, advantage: 14.388645
101
actor: 1.168043, actor_grad: 0.957018, policy mean: 0.487426, policy: 1.122677, entropy: -0.055472, actor_norm: 0.100838, critic: 12.037079, critic_grad: 190.974426, value: 6.738182, critic_norm: 5.298897, value_mean: 109.547302, advantage: 2.269860
101
actor: -11.278625, actor_grad: 18.364220, policy mean: 0.493451, policy: -11.350201, entropy: -0.043513, actor_norm: 0.115089, critic: 512.394836, critic_grad: 2514.374512, value: 506.825378, critic_norm: 5.569453, value_mean: 124.900009, advantage: -22.431343
101
actor: -6.527784, actor_grad: 10.627492, policy mean: 0.486539, policy: -6.602465, entropy: -0.045817, actor_norm: 0.120498, critic: 188.308350, critic_grad: 1230.883789, value: 183.316895, critic_norm: 4.991462, value_mean: 106.066254, advantage: -13.457752
101
actor: 5.755942, actor_grad: 1.750374, policy mean: 0.583625, policy: 5.677742, entropy: -0.043234, actor_norm: 0.121434, critic: 95.550415, critic_grad: 711.267029, value: 90.983604, critic_norm: 4.566810, value_mean: 89.195534, advantage: 9.504791
101
actor: 12.688704, actor_grad: 24.007868, policy mean: 0.649367, policy: 12.610396, entropy: -0.038175, actor_norm: 0.116483, critic: 372.750031, critic_grad: 1966.643921, value: 367.890259, critic_norm: 4.859779, value_mean: 90.757019, advantage: 18.964481
101
actor: -9.256888, actor_grad: 9.202156, policy mean: 0.562215, policy: -9.332025, entropy: -0.042808, actor_norm: 0.117945, critic: 283.132019, critic_grad: 1827.890625, value: 277.766907, critic_norm: 5.365103, value_mean: 105.903152, advantage: -16.643562
101
actor: -6.281193, actor_grad: 7.513130, policy mean: 0.473593, policy: -6.342237, entropy: -0.050084, actor_norm: 0.111128, critic: 183.803955, critic_grad: 1156.486938, value: 179.012451, critic_norm: 4.791510, value_mean: 92.354080, advantage: -13.344376
101
actor: 5.397619, actor_grad: 1.359295, policy mean: 0.555069, policy: 5.341951, entropy: -0.049886, actor_norm: 0.105554, critic: 97.373291, critic_grad: 677.298157, value: 92.982834, critic_norm: 4.390460, value_mean: 83.673988, advantage: 9.641623
101
actor: 5.678939, actor_grad: 0.840937, policy mean: 0.498599, policy: 5.643462, entropy: -0.057443, actor_norm: 0.092920, critic: 138.777328, critic_grad: 973.992859, value: 133.689209, critic_norm: 5.088123, value_mean: 107.473892, advantage: 11.487742
101
actor: 7.872386, actor_grad: 11.975966, policy mean: 0.525292, policy: 7.809073, entropy: -0.046929, actor_norm: 0.110241, critic: 235.024658, critic_grad: 1171.821899, value: 230.391357, critic_norm: 4.633304, value_mean: 94.838120, advantage: 15.079733
101
actor: -5.516291, actor_grad: 6.835497, policy mean: 0.631331, policy: -5.577321, entropy: -0.047394, actor_norm: 0.108424, critic: 87.414665, critic_grad: 875.333374, value: 81.772530, critic_norm: 5.642135, value_mean: 123.121094, advantage: -9.015134
101
actor: -8.088454, actor_grad: 2.970803, policy mean: 0.431757, policy: -8.143742, entropy: -0.053812, actor_norm: 0.109099, critic: 366.649567, critic_grad: 1808.646484, value: 361.257690, critic_norm: 5.391866, value_mean: 116.115570, advantage: -18.965538
101
actor: -5.410418, actor_grad: 1.811881, policy mean: 0.512883, policy: -5.468796, entropy: -0.049289, actor_norm: 0.107666, critic: 122.912201, critic_grad: 940.520935, value: 117.790054, critic_norm: 5.122149, value_mean: 108.192322, advantage: -10.707300
101
actor: 5.453992, actor_grad: 3.723613, policy mean: 0.529778, policy: 5.391283, entropy: -0.048018, actor_norm: 0.110727, critic: 110.651215, critic_grad: 1117.554199, value: 105.958221, critic_norm: 4.692992, value_mean: 93.365158, advantage: 10.236877
101
actor: 9.344950, actor_grad: 5.109939, policy mean: 0.458637, policy: 9.282167, entropy: -0.049477, actor_norm: 0.112259, critic: 407.656891, critic_grad: 1977.575439, value: 403.050568, critic_norm: 4.606337, value_mean: 93.043823, advantage: 20.044308
101
actor: 8.109776, actor_grad: 16.367510, policy mean: 0.608727, policy: 8.047762, entropy: -0.047077, actor_norm: 0.109092, critic: 173.829498, critic_grad: 1179.782227, value: 169.049301, critic_norm: 4.780191, value_mean: 97.240013, advantage: 12.946995
101
actor: 8.960710, actor_grad: 9.597458, policy mean: 0.622356, policy: 8.893592, entropy: -0.044864, actor_norm: 0.111982, critic: 211.015823, critic_grad: 1518.405884, value: 206.292557, critic_norm: 4.723262, value_mean: 95.330750, advantage: 14.314437
101
actor: 3.307031, actor_grad: 0.546515, policy mean: 0.504922, policy: 3.246367, entropy: -0.048761, actor_norm: 0.109425, critic: 47.441761, critic_grad: 518.113281, value: 42.516525, critic_norm: 4.925237, value_mean: 99.404190, advantage: 6.470678
101
actor: -5.150590, actor_grad: 7.498361, policy mean: 0.600254, policy: -5.207973, entropy: -0.044695, actor_norm: 0.102078, critic: 80.117035, critic_grad: 746.984497, value: 74.839920, critic_norm: 5.277114, value_mean: 104.567856, advantage: -8.580860
78
actor: -97.343094, actor_grad: 124.407036, policy mean: 0.981731, policy: -97.413025, entropy: -0.028879, actor_norm: 0.098811, critic: 10443.105469, critic_grad: 8922.937500, value: 10437.939453, critic_norm: 5.166090, value_mean: 109.199257, advantage: -102.014717
101
actor: 9.136154, actor_grad: 2.077657, policy mean: 0.455148, policy: 9.087137, entropy: -0.055239, actor_norm: 0.104257, critic: 412.453766, critic_grad: 2018.578979, value: 407.445679, critic_norm: 5.008087, value_mean: 110.305305, advantage: 20.103537
101
actor: -2.644215, actor_grad: 0.903199, policy mean: 0.700439, policy: -2.703433, entropy: -0.045901, actor_norm: 0.105119, critic: 25.417568, critic_grad: 443.258057, value: 19.930431, critic_norm: 5.487137, value_mean: 125.365601, advantage: -4.263722
101
actor: -5.664618, actor_grad: 1.346269, policy mean: 0.517370, policy: -5.726412, entropy: -0.050469, actor_norm: 0.112263, critic: 128.852081, critic_grad: 1231.649048, value: 123.331009, critic_norm: 5.521080, value_mean: 132.943115, advantage: -10.855722
101
actor: 9.395804, actor_grad: 13.235228, policy mean: 0.475930, policy: 9.315229, entropy: -0.044817, actor_norm: 0.125392, critic: 386.032043, critic_grad: 3138.924316, value: 380.575226, critic_norm: 5.456809, value_mean: 146.943008, advantage: 19.493019
101
actor: -7.814356, actor_grad: 13.660331, policy mean: 0.618873, policy: -7.900954, entropy: -0.043647, actor_norm: 0.130246, critic: 173.713806, critic_grad: 1475.224487, value: 167.828400, critic_norm: 5.885407, value_mean: 139.368408, advantage: -12.884724
101
actor: -8.094110, actor_grad: 14.678533, policy mean: 0.565185, policy: -8.203942, entropy: -0.038587, actor_norm: 0.148420, critic: 212.657104, critic_grad: 1607.097778, value: 206.903412, critic_norm: 5.753696, value_mean: 138.078201, advantage: -14.331230
101
actor: -0.590330, actor_grad: 0.606306, policy mean: 0.356240, policy: -0.690758, entropy: -0.045740, actor_norm: 0.146168, critic: 8.777048, critic_grad: 146.022827, value: 3.987416, critic_norm: 4.789632, value_mean: 98.682571, advantage: -1.811296
101
actor: 6.374217, actor_grad: 6.675660, policy mean: 0.586933, policy: 6.263215, entropy: -0.034908, actor_norm: 0.145910, critic: 113.940720, critic_grad: 820.115906, value: 109.418549, critic_norm: 4.522172, value_mean: 92.900391, advantage: 10.420044
101
actor: 1.432538, actor_grad: 2.650317, policy mean: 0.383901, policy: 1.329882, entropy: -0.042541, actor_norm: 0.145197, critic: 20.930286, critic_grad: 341.656525, value: 16.095577, critic_norm: 4.834708, value_mean: 89.523834, advantage: 3.281650
101
actor: -2.485108, actor_grad: 5.418599, policy mean: 0.433964, policy: -2.574131, entropy: -0.045541, actor_norm: 0.134564, critic: 46.943146, critic_grad: 642.068420, value: 42.208572, critic_norm: 4.734573, value_mean: 78.838287, advantage: -6.199958
101
actor: -3.689559, actor_grad: 5.370486, policy mean: 0.729275, policy: -3.777586, entropy: -0.035421, actor_norm: 0.123447, critic: 35.653492, critic_grad: 419.243652, value: 31.180897, critic_norm: 4.472595, value_mean: 75.858940, advantage: -5.491341
101
actor: -8.019899, actor_grad: 17.037754, policy mean: 0.712325, policy: -8.103001, entropy: -0.033104, actor_norm: 0.116205, critic: 172.842056, critic_grad: 1941.318115, value: 166.470886, critic_norm: 6.371168, value_mean: 176.653030, advantage: -11.670284
101
actor: 7.923283, actor_grad: 9.481083, policy mean: 0.480529, policy: 7.848158, entropy: -0.047969, actor_norm: 0.123093, critic: 284.023804, critic_grad: 1704.757935, value: 278.649719, critic_norm: 5.374095, value_mean: 125.713852, advantage: 16.540934
101
actor: -12.733856, actor_grad: 50.948616, policy mean: 0.543636, policy: -12.814470, entropy: -0.045067, actor_norm: 0.125682, critic: 554.380127, critic_grad: 5145.033203, value: 547.765625, critic_norm: 6.614530, value_mean: 210.899063, advantage: -23.371973
101
actor: -5.912519, actor_grad: 5.134945, policy mean: 0.431267, policy: -6.005345, entropy: -0.041865, actor_norm: 0.134691, critic: 211.000214, critic_grad: 2564.335205, value: 203.587997, critic_norm: 7.412211, value_mean: 215.119858, advantage: -14.100439
101
actor: 4.583361, actor_grad: 11.426308, policy mean: 0.433830, policy: 4.481736, entropy: -0.041091, actor_norm: 0.142717, critic: 103.750954, critic_grad: 1726.906738, value: 97.626404, critic_norm: 6.124548, value_mean: 172.176498, advantage: 9.645332
101
actor: 9.514074, actor_grad: 18.556585, policy mean: 0.431689, policy: 9.410469, entropy: -0.042235, actor_norm: 0.145840, critic: 499.216309, critic_grad: 3597.289307, value: 493.214417, critic_norm: 6.001894, value_mean: 154.016220, advantage: 21.778610
101
actor: 5.231122, actor_grad: 11.396502, policy mean: 0.308545, policy: 5.113538, entropy: -0.041377, actor_norm: 0.158962, critic: 297.503510, critic_grad: 1714.779907, value: 292.728210, critic_norm: 4.775298, value_mean: 88.314301, advantage: 16.921093
101
actor: 2.989613, actor_grad: 9.909412, policy mean: 0.465014, policy: 2.867159, entropy: -0.035196, actor_norm: 0.157650, critic: 46.406849, critic_grad: 534.604553, value: 41.800892, critic_norm: 4.605957, value_mean: 61.366180, advantage: 6.300453
101
actor: -3.370778, actor_grad: 5.653691, policy mean: 0.515427, policy: -3.488439, entropy: -0.036838, actor_norm: 0.154499, critic: 53.324463, critic_grad: 536.131653, value: 48.586384, critic_norm: 4.738081, value_mean: 60.228004, advantage: -6.687629
101
actor: 2.580850, actor_grad: 1.563385, policy mean: 0.357182, policy: 2.478667, entropy: -0.044804, actor_norm: 0.146986, critic: 50.313644, critic_grad: 583.423889, value: 45.783966, critic_norm: 4.529676, value_mean: 56.639496, advantage: 6.726595
101
actor: 0.045799, actor_grad: 0.215003, policy mean: 0.446976, policy: -0.044090, entropy: -0.050924, actor_norm: 0.140814, critic: 8.669395, critic_grad: 33.305576, value: 3.213116, critic_norm: 5.456280, value_mean: 106.805809, advantage: -0.086501
101
actor: -1.676181, actor_grad: 0.942290, policy mean: 0.594808, policy: -1.786373, entropy: -0.040983, actor_norm: 0.151175, critic: 17.107843, critic_grad: 246.597916, value: 10.973702, critic_norm: 6.134141, value_mean: 145.641663, advantage: -2.163044
101
actor: -2.296910, actor_grad: 2.367634, policy mean: 0.561767, policy: -2.408883, entropy: -0.042574, actor_norm: 0.154547, critic: 22.286001, critic_grad: 360.402130, value: 17.348808, critic_norm: 4.937193, value_mean: 104.756683, advantage: -4.114827
101
actor: 10.019436, actor_grad: 30.725317, policy mean: 0.580919, policy: 9.894258, entropy: -0.036567, actor_norm: 0.161744, critic: 291.560028, critic_grad: 1453.841431, value: 287.221924, critic_norm: 4.338106, value_mean: 82.241997, advantage: 16.934687
101
actor: 8.283629, actor_grad: 3.247820, policy mean: 0.527130, policy: 8.165839, entropy: -0.038952, actor_norm: 0.156742, critic: 247.676498, critic_grad: 1169.589355, value: 243.091980, critic_norm: 4.584517, value_mean: 93.648918, advantage: 15.524979
101
actor: -3.826604, actor_grad: 1.551346, policy mean: 0.559493, policy: -3.932123, entropy: -0.038856, actor_norm: 0.144375, critic: 55.573868, critic_grad: 585.016663, value: 50.407391, critic_norm: 5.166479, value_mean: 110.526291, advantage: -7.086989
101
actor: -10.291168, actor_grad: 2.540550, policy mean: 0.446972, policy: -10.375601, entropy: -0.049276, actor_norm: 0.133709, critic: 541.154846, critic_grad: 2621.690674, value: 535.963501, critic_norm: 5.191350, value_mean: 113.023438, advantage: -23.143475
101
actor: -1.015830, actor_grad: 0.533290, policy mean: 0.509901, policy: -1.091011, entropy: -0.048294, actor_norm: 0.123475, critic: 12.858617, critic_grad: 228.328064, value: 8.535900, critic_norm: 4.322717, value_mean: 83.506981, advantage: -2.582634
101
actor: 13.776580, actor_grad: 5.542435, policy mean: 0.676770, policy: 13.706112, entropy: -0.046459, actor_norm: 0.116928, critic: 415.223785, critic_grad: 1940.608643, value: 410.869476, critic_norm: 4.354302, value_mean: 78.737976, advantage: 20.155979
101
actor: -2.027783, actor_grad: 0.424681, policy mean: 0.510875, policy: -2.090515, entropy: -0.048600, actor_norm: 0.111332, critic: 30.133827, critic_grad: 409.234528, value: 25.159412, critic_norm: 4.974414, value_mean: 99.836761, advantage: -4.420991
101
actor: -7.032208, actor_grad: 5.685949, policy mean: 0.475857, policy: -7.080623, entropy: -0.056186, actor_norm: 0.104601, critic: 222.932236, critic_grad: 1324.403809, value: 217.905457, critic_norm: 5.026778, value_mean: 115.274849, advantage: -14.747472
101
actor: -0.460527, actor_grad: 0.463177, policy mean: 0.540382, policy: -0.516504, entropy: -0.049462, actor_norm: 0.105439, critic: 7.193591, critic_grad: 99.869194, value: 1.768678, critic_norm: 5.424913, value_mean: 131.452393, advantage: -0.892020
101
actor: -3.712753, actor_grad: 4.202169, policy mean: 0.626410, policy: -3.778384, entropy: -0.042629, actor_norm: 0.108259, critic: 52.450359, critic_grad: 631.247253, value: 47.084702, critic_norm: 5.365657, value_mean: 125.475601, advantage: -6.488422
101
actor: -6.122674, actor_grad: 8.249221, policy mean: 0.791216, policy: -6.194365, entropy: -0.041967, actor_norm: 0.113657, critic: 59.750408, critic_grad: 897.017395, value: 54.446152, critic_norm: 5.304257, value_mean: 139.088470, advantage: -7.154866
101
actor: -81.157852, actor_grad: 587.089172, policy mean: 0.323291, policy: -81.247467, entropy: -0.042465, actor_norm: 0.132080, critic: 62177.710938, critic_grad: 72410.859375, value: 62171.101562, critic_norm: 6.610670, value_mean: 252.280472, advantage: -249.320160
101
actor: -0.577593, actor_grad: 0.461546, policy mean: 0.528963, policy: -0.675787, entropy: -0.041141, actor_norm: 0.139334, critic: 7.401518, critic_grad: 151.279083, value: 1.913069, critic_norm: 5.488450, value_mean: 138.012833, advantage: -1.227612
101
actor: 8.531873, actor_grad: 4.115895, policy mean: 0.394945, policy: 8.432462, entropy: -0.044262, actor_norm: 0.143673, critic: 462.083557, critic_grad: 2947.097168, value: 456.804993, critic_norm: 5.278579, value_mean: 134.184448, advantage: 21.240997
101
actor: 2.866094, actor_grad: 4.305107, policy mean: 0.521769, policy: 2.750920, entropy: -0.038029, actor_norm: 0.153203, critic: 48.968826, critic_grad: 735.685547, value: 43.635105, critic_norm: 5.333719, value_mean: 126.797478, advantage: 5.515241
101
actor: -3.329303, actor_grad: 0.935474, policy mean: 0.515844, policy: -3.452278, entropy: -0.036205, actor_norm: 0.159180, critic: 49.883705, critic_grad: 586.476196, value: 45.111557, critic_norm: 4.772147, value_mean: 102.710876, advantage: -6.709159
37
actor: -51.795341, actor_grad: 197.460327, policy mean: 0.588639, policy: -51.927956, entropy: -0.026425, actor_norm: 0.159038, critic: 8020.885254, critic_grad: 7115.161621, value: 8016.475586, critic_norm: 4.409451, value_mean: 92.485130, advantage: -89.524834
101
actor: 0.590427, actor_grad: 1.383258, policy mean: 0.341542, policy: 0.477637, entropy: -0.042068, actor_norm: 0.154857, critic: 6.553611, critic_grad: 121.680481, value: 2.299927, critic_norm: 4.253684, value_mean: 72.094101, advantage: 1.474674
101
actor: 7.710216, actor_grad: 16.018629, policy mean: 0.437654, policy: 7.600901, entropy: -0.040132, actor_norm: 0.149447, critic: 294.158844, critic_grad: 1281.909180, value: 289.962982, critic_norm: 4.195847, value_mean: 65.817566, advantage: 17.004677
101
actor: -8.367518, actor_grad: 7.744397, policy mean: 0.645550, policy: -8.461495, entropy: -0.039957, actor_norm: 0.133934, critic: 177.085815, critic_grad: 1333.969727, value: 172.081085, critic_norm: 5.004737, value_mean: 89.706947, advantage: -13.101027
101
actor: -5.775332, actor_grad: 4.826176, policy mean: 0.582838, policy: -5.852815, entropy: -0.047995, actor_norm: 0.125477, critic: 107.523155, critic_grad: 790.678955, value: 102.804527, critic_norm: 4.718631, value_mean: 98.334488, advantage: -10.044317
31
actor: -70.919342, actor_grad: 65.844894, policy mean: 0.654859, policy: -70.995880, entropy: -0.046138, actor_norm: 0.122672, critic: 11178.457031, critic_grad: 9438.467773, value: 11173.091797, critic_norm: 5.365104, value_mean: 119.774689, advantage: -105.264908
101
actor: 9.819880, actor_grad: 13.930923, policy mean: 0.599369, policy: 9.739149, entropy: -0.046309, actor_norm: 0.127041, critic: 264.611664, critic_grad: 1414.700195, value: 259.949127, critic_norm: 4.662538, value_mean: 100.609680, advantage: 15.980834
101
actor: 1.227713, actor_grad: 2.471010, policy mean: 0.534783, policy: 1.157628, entropy: -0.051131, actor_norm: 0.121217, critic: 12.735651, critic_grad: 316.012207, value: 7.844720, critic_norm: 4.890931, value_mean: 111.956261, advantage: 2.225529
101
actor: -5.676569, actor_grad: 13.967314, policy mean: 0.438596, policy: -5.755947, entropy: -0.047310, actor_norm: 0.126688, critic: 174.006775, critic_grad: 1655.665405, value: 169.581619, critic_norm: 4.425160, value_mean: 93.148697, advantage: -13.005921
101
actor: 6.927928, actor_grad: 10.013655, policy mean: 0.469753, policy: 6.861702, entropy: -0.053434, actor_norm: 0.119660, critic: 216.734726, critic_grad: 1085.659058, value: 212.476257, critic_norm: 4.258462, value_mean: 77.266739, advantage: 14.575436
101
actor: 8.635575, actor_grad: 3.298054, policy mean: 0.491026, policy: 8.571613, entropy: -0.052152, actor_norm: 0.116114, critic: 305.493164, critic_grad: 1587.557007, value: 301.040894, critic_norm: 4.452262, value_mean: 83.383530, advantage: 17.256056
101
actor: -4.890660, actor_grad: 2.355746, policy mean: 0.516406, policy: -4.938939, entropy: -0.055430, actor_norm: 0.103708, critic: 94.861557, critic_grad: 1099.939331, value: 89.942307, critic_norm: 4.919252, value_mean: 100.036942, advantage: -9.432587
101
actor: -6.189884, actor_grad: 0.707342, policy mean: 0.542894, policy: -6.233028, entropy: -0.052243, actor_norm: 0.095387, critic: 132.422501, critic_grad: 992.044250, value: 127.739311, critic_norm: 4.683195, value_mean: 99.066086, advantage: -11.249948
101
actor: -3.077172, actor_grad: 0.511625, policy mean: 0.561318, policy: -3.110646, entropy: -0.057295, actor_norm: 0.090769, critic: 36.533909, critic_grad: 410.473633, value: 32.087952, critic_norm: 4.445957, value_mean: 95.989830, advantage: -5.531130
101
actor: 9.767737, actor_grad: 2.546820, policy mean: 0.506261, policy: 9.730876, entropy: -0.058518, actor_norm: 0.095380, critic: 374.518799, critic_grad: 1736.396851, value: 370.060913, critic_norm: 4.457897, value_mean: 97.436005, advantage: 19.230099
101
actor: -2.777548, actor_grad: 3.068372, policy mean: 0.558038, policy: -2.815379, entropy: -0.052847, actor_norm: 0.090677, critic: 34.452362, critic_grad: 558.074707, value: 29.271755, critic_norm: 5.180605, value_mean: 122.229675, advantage: -5.041093
101
actor: -8.845458, actor_grad: 6.639596, policy mean: 0.593406, policy: -8.889282, entropy: -0.051522, actor_norm: 0.095347, critic: 230.586334, critic_grad: 1876.519653, value: 225.598328, critic_norm: 4.988010, value_mean: 124.011414, advantage: -14.983449
101
actor: -6.539678, actor_grad: 4.521113, policy mean: 0.552234, policy: -6.585546, entropy: -0.052429, actor_norm: 0.098297, critic: 145.419388, critic_grad: 1134.183228, value: 140.524170, critic_norm: 4.895222, value_mean: 114.732063, advantage: -11.720890
101
actor: -5.996639, actor_grad: 4.091668, policy mean: 0.558540, policy: -6.039762, entropy: -0.054234, actor_norm: 0.097357, critic: 123.087776, critic_grad: 1094.574463, value: 118.281906, critic_norm: 4.805869, value_mean: 114.375687, advantage: -10.839586
101
actor: 6.876991, actor_grad: 9.174967, policy mean: 0.615380, policy: 6.823265, entropy: -0.044064, actor_norm: 0.097790, critic: 121.979088, critic_grad: 1145.079834, value: 117.380203, critic_norm: 4.598882, value_mean: 107.930473, advantage: 10.795021
101
actor: 10.172457, actor_grad: 11.339075, policy mean: 0.595463, policy: 10.112460, entropy: -0.046635, actor_norm: 0.106632, critic: 303.313232, critic_grad: 1542.813721, value: 298.650848, critic_norm: 4.662387, value_mean: 101.999161, advantage: 17.249842
101
actor: 9.147932, actor_grad: 11.700997, policy mean: 0.500864, policy: 9.083014, entropy: -0.049507, actor_norm: 0.114425, critic: 336.567810, critic_grad: 1911.373779, value: 331.901550, critic_norm: 4.666268, value_mean: 112.399567, advantage: 18.158514
101
actor: -1.263937, actor_grad: 0.696644, policy mean: 0.505808, policy: -1.329768, entropy: -0.049343, actor_norm: 0.115174, critic: 11.919788, critic_grad: 204.780487, value: 6.888371, critic_norm: 5.031418, value_mean: 115.318039, advantage: -2.358213
101
actor: -12.135139, actor_grad: 17.450832, policy mean: 0.548209, policy: -12.203558, entropy: -0.048227, actor_norm: 0.116646, critic: 505.016846, critic_grad: 3164.525146, value: 499.516113, critic_norm: 5.500748, value_mean: 146.244949, advantage: -22.296587
101
actor: 5.459236, actor_grad: 6.485394, policy mean: 0.477462, policy: 5.378160, entropy: -0.048352, actor_norm: 0.129428, critic: 133.032410, critic_grad: 885.906006, value: 128.453934, critic_norm: 4.578470, value_mean: 97.920296, advantage: 11.281973
101
actor: 0.524606, actor_grad: 0.593212, policy mean: 0.594700, policy: 0.440180, entropy: -0.041005, actor_norm: 0.125431, critic: 6.666283, critic_grad: 91.241241, value: 1.062191, critic_norm: 5.604091, value_mean: 142.507477, advantage: 0.646214
101
actor: 0.137120, actor_grad: 0.434989, policy mean: 0.402911, policy: 0.058052, entropy: -0.051137, actor_norm: 0.130205, critic: 5.063258, critic_grad: 13.613765, value: 0.552646, critic_norm: 4.510612, value_mean: 103.427063, advantage: 0.181562
101
actor: 16.061659, actor_grad: 34.974697, policy mean: 0.689855, policy: 15.969557, entropy: -0.038966, actor_norm: 0.131067, critic: 555.743958, critic_grad: 2935.957031, value: 551.006714, critic_norm: 4.737228, value_mean: 112.208397, advantage: 23.399570
101
actor: -0.367776, actor_grad: 0.815757, policy mean: 0.442572, policy: -0.450962, entropy: -0.046874, actor_norm: 0.130059, critic: 7.658850, critic_grad: 144.369019, value: 2.270704, critic_norm: 5.388145, value_mean: 137.972870, advantage: -1.149268
101
actor: -8.349724, actor_grad: 4.022652, policy mean: 0.690826, policy: -8.444483, entropy: -0.039288, actor_norm: 0.134047, critic: 156.515198, critic_grad: 1567.871094, value: 151.837677, critic_norm: 4.677518, value_mean: 112.058853, advantage: -12.200356
101
actor: 8.301553, actor_grad: 14.822309, policy mean: 0.453033, policy: 8.212294, entropy: -0.046862, actor_norm: 0.136121, critic: 338.554199, critic_grad: 1509.809448, value: 334.258759, critic_norm: 4.295429, value_mean: 90.756111, advantage: 18.256081
101
actor: 5.883161, actor_grad: 2.671459, policy mean: 0.428385, policy: 5.792214, entropy: -0.047211, actor_norm: 0.138158, critic: 193.767227, critic_grad: 1723.270996, value: 188.730438, critic_norm: 5.036792, value_mean: 120.632507, advantage: 13.644550
101
actor: -10.801872, actor_grad: 1.694394, policy mean: 0.466163, policy: -10.895035, entropy: -0.045743, actor_norm: 0.138906, critic: 570.296387, critic_grad: 2877.044922, value: 565.209595, critic_norm: 5.086809, value_mean: 126.354706, advantage: -23.705881
101
actor: 2.164729, actor_grad: 0.696333, policy mean: 0.496898, policy: 2.067471, entropy: -0.042190, actor_norm: 0.139449, critic: 22.613998, critic_grad: 353.800537, value: 18.215668, critic_norm: 4.398330, value_mean: 97.300667, advantage: 4.162398
101
actor: 8.237079, actor_grad: 3.011166, policy mean: 0.493714, policy: 8.135853, entropy: -0.044106, actor_norm: 0.145331, critic: 277.069916, critic_grad: 1545.852783, value: 272.246399, critic_norm: 4.823530, value_mean: 112.653778, advantage: 16.447838
101
actor: -12.270077, actor_grad: 28.287283, policy mean: 0.483992, policy: -12.369279, entropy: -0.042700, actor_norm: 0.141902, critic: 660.545654, critic_grad: 4022.319092, value: 654.649658, critic_norm: 5.895989, value_mean: 166.195801, advantage: -25.492752
101
actor: 4.840564, actor_grad: 10.212172, policy mean: 0.395098, policy: 4.737783, entropy: -0.043401, actor_norm: 0.146182, critic: 151.433548, critic_grad: 1793.795288, value: 146.440308, critic_norm: 4.993246, value_mean: 139.478088, advantage: 12.096651
101
actor: -10.517077, actor_grad: 19.605204, policy mean: 0.518562, policy: -10.623497, entropy: -0.042183, actor_norm: 0.148602, critic: 453.044189, critic_grad: 3510.144775, value: 446.730804, critic_norm: 6.313372, value_mean: 199.565720, advantage: -20.829651
101
actor: 15.198006, actor_grad: 44.807671, policy mean: 0.516732, policy: 15.082186, entropy: -0.038788, actor_norm: 0.154608, critic: 953.610046, critic_grad: 5572.941895, value: 947.348877, critic_norm: 6.261195, value_mean: 200.081573, advantage: 30.510841
101
actor: -19.435110, actor_grad: 74.574577, policy mean: 0.762938, policy: -19.559946, entropy: -0.035271, actor_norm: 0.160107, critic: 644.120178, critic_grad: 4541.374512, value: 636.867432, critic_norm: 7.252744, value_mean: 232.980957, advantage: -25.204571
101
actor: 17.759008, actor_grad: 110.928032, policy mean: 0.521155, policy: 17.623367, entropy: -0.035270, actor_norm: 0.170912, critic: 1225.433350, critic_grad: 8410.100586, value: 1217.819824, critic_norm: 7.613527, value_mean: 281.162292, advantage: 34.378647
101
actor: -9.223776, actor_grad: 2.439288, policy mean: 0.379003, policy: -9.363725, entropy: -0.036407, actor_norm: 0.176356, critic: 627.010498, critic_grad: 5243.084961, value: 619.515869, critic_norm: 7.494641, value_mean: 266.181335, advantage: -24.040598
101
actor: -3.703709, actor_grad: 33.538269, policy mean: 0.564092, policy: -3.856819, entropy: -0.032403, actor_norm: 0.185514, critic: 76.892052, critic_grad: 2611.289795, value: 67.480743, critic_norm: 9.411310, value_mean: 404.837402, advantage: -7.737022
101
actor: -8.587219, actor_grad: 60.882656, policy mean: 0.272485, policy: -8.743905, entropy: -0.033388, actor_norm: 0.190074, critic: 1029.118530, critic_grad: 10068.995117, value: 1019.798767, critic_norm: 9.319777, value_mean: 381.501648, advantage: -31.783268
96
actor: -144.385544, actor_grad: 1084.096680, policy mean: 0.382796, policy: -144.563995, entropy: -0.028471, actor_norm: 0.206922, critic: 142852.656250, critic_grad: 137588.890625, value: 142843.281250, critic_norm: 9.378135, value_mean: 391.661621, advantage: -376.745789
66
actor: -466.618500, actor_grad: 1874.106323, policy mean: 1.968929, policy: -466.845917, entropy: -0.004959, actor_norm: 0.232359, critic: 55735.894531, critic_grad: 42181.960938, value: 55728.347656, critic_norm: 7.545405, value_mean: 237.561234, advantage: -236.066223
101
actor: 7.305066, actor_grad: 106.061745, policy mean: 0.203864, policy: 7.090175, entropy: -0.025222, actor_norm: 0.240113, critic: 1282.394287, critic_grad: 6491.067871, value: 1276.374634, critic_norm: 6.019645, value_mean: 149.552185, advantage: 35.171673
100
actor: -77.964760, actor_grad: 638.959778, policy mean: 1.085029, policy: -78.188385, entropy: -0.015538, actor_norm: 0.239169, critic: 5080.121094, critic_grad: 13116.840820, value: 5074.461914, critic_norm: 5.659389, value_mean: 73.706772, advantage: -71.231644
101
actor: 0.585819, actor_grad: 1.324575, policy mean: 0.456303, policy: 0.376208, entropy: -0.025665, actor_norm: 0.235277, critic: 6.444387, critic_grad: 65.202438, value: 0.846272, critic_norm: 5.598115, value_mean: 53.198418, advantage: 0.773318
101
actor: 5.974490, actor_grad: 1.976406, policy mean: 0.405454, policy: 5.777674, entropy: -0.027872, actor_norm: 0.224687, critic: 202.078903, critic_grad: 1069.478271, value: 196.747986, critic_norm: 5.330912, value_mean: 68.722992, advantage: 13.990839
101
actor: -5.312825, actor_grad: 3.535703, policy mean: 0.389434, policy: -5.493600, entropy: -0.026211, actor_norm: 0.206986, critic: 189.295349, critic_grad: 1409.913208, value: 183.931259, critic_norm: 5.364087, value_mean: 83.642052, advantage: -13.531966
101
actor: -7.820983, actor_grad: 2.266030, policy mean: 0.419509, policy: -7.976324, entropy: -0.033484, actor_norm: 0.188825, critic: 364.860046, critic_grad: 1527.563721, value: 359.562256, critic_norm: 5.297792, value_mean: 100.364258, advantage: -18.879421
101
actor: 5.759052, actor_grad: 7.373669, policy mean: 0.713255, policy: 5.608932, entropy: -0.029527, actor_norm: 0.179646, critic: 65.980194, critic_grad: 543.693359, value: 61.265198, critic_norm: 4.714993, value_mean: 72.544342, advantage: 7.823181
101
actor: 14.459070, actor_grad: 45.065845, policy mean: 0.697246, policy: 14.328357, entropy: -0.032378, actor_norm: 0.163092, critic: 431.367889, critic_grad: 2128.675781, value: 426.597260, critic_norm: 4.770617, value_mean: 76.252815, advantage: 20.578627
101
actor: -2.372962, actor_grad: 3.732619, policy mean: 0.403019, policy: -2.479808, entropy: -0.042583, actor_norm: 0.149429, critic: 45.346653, critic_grad: 534.604919, value: 40.102509, critic_norm: 5.244143, value_mean: 112.123734, advantage: -6.287601
101
actor: -7.618215, actor_grad: 5.121832, policy mean: 0.488360, policy: -7.714121, entropy: -0.043990, actor_norm: 0.139895, critic: 263.082581, critic_grad: 1425.864136, value: 258.005554, critic_norm: 5.077019, value_mean: 112.344376, advantage: -16.004410
101
actor: -0.794612, actor_grad: 0.489365, policy mean: 0.608770, policy: -0.881408, entropy: -0.044072, actor_norm: 0.130869, critic: 8.079363, critic_grad: 102.367706, value: 3.495675, critic_norm: 4.583688, value_mean: 96.157265, advantage: -1.313930
101
actor: 16.732101, actor_grad: 34.450954, policy mean: 0.903726, policy: 16.639008, entropy: -0.033698, actor_norm: 0.126791, critic: 333.980133, critic_grad: 2013.362183, value: 329.489532, critic_norm: 4.490612, value_mean: 95.161240, advantage: 18.133743
101
actor: 21.371666, actor_grad: 56.739708, policy mean: 0.857823, policy: 21.283991, entropy: -0.037357, actor_norm: 0.125032, critic: 591.331482, critic_grad: 2872.803955, value: 586.797363, critic_norm: 4.534138, value_mean: 95.659698, advantage: 24.092272
101
actor: 5.629517, actor_grad: 3.580392, policy mean: 0.493679, policy: 5.551317, entropy: -0.045417, actor_norm: 0.123616, critic: 132.737061, critic_grad: 947.100037, value: 128.352509, critic_norm: 4.384553, value_mean: 92.081406, advantage: 11.243526
101
actor: 1.211921, actor_grad: 0.558916, policy mean: 0.499865, policy: 1.143921, entropy: -0.046535, actor_norm: 0.114535, critic: 11.574781, critic_grad: 213.150238, value: 6.978246, critic_norm: 4.596536, value_mean: 97.381599, advantage: 2.400223
101
actor: -8.969183, actor_grad: 3.566563, policy mean: 0.628343, policy: -9.034717, entropy: -0.045306, actor_norm: 0.110840, critic: 212.052902, critic_grad: 1217.645386, value: 206.983612, critic_norm: 5.069291, value_mean: 114.673111, advantage: -14.355710
101
actor: -10.008596, actor_grad: 10.897026, policy mean: 0.606799, policy: -10.070913, entropy: -0.047721, actor_norm: 0.110038, critic: 284.930023, critic_grad: 1386.687866, value: 280.109924, critic_norm: 4.820096, value_mean: 110.236130, advantage: -16.715044
101
actor: -4.595946, actor_grad: 5.505569, policy mean: 0.423946, policy: -4.650458, entropy: -0.053119, actor_norm: 0.107631, critic: 132.644318, critic_grad: 1443.763794, value: 127.331276, critic_norm: 5.313046, value_mean: 151.240570, advantage: -10.844164
101
actor: 2.806128, actor_grad: 4.805934, policy mean: 0.385219, policy: 2.736525, entropy: -0.050413, actor_norm: 0.120015, critic: 54.744987, critic_grad: 1252.100220, value: 49.013905, critic_norm: 5.731081, value_mean: 191.876526, advantage: 6.933763
101
actor: -4.922540, actor_grad: 10.271813, policy mean: 0.743567, policy: -5.018716, entropy: -0.036473, actor_norm: 0.132649, critic: 48.034866, critic_grad: 926.974670, value: 41.912239, critic_norm: 6.122628, value_mean: 181.846863, advantage: -6.389400
101
actor: -12.905622, actor_grad: 49.898479, policy mean: 0.435172, policy: -13.000515, entropy: -0.043021, actor_norm: 0.137914, critic: 986.861389, critic_grad: 5896.368652, value: 980.180908, critic_norm: 6.680470, value_mean: 227.855225, advantage: -30.512419
101
actor: -16.669851, actor_grad: 59.415829, policy mean: 0.350168, policy: -16.771650, entropy: -0.041790, actor_norm: 0.143589, critic: 2216.888672, critic_grad: 14523.059570, value: 2207.628662, critic_norm: 9.260098, value_mean: 388.392487, advantage: -46.518661
101
actor: -22.325413, actor_grad: 69.078751, policy mean: 0.601703, policy: -22.436640, entropy: -0.034087, actor_norm: 0.145314, critic: 1456.045288, critic_grad: 11998.149414, value: 1444.149536, critic_norm: 11.895741, value_mean: 474.975891, advantage: -36.754452
91
actor: -609.820679, actor_grad: 2704.952881, policy mean: 0.883195, policy: -609.953552, entropy: -0.027617, actor_norm: 0.160435, critic: 435603.812500, critic_grad: 286187.468750, value: 435589.468750, critic_norm: 14.337670, value_mean: 669.887878, advantage: -657.028870
101
actor: -45.397022, actor_grad: 224.670868, policy mean: 0.749219, policy: -45.541756, entropy: -0.029569, actor_norm: 0.174302, critic: 3591.659180, critic_grad: 31176.285156, value: 3573.587646, critic_norm: 18.071594, value_mean: 853.115845, advantage: -59.705059
36
actor: -258.730927, actor_grad: 1059.460083, policy mean: 0.448318, policy: -258.893738, entropy: -0.014660, actor_norm: 0.177461, critic: 330466.562500, critic_grad: 168290.687500, value: 330448.125000, critic_norm: 18.437365, value_mean: 577.228882, advantage: -574.753784
101
actor: 53.899292, actor_grad: 124.989098, policy mean: 0.764891, policy: 53.739586, entropy: -0.030823, actor_norm: 0.190530, critic: 4671.630859, critic_grad: 21677.404297, value: 4653.783203, critic_norm: 17.847820, value_mean: 609.248596, advantage: 67.638412
101
actor: -1.731980, actor_grad: 29.475061, policy mean: 0.501146, policy: -1.897204, entropy: -0.031330, actor_norm: 0.196555, critic: 319.291962, critic_grad: 903.822388, value: 300.227020, critic_norm: 19.064938, value_mean: 779.292358, advantage: -0.281292
99
actor: -370.778503, actor_grad: 8420.033203, policy mean: 0.416857, policy: -370.962036, entropy: -0.024986, actor_norm: 0.208526, critic: 785145.937500, critic_grad: 615466.437500, value: 785127.562500, critic_norm: 18.382854, value_mean: 888.054688, advantage: -886.068054
101
actor: 8.746913, actor_grad: 104.908478, policy mean: 0.936608, policy: 8.538793, entropy: -0.022900, actor_norm: 0.231021, critic: 654.165833, critic_grad: 5993.126953, value: 639.367676, critic_norm: 14.798141, value_mean: 483.691528, advantage: 13.538541
45
actor: -434.473602, actor_grad: 1022.694763, policy mean: 1.427965, policy: -434.720093, entropy: -0.010497, actor_norm: 0.256999, critic: 94555.007812, critic_grad: 57201.976562, value: 94542.296875, critic_norm: 12.708030, value_mean: 313.948792, advantage: -307.221252
101
actor: 10.468769, actor_grad: 160.589096, policy mean: 0.306960, policy: 10.204062, entropy: -0.019926, actor_norm: 0.284633, critic: 1103.301025, critic_grad: 6316.710449, value: 1093.849121, critic_norm: 9.451934, value_mean: 185.941925, advantage: 31.154072
101
actor: 10.578799, actor_grad: 126.037170, policy mean: 0.580660, policy: 10.283630, entropy: -0.014802, actor_norm: 0.309971, critic: 505.095367, critic_grad: 2708.447266, value: 498.440430, critic_norm: 6.654952, value_mean: 86.618362, advantage: 21.483662
101
actor: 11.252104, actor_grad: 58.550072, policy mean: 1.083989, policy: 10.947148, entropy: -0.011259, actor_norm: 0.316215, critic: 116.171402, critic_grad: 768.287476, value: 111.144821, critic_norm: 5.026583, value_mean: 52.658428, advantage: 10.493747
101
actor: 4.378996, actor_grad: 24.278021, policy mean: 0.363989, policy: 4.101869, entropy: -0.016123, actor_norm: 0.293250, critic: 115.578575, critic_grad: 826.180298, value: 110.835762, critic_norm: 4.742810, value_mean: 63.804512, advantage: 10.453655
101
actor: -1.288683, actor_grad: 11.055367, policy mean: 0.275940, policy: -1.552590, entropy: -0.008476, actor_norm: 0.272382, critic: 31.946625, critic_grad: 440.730469, value: 27.506744, critic_norm: 4.439881, value_mean: 56.074158, advantage: -5.206762
101
actor: 3.232996, actor_grad: 2.697116, policy mean: 0.859581, policy: 2.987923, entropy: -0.016424, actor_norm: 0.261497, critic: 17.179508, critic_grad: 232.479797, value: 12.959906, critic_norm: 4.219603, value_mean: 63.722996, advantage: 3.249985
101
actor: 1.501217, actor_grad: 9.498550, policy mean: 0.222245, policy: 1.284738, entropy: -0.022355, actor_norm: 0.238834, critic: 39.498093, critic_grad: 448.649353, value: 34.790398, critic_norm: 4.707693, value_mean: 87.192177, advantage: 5.867496
101
actor: -2.699546, actor_grad: 11.686588, policy mean: 0.208768, policy: -2.907969, entropy: -0.022270, actor_norm: 0.230692, critic: 203.965027, critic_grad: 1080.263550, value: 199.297913, critic_norm: 4.667107, value_mean: 96.296860, advantage: -14.063627
101
actor: 4.343924, actor_grad: 10.041253, policy mean: 0.263648, policy: 4.150487, entropy: -0.026696, actor_norm: 0.220132, critic: 250.752258, critic_grad: 1071.885498, value: 246.644348, critic_norm: 4.107906, value_mean: 84.621475, advantage: 15.704125
101
actor: 4.177166, actor_grad: 2.455734, policy mean: 0.341955, policy: 4.014525, entropy: -0.032857, actor_norm: 0.195498, critic: 124.011909, critic_grad: 948.910828, value: 119.316704, critic_norm: 4.695204, value_mean: 110.366753, advantage: 10.758338
101
actor: -6.503981, actor_grad: 13.730621, policy mean: 0.401661, policy: -6.650924, entropy: -0.034641, actor_norm: 0.181584, critic: 282.135712, critic_grad: 1423.670898, value: 277.110413, critic_norm: 5.025314, value_mean: 115.869675, advantage: -16.634779
101
actor: -4.482540, actor_grad: 3.016827, policy mean: 0.674041, policy: -4.622304, entropy: -0.032358, actor_norm: 0.172122, critic: 48.804611, critic_grad: 532.491272, value: 44.314270, critic_norm: 4.490341, value_mean: 104.584106, advantage: -6.572707
101
actor: 1.193186, actor_grad: 3.204167, policy mean: 0.847072, policy: 1.051444, entropy: -0.028196, actor_norm: 0.169938, critic: 7.577326, critic_grad: 151.656158, value: 3.034613, critic_norm: 4.542713, value_mean: 109.299339, advantage: 1.538348
31
actor: -85.450851, actor_grad: 240.023621, policy mean: 0.928457, policy: -85.594261, entropy: -0.023553, actor_norm: 0.166964, critic: 8135.207031, critic_grad: 7312.446289, value: 8130.738770, critic_norm: 4.468181, value_mean: 104.273521, advantage: -89.763741
101
actor: 2.689800, actor_grad: 0.916040, policy mean: 0.311825, policy: 2.563945, entropy: -0.042622, actor_norm: 0.168477, critic: 75.416389, critic_grad: 792.957031, value: 70.903946, critic_norm: 4.512441, value_mean: 104.005844, advantage: 8.221169
101
actor: -22.507961, actor_grad: 27.852194, policy mean: 0.916781, policy: -22.636539, entropy: -0.034362, actor_norm: 0.162942, critic: 728.957275, critic_grad: 3050.754150, value: 724.242981, critic_norm: 4.714303, value_mean: 114.072510, advantage: -26.756878
101
actor: 11.194613, actor_grad: 32.004780, policy mean: 0.550371, policy: 11.066087, entropy: -0.034152, actor_norm: 0.162679, critic: 399.792603, critic_grad: 1558.606445, value: 395.658905, critic_norm: 4.133700, value_mean: 84.839607, advantage: 19.803835
101
actor: 6.633132, actor_grad: 1.063394, policy mean: 0.375351, policy: 6.514409, entropy: -0.040465, actor_norm: 0.159187, critic: 293.901184, critic_grad: 1537.490845, value: 289.791718, critic_norm: 4.109458, value_mean: 82.954742, advantage: 16.929653
101
actor: 4.871565, actor_grad: 3.893197, policy mean: 0.460643, policy: 4.765252, entropy: -0.042931, actor_norm: 0.149244, critic: 111.010597, critic_grad: 881.257202, value: 107.040443, critic_norm: 3.970156, value_mean: 73.829506, advantage: 10.344383
101
actor: 8.108575, actor_grad: 4.019870, policy mean: 0.679918, policy: 8.013544, entropy: -0.039370, actor_norm: 0.134400, critic: 143.397278, critic_grad: 910.784729, value: 138.968704, critic_norm: 4.428577, value_mean: 95.093613, advantage: 11.778959
101
actor: 8.428099, actor_grad: 31.648314, policy mean: 0.613405, policy: 8.327458, entropy: -0.035337, actor_norm: 0.135976, critic: 204.870499, critic_grad: 1698.697144, value: 199.768723, critic_norm: 5.101773, value_mean: 132.050354, advantage: 13.855065
69
actor: -173.138000, actor_grad: 415.930664, policy mean: 1.472252, policy: -173.265320, entropy: -0.008734, actor_norm: 0.136045, critic: 14232.864258, critic_grad: 13938.801758, value: 14228.236328, critic_norm: 4.628318, value_mean: 122.224953, advantage: -119.264648
101
actor: -3.017190, actor_grad: 13.488256, policy mean: 0.408457, policy: -3.124440, entropy: -0.037876, actor_norm: 0.145126, critic: 64.023293, critic_grad: 1210.045410, value: 58.626766, critic_norm: 5.396527, value_mean: 157.556519, advantage: -7.556178
101
actor: 1.570625, actor_grad: 1.816224, policy mean: 0.570016, policy: 1.459666, entropy: -0.037982, actor_norm: 0.148942, critic: 12.619160, critic_grad: 273.283417, value: 7.844882, critic_norm: 4.774278, value_mean: 121.392441, advantage: 2.618454
101
actor: -0.325095, actor_grad: 0.322873, policy mean: 0.463627, policy: -0.439714, entropy: -0.035432, actor_norm: 0.150051, critic: 7.371102, critic_grad: 132.876648, value: 2.363120, critic_norm: 5.007982, value_mean: 138.934357, advantage: -0.986806
101
actor: 7.858161, actor_grad: 7.854919, policy mean: 0.759496, policy: 7.738146, entropy: -0.032185, actor_norm: 0.152201, critic: 106.542274, critic_grad: 1043.378296, value: 102.472931, critic_norm: 4.069342, value_mean: 93.610100, advantage: 10.108072
101
actor: 5.051944, actor_grad: 6.300079, policy mean: 0.411706, policy: 4.936799, entropy: -0.037269, actor_norm: 0.152414, critic: 183.673447, critic_grad: 1309.032715, value: 179.061859, critic_norm: 4.611586, value_mean: 107.495674, advantage: 12.954556
101
actor: -7.349272, actor_grad: 10.279873, policy mean: 0.403869, policy: -7.458339, entropy: -0.040265, actor_norm: 0.149333, critic: 345.020020, critic_grad: 1727.399902, value: 340.341614, critic_norm: 4.678416, value_mean: 107.012100, advantage: -18.434954
101
actor: -2.438082, actor_grad: 3.582566, policy mean: 0.452448, policy: -2.553998, entropy: -0.033302, actor_norm: 0.149217, critic: 34.532478, critic_grad: 495.068359, value: 30.561447, critic_norm: 3.971030, value_mean: 82.166451, advantage: -5.517322
101
actor: 4.336574, actor_grad: 7.096894, policy mean: 0.449338, policy: 4.229632, entropy: -0.036224, actor_norm: 0.143166, critic: 94.612389, critic_grad: 934.971680, value: 90.619133, critic_norm: 3.993253, value_mean: 66.288353, advantage: 9.316996
101
actor: -9.579740, actor_grad: 16.992653, policy mean: 0.556570, policy: -9.680825, entropy: -0.038159, actor_norm: 0.139245, critic: 310.384125, critic_grad: 1400.464478, value: 305.753326, critic_norm: 4.630811, value_mean: 103.414886, advantage: -17.476250
101
actor: -4.665190, actor_grad: 1.169385, policy mean: 0.529968, policy: -4.750090, entropy: -0.046994, actor_norm: 0.131894, critic: 87.663734, critic_grad: 1442.687378, value: 82.450417, critic_norm: 5.213320, value_mean: 157.291214, advantage: -9.034636
87
actor: -114.861816, actor_grad: 193.484238, policy mean: 1.028570, policy: -114.974915, entropy: -0.024418, actor_norm: 0.137523, critic: 12034.839844, critic_grad: 10966.636719, value: 12030.210938, critic_norm: 4.628627, value_mean: 120.544434, advantage: -109.380028
101
actor: 5.813958, actor_grad: 4.369342, policy mean: 0.466777, policy: 5.712722, entropy: -0.042699, actor_norm: 0.143935, critic: 152.975784, critic_grad: 1254.171631, value: 148.055084, critic_norm: 4.920706, value_mean: 128.025162, advantage: 12.124975
101
actor: -14.380281, actor_grad: 59.030434, policy mean: 0.765150, policy: -14.504826, entropy: -0.028864, actor_norm: 0.153408, critic: 390.791138, critic_grad: 4141.766113, value: 384.741455, critic_norm: 6.049691, value_mean: 206.165314, advantage: -19.549484
101
actor: 8.403541, actor_grad: 3.100331, policy mean: 0.436441, policy: 8.273901, entropy: -0.036273, actor_norm: 0.165913, critic: 386.839600, critic_grad: 2433.111084, value: 382.301208, critic_norm: 4.538378, value_mean: 117.427841, advantage: 19.454803
101
actor: 6.382844, actor_grad: 10.316175, policy mean: 0.430003, policy: 6.252607, entropy: -0.036300, actor_norm: 0.166537, critic: 221.918457, critic_grad: 1760.531738, value: 217.401260, critic_norm: 4.517196, value_mean: 103.791611, advantage: 14.482145
101
actor: -0.151365, actor_grad: 0.845935, policy mean: 0.606156, policy: -0.291608, entropy: -0.028327, actor_norm: 0.168570, critic: 8.530634, critic_grad: 69.335442, value: 4.151500, critic_norm: 4.379134, value_mean: 71.780212, advantage: -0.915551
101
actor: -4.793623, actor_grad: 2.967643, policy mean: 0.340521, policy: -4.907702, entropy: -0.039955, actor_norm: 0.154034, critic: 208.014587, critic_grad: 1144.901978, value: 203.549225, critic_norm: 4.465360, value_mean: 78.430481, advantage: -14.243355
101
actor: -0.750879, actor_grad: 1.888652, policy mean: 0.402840, policy: -0.853798, entropy: -0.042193, actor_norm: 0.145111, critic: 9.382807, critic_grad: 159.491577, value: 4.904049, critic_norm: 4.478758, value_mean: 85.616364, advantage: -2.131105
101
actor: -4.553220, actor_grad: 10.131720, policy mean: 0.554682, policy: -4.643667, entropy: -0.046957, actor_norm: 0.137404, critic: 72.871132, critic_grad: 818.781616, value: 67.548241, critic_norm: 5.322889, value_mean: 134.123138, advantage: -7.573256
101
actor: 12.183301, actor_grad: 18.312223, policy mean: 0.415536, policy: 12.089718, entropy: -0.044649, actor_norm: 0.138233, critic: 848.227173, critic_grad: 4084.586914, value: 843.020508, critic_norm: 5.206649, value_mean: 150.153687, advantage: 28.689024
101
actor: -8.875005, actor_grad: 3.314785, policy mean: 0.375482, policy: -8.967048, entropy: -0.047071, actor_norm: 0.139114, critic: 581.534485, critic_grad: 2416.941406, value: 576.146362, critic_norm: 5.388122, value_mean: 134.567474, advantage: -23.968563
101
actor: -8.078251, actor_grad: 9.740979, policy mean: 0.427100, policy: -8.177936, entropy: -0.043125, actor_norm: 0.142810, critic: 378.528717, critic_grad: 2234.255127, value: 373.424133, critic_norm: 5.104589, value_mean: 136.445084, advantage: -19.235533
101
actor: -20.084404, actor_grad: 52.925880, policy mean: 0.905389, policy: -20.214140, entropy: -0.024107, actor_norm: 0.153842, critic: 457.057587, critic_grad: 3176.049316, value: 450.402039, critic_norm: 6.655544, value_mean: 198.151993, advantage: -20.650213
101
actor: 3.874094, actor_grad: 10.041981, policy mean: 0.535972, policy: 3.756516, entropy: -0.036741, actor_norm: 0.154320, critic: 58.222374, critic_grad: 1183.262695, value: 52.798088, critic_norm: 5.424284, value_mean: 167.295959, advantage: 7.254823
27
actor: -156.372299, actor_grad: 233.668884, policy mean: 1.094274, policy: -156.500137, entropy: -0.028135, actor_norm: 0.155971, critic: 18896.005859, critic_grad: 14412.805664, value: 18889.984375, critic_norm: 6.021600, value_mean: 149.772537, advantage: -136.913544
101
actor: 8.805789, actor_grad: 4.363976, policy mean: 0.400224, policy: 8.675646, entropy: -0.035674, actor_norm: 0.165818, critic: 480.132172, critic_grad: 2078.927002, value: 475.199158, critic_norm: 4.933009, value_mean: 118.395111, advantage: 21.699389
101
actor: -2.172514, actor_grad: 2.816230, policy mean: 0.471291, policy: -2.301902, entropy: -0.034987, actor_norm: 0.164375, critic: 31.370651, critic_grad: 733.751160, value: 26.081669, critic_norm: 5.288981, value_mean: 144.652924, advantage: -4.918899
101
actor: -0.505104, actor_grad: 1.354238, policy mean: 0.664656, policy: -0.639045, entropy: -0.031356, actor_norm: 0.165297, critic: 5.977748, critic_grad: 78.791588, value: 1.529797, critic_norm: 4.447951, value_mean: 104.895020, advantage: -0.827730
101
actor: 12.442892, actor_grad: 13.143837, policy mean: 0.462898, policy: 12.315461, entropy: -0.033659, actor_norm: 0.161090, critic: 669.195129, critic_grad: 2739.222900, value: 664.963135, critic_norm: 4.232014, value_mean: 69.614105, advantage: 25.574627
101
actor: -1.990268, actor_grad: 3.511278, policy mean: 0.732705, policy: -2.111556, entropy: -0.029764, actor_norm: 0.151052, critic: 12.957783, critic_grad: 223.482727, value: 8.977177, critic_norm: 3.980607, value_mean: 66.121964, advantage: -2.809634
101
actor: 10.096153, actor_grad: 23.235132, policy mean: 0.713539, policy: 9.984625, entropy: -0.031975, actor_norm: 0.143504, critic: 202.002213, critic_grad: 1057.277344, value: 197.923584, critic_norm: 4.078625, value_mean: 77.211754, advantage: 13.991203
101
actor: -0.465879, actor_grad: 0.532634, policy mean: 0.415167, policy: -0.554928, entropy: -0.046199, actor_norm: 0.135247, critic: 6.898047, critic_grad: 104.272865, value: 2.166822, critic_norm: 4.731225, value_mean: 106.421341, advantage: -1.264492
84
actor: -98.174133, actor_grad: 163.651596, policy mean: 0.938161, policy: -98.273117, entropy: -0.031053, actor_norm: 0.130034, critic: 10687.414062, critic_grad: 9462.265625, value: 10682.625000, critic_norm: 4.789477, value_mean: 113.060669, advantage: -103.196739
101
actor: 6.029850, actor_grad: 15.879307, policy mean: 0.465991, policy: 5.949345, entropy: -0.049686, actor_norm: 0.130192, critic: 172.432053, critic_grad: 1832.364624, value: 167.106064, critic_norm: 5.325986, value_mean: 155.826706, advantage: 12.774211
101
actor: -3.569098, actor_grad: 8.308330, policy mean: 0.498748, policy: -3.652987, entropy: -0.044623, actor_norm: 0.128512, critic: 60.922722, critic_grad: 1426.987793, value: 55.088310, critic_norm: 5.834412, value_mean: 195.752090, advantage: -7.331066
101
actor: 2.997072, actor_grad: 0.472037, policy mean: 0.459148, policy: 2.907700, entropy: -0.045408, actor_norm: 0.134781, critic: 46.634815, critic_grad: 651.177368, value: 41.444084, critic_norm: 5.190732, value_mean: 135.809570, advantage: 6.360616
30
actor: -125.812347, actor_grad: 270.065948, policy mean: 1.045018, policy: -125.923920, entropy: -0.023984, actor_norm: 0.135559, critic: 13686.009766, critic_grad: 10986.773438, value: 13680.699219, critic_norm: 5.310061, value_mean: 130.456802, advantage: -116.355667
101
actor: 7.676315, actor_grad: 7.599687, policy mean: 0.388094, policy: 7.579279, entropy: -0.045041, actor_norm: 0.142077, critic: 387.695190, critic_grad: 2202.998779, value: 383.143463, critic_norm: 4.551731, value_mean: 118.233421, advantage: 19.518291
101
actor: 1.256765, actor_grad: 0.158184, policy mean: 0.369234, policy: 1.152130, entropy: -0.042686, actor_norm: 0.147320, critic: 16.676861, critic_grad: 321.132446, value: 11.361542, critic_norm: 5.315320, value_mean: 139.143326, advantage: 3.040817
101
actor: 13.171666, actor_grad: 37.691017, policy mean: 0.661846, policy: 13.052113, entropy: -0.035071, actor_norm: 0.154625, critic: 392.255249, critic_grad: 1853.910522, value: 388.193604, critic_norm: 4.061658, value_mean: 78.330254, advantage: 19.674492
101
actor: 7.288645, actor_grad: 4.173370, policy mean: 0.346100, policy: 7.185361, entropy: -0.045814, actor_norm: 0.149098, critic: 440.969147, critic_grad: 2020.645264, value: 436.950348, critic_norm: 4.018786, value_mean: 86.730515, advantage: 20.769951
101
actor: 6.119115, actor_grad: 8.100822, policy mean: 0.514669, policy: 6.017864, entropy: -0.040760, actor_norm: 0.142011, critic: 144.672409, critic_grad: 1178.215454, value: 140.675522, critic_norm: 3.996890, value_mean: 78.789177, advantage: 11.768653
101
actor: -0.793474, actor_grad: 0.431724, policy mean: 0.470687, policy: -0.878950, entropy: -0.046803, actor_norm: 0.132279, critic: 8.622327, critic_grad: 176.180923, value: 4.451997, critic_norm: 4.170330, value_mean: 84.856712, advantage: -1.975355
101
actor: -7.351672, actor_grad: 13.979100, policy mean: 0.638131, policy: -7.433318, entropy: -0.044124, actor_norm: 0.125770, critic: 144.471161, critic_grad: 999.084167, value: 139.843475, critic_norm: 4.627687, value_mean: 104.050583, advantage: -11.767302
101
actor: -7.436437, actor_grad: 4.874097, policy mean: 0.467699, policy: -7.508191, entropy: -0.050060, actor_norm: 0.121815, critic: 264.141754, critic_grad: 1941.978516, value: 258.846191, critic_norm: 5.295550, value_mean: 152.047028, advantage: -16.053617
101
actor: -4.323329, actor_grad: 2.559339, policy mean: 0.585045, policy: -4.410924, entropy: -0.041043, actor_norm: 0.128637, critic: 67.519241, critic_grad: 1039.875732, value: 62.498463, critic_norm: 5.020781, value_mean: 151.952728, advantage: -7.780208
101
actor: -9.834050, actor_grad: 7.392202, policy mean: 0.541929, policy: -9.921096, entropy: -0.044923, actor_norm: 0.131970, critic: 328.860046, critic_grad: 2191.799072, value: 323.550690, critic_norm: 5.309343, value_mean: 154.079636, advantage: -17.762745
101
actor: -0.244202, actor_grad: 0.612037, policy mean: 0.326720, policy: -0.329197, entropy: -0.049164, actor_norm: 0.134159, critic: 6.472709, critic_grad: 141.156906, value: 1.172537, critic_norm: 5.300172, value_mean: 163.251724, advantage: -0.990352
101
actor: 4.144502, actor_grad: 6.934349, policy mean: 0.488512, policy: 4.055034, entropy: -0.043769, actor_norm: 0.133237, critic: 73.254395, critic_grad: 1115.342285, value: 68.556412, critic_norm: 4.697979, value_mean: 135.167725, advantage: 8.254730
101
actor: 13.341465, actor_grad: 10.850505, policy mean: 0.526507, policy: 13.243340, entropy: -0.039650, actor_norm: 0.137776, critic: 663.155518, critic_grad: 3153.031494, value: 658.404053, critic_norm: 4.751461, value_mean: 110.660858, advantage: 25.457798
101
actor: -7.168482, actor_grad: 9.443685, policy mean: 0.476857, policy: -7.259418, entropy: -0.043461, actor_norm: 0.134397, critic: 237.610504, critic_grad: 1597.352295, value: 232.558517, critic_norm: 5.051979, value_mean: 117.997131, advantage: -15.228523
101
actor: -10.982612, actor_grad: 5.885349, policy mean: 0.527448, policy: -11.071543, entropy: -0.042791, actor_norm: 0.131723, critic: 451.515137, critic_grad: 2020.525513, value: 446.848328, critic_norm: 4.666810, value_mean: 111.521759, advantage: -21.135887
101
actor: -1.828270, actor_grad: 1.905322, policy mean: 0.435466, policy: -1.912216, entropy: -0.046423, actor_norm: 0.130368, critic: 24.210588, critic_grad: 406.270996, value: 20.133114, critic_norm: 4.077474, value_mean: 83.814560, advantage: -4.435601
101
actor: 2.857172, actor_grad: 5.440007, policy mean: 0.560015, policy: 2.777181, entropy: -0.043395, actor_norm: 0.123386, critic: 28.068184, critic_grad: 377.929901, value: 24.154049, critic_norm: 3.914135, value_mean: 76.464806, advantage: 4.911047
101
actor: -4.991669, actor_grad: 1.752325, policy mean: 0.495614, policy: -5.052622, entropy: -0.050957, actor_norm: 0.111910, critic: 108.411514, critic_grad: 897.840942, value: 103.790382, critic_norm: 4.621132, value_mean: 107.043533, advantage: -10.166176
101
actor: -8.939066, actor_grad: 5.625449, policy mean: 0.617347, policy: -9.001664, entropy: -0.048286, actor_norm: 0.110885, critic: 220.668015, critic_grad: 1346.944824, value: 216.063049, critic_norm: 4.604968, value_mean: 103.129456, advantage: -14.647140
101
actor: -0.774284, actor_grad: 0.865319, policy mean: 0.597612, policy: -0.827868, entropy: -0.050091, actor_norm: 0.103675, critic: 6.671226, critic_grad: 127.273880, value: 2.293681, critic_norm: 4.377545, value_mean: 112.693871, advantage: -1.352597
101
actor: 3.910647, actor_grad: 3.179716, policy mean: 0.592032, policy: 3.862054, entropy: -0.051601, actor_norm: 0.100194, critic: 47.470078, critic_grad: 582.958496, value: 43.444611, critic_norm: 4.025466, value_mean: 99.242096, advantage: 6.563014
101
actor: 6.213492, actor_grad: 1.825082, policy mean: 0.474541, policy: 6.170184, entropy: -0.058829, actor_norm: 0.102137, critic: 172.520309, critic_grad: 1176.899536, value: 168.196960, critic_norm: 4.323343, value_mean: 111.392769, advantage: 12.950371
20
actor: -107.593513, actor_grad: 178.416977, policy mean: 1.056903, policy: -107.657883, entropy: -0.038473, actor_norm: 0.102843, critic: 9502.871094, critic_grad: 8154.632812, value: 9498.501953, critic_norm: 4.369097, value_mean: 107.125488, advantage: -97.261551
101
actor: 9.754625, actor_grad: 13.331940, policy mean: 0.587108, policy: 9.688780, entropy: -0.047086, actor_norm: 0.112932, critic: 288.076904, critic_grad: 1352.227173, value: 284.097900, critic_norm: 3.979009, value_mean: 95.582962, advantage: 16.769146
101
actor: -1.512808, actor_grad: 3.396931, policy mean: 0.776040, policy: -1.592639, entropy: -0.032942, actor_norm: 0.112772, critic: 14.077456, critic_grad: 214.547836, value: 9.268183, critic_norm: 4.809274, value_mean: 121.950645, advantage: -1.830222
101
actor: -10.105309, actor_grad: 7.768875, policy mean: 0.558977, policy: -10.163515, entropy: -0.053912, actor_norm: 0.112118, critic: 341.549927, critic_grad: 2023.667358, value: 336.743225, critic_norm: 4.806705, value_mean: 123.610962, advantage: -18.325241
101
actor: 3.536400, actor_grad: 1.421811, policy mean: 0.626662, policy: 3.472089, entropy: -0.050690, actor_norm: 0.115000, critic: 35.703888, critic_grad: 475.855133, value: 31.721853, critic_norm: 3.982033, value_mean: 94.489502, advantage: 5.605799
101
actor: 15.159487, actor_grad: 40.004734, policy mean: 0.671499, policy: 15.090405, entropy: -0.048657, actor_norm: 0.117739, critic: 484.179962, critic_grad: 2473.169189, value: 480.030182, critic_norm: 4.149782, value_mean: 99.818535, advantage: 21.794178
101
actor: -1.442802, actor_grad: 1.839461, policy mean: 0.491794, policy: -1.504879, entropy: -0.050692, actor_norm: 0.112769, critic: 14.667189, critic_grad: 292.556183, value: 10.117033, critic_norm: 4.550156, value_mean: 110.387375, advantage: -3.146194
101
actor: -7.058462, actor_grad: 0.923669, policy mean: 0.545126, policy: -7.120796, entropy: -0.050931, actor_norm: 0.113265, critic: 177.457672, critic_grad: 1034.118164, value: 173.022339, critic_norm: 4.435339, value_mean: 104.921036, advantage: -13.126165
101
actor: 0.620996, actor_grad: 0.978505, policy mean: 0.495975, policy: 0.553706, entropy: -0.050502, actor_norm: 0.117792, critic: 6.060679, critic_grad: 71.517883, value: 2.146834, critic_norm: 3.913845, value_mean: 89.652237, advantage: 1.030141
101
actor: 7.309192, actor_grad: 1.671013, policy mean: 0.468757, policy: 7.248673, entropy: -0.054345, actor_norm: 0.114865, critic: 242.015793, critic_grad: 1117.648193, value: 238.051056, critic_norm: 3.964730, value_mean: 90.532715, advantage: 15.416994
101
actor: 6.521990, actor_grad: 6.149892, policy mean: 0.548161, policy: 6.458050, entropy: -0.050292, actor_norm: 0.114232, critic: 151.865906, critic_grad: 935.569397, value: 147.567749, critic_norm: 4.298151, value_mean: 101.547089, advantage: 12.085286
101
actor: -3.912689, actor_grad: 4.952168, policy mean: 0.583237, policy: -3.967999, entropy: -0.050997, actor_norm: 0.106307, critic: 50.897961, critic_grad: 603.699036, value: 46.217804, critic_norm: 4.680158, value_mean: 110.659760, advantage: -6.745259
101
actor: -9.237781, actor_grad: 9.006592, policy mean: 0.581351, policy: -9.285240, entropy: -0.054068, actor_norm: 0.101528, critic: 252.448639, critic_grad: 1661.312866, value: 248.148209, critic_norm: 4.300431, value_mean: 107.147552, advantage: -15.654819
101
actor: 5.584533, actor_grad: 6.111345, policy mean: 0.497990, policy: 5.533201, entropy: -0.048878, actor_norm: 0.100209, critic: 125.403679, critic_grad: 1241.109741, value: 121.453857, critic_norm: 3.949823, value_mean: 92.110489, advantage: 11.014714
101
actor: 3.545217, actor_grad: 1.344657, policy mean: 0.515087, policy: 3.501977, entropy: -0.054498, actor_norm: 0.097738, critic: 51.128098, critic_grad: 532.879150, value: 46.715858, critic_norm: 4.412238, value_mean: 106.238228, advantage: 6.681819
101
actor: -8.815119, actor_grad: 8.765595, policy mean: 0.579590, policy: -8.855416, entropy: -0.057099, actor_norm: 0.097396, critic: 232.806183, critic_grad: 1396.109009, value: 228.070572, critic_norm: 4.735610, value_mean: 117.241272, advantage: -15.074074
101
actor: -3.159160, actor_grad: 6.159289, policy mean: 0.626127, policy: -3.212024, entropy: -0.049798, actor_norm: 0.102662, critic: 30.953321, critic_grad: 593.235413, value: 26.898891, critic_norm: 4.054430, value_mean: 90.370895, advantage: -5.153396
101
actor: 9.303543, actor_grad: 15.134542, policy mean: 0.627311, policy: 9.247879, entropy: -0.046368, actor_norm: 0.102032, critic: 219.795685, critic_grad: 1066.691162, value: 215.814560, critic_norm: 3.981125, value_mean: 91.006546, advantage: 14.636437
101
actor: 2.929164, actor_grad: 5.928525, policy mean: 0.620318, policy: 2.869096, entropy: -0.041616, actor_norm: 0.101684, critic: 24.763218, critic_grad: 452.268799, value: 20.257734, critic_norm: 4.505483, value_mean: 110.723373, advantage: 4.378925
101
actor: -12.283442, actor_grad: 12.932849, policy mean: 0.601137, policy: -12.337027, entropy: -0.048786, actor_norm: 0.102371, critic: 418.629303, critic_grad: 2163.249756, value: 414.190063, critic_norm: 4.439251, value_mean: 106.121124, advantage: -20.292339
101
actor: -1.424800, actor_grad: 1.137206, policy mean: 0.553568, policy: -1.474700, entropy: -0.049646, actor_norm: 0.099547, critic: 11.703960, critic_grad: 209.331482, value: 7.699922, critic_norm: 4.004039, value_mean: 89.759552, advantage: -2.646562
101
actor: 8.318950, actor_grad: 5.238169, policy mean: 0.542567, policy: 8.278639, entropy: -0.055837, actor_norm: 0.096149, critic: 238.618179, critic_grad: 1109.449829, value: 234.631882, critic_norm: 3.986296, value_mean: 89.199242, advantage: 15.282948
101
actor: 0.147570, actor_grad: 0.273842, policy mean: 0.612481, policy: 0.105173, entropy: -0.050798, actor_norm: 0.093195, critic: 6.488791, critic_grad: 27.881439, value: 2.109986, critic_norm: 4.378805, value_mean: 111.244064, advantage: 0.182582
101
actor: -6.661985, actor_grad: 5.430744, policy mean: 0.595629, policy: -6.701825, entropy: -0.052126, actor_norm: 0.091966, critic: 132.691360, critic_grad: 1243.101318, value: 128.147675, critic_norm: 4.543683, value_mean: 114.247421, advantage: -11.274506
101
actor: -8.364417, actor_grad: 4.768686, policy mean: 0.554474, policy: -8.401088, entropy: -0.056658, actor_norm: 0.093328, critic: 234.853149, critic_grad: 1262.433960, value: 230.309631, critic_norm: 4.543523, value_mean: 108.555527, advantage: -15.150951
101
actor: -8.811451, actor_grad: 7.098919, policy mean: 0.664025, policy: -8.857374, entropy: -0.048588, actor_norm: 0.094511, critic: 196.813492, critic_grad: 1105.335327, value: 192.444733, critic_norm: 4.368761, value_mean: 102.045303, advantage: -13.792484
101
actor: 2.319762, actor_grad: 0.562047, policy mean: 0.522258, policy: 2.281627, entropy: -0.056010, actor_norm: 0.094145, critic: 23.157869, critic_grad: 333.290985, value: 19.104988, critic_norm: 4.052881, value_mean: 91.318123, advantage: 4.295556
101
actor: 7.709791, actor_grad: 1.651349, policy mean: 0.529768, policy: 7.674120, entropy: -0.056806, actor_norm: 0.092476, critic: 213.341568, critic_grad: 1040.948853, value: 209.311035, critic_norm: 4.030528, value_mean: 92.376358, advantage: 14.446180
101
actor: 6.279318, actor_grad: 4.525580, policy mean: 0.494593, policy: 6.244822, entropy: -0.059468, actor_norm: 0.093965, critic: 163.613968, critic_grad: 985.749207, value: 159.355530, critic_norm: 4.258434, value_mean: 96.911659, advantage: 12.550683
101
actor: -1.608843, actor_grad: 0.324387, policy mean: 0.657262, policy: -1.650783, entropy: -0.051637, actor_norm: 0.093576, critic: 13.647224, critic_grad: 287.412933, value: 9.085177, critic_norm: 4.562047, value_mean: 115.118996, advantage: -2.699575
101
actor: -9.042741, actor_grad: 5.675217, policy mean: 0.613407, policy: -9.086307, entropy: -0.051754, actor_norm: 0.095320, critic: 223.360352, critic_grad: 1347.761230, value: 218.708450, critic_norm: 4.651905, value_mean: 109.165733, advantage: -14.775137
101
actor: -6.690271, actor_grad: 5.215545, policy mean: 0.470630, policy: -6.724287, entropy: -0.059691, actor_norm: 0.093706, critic: 213.849457, critic_grad: 1513.039673, value: 209.393387, critic_norm: 4.456063, value_mean: 105.591805, advantage: -14.408634
101
actor: -3.743852, actor_grad: 2.827893, policy mean: 0.710099, policy: -3.792204, entropy: -0.047059, actor_norm: 0.095411, critic: 38.484348, critic_grad: 459.330414, value: 34.292145, critic_norm: 4.192204, value_mean: 97.767639, advantage: -5.707089
101
actor: 1.481338, actor_grad: 0.266743, policy mean: 0.560790, policy: 1.430152, entropy: -0.050666, actor_norm: 0.101851, critic: 12.214130, critic_grad: 186.526993, value: 8.229475, critic_norm: 3.984656, value_mean: 92.467728, advantage: 2.589494
101
actor: 5.931635, actor_grad: 3.254651, policy mean: 0.608204, policy: 5.881792, entropy: -0.048942, actor_norm: 0.098786, critic: 97.837357, critic_grad: 722.001404, value: 93.819916, critic_norm: 4.017444, value_mean: 91.449425, advantage: 9.681435
101
actor: 10.055300, actor_grad: 3.569182, policy mean: 0.637493, policy: 10.008200, entropy: -0.048109, actor_norm: 0.095209, critic: 248.285645, critic_grad: 1143.564331, value: 244.123505, critic_norm: 4.162137, value_mean: 93.718826, advantage: 15.514831
101
actor: 4.471005, actor_grad: 2.486628, policy mean: 0.628494, policy: 4.431005, entropy: -0.051930, actor_norm: 0.091931, critic: 54.621326, critic_grad: 763.111267, value: 50.206566, critic_norm: 4.414762, value_mean: 104.408340, advantage: 7.009424
101
actor: -6.369893, actor_grad: 4.793677, policy mean: 0.620642, policy: -6.409068, entropy: -0.050521, actor_norm: 0.089697, critic: 111.472275, critic_grad: 1176.050537, value: 106.732010, critic_norm: 4.740262, value_mean: 118.316803, advantage: -10.326307
101
actor: -7.133566, actor_grad: 0.887209, policy mean: 0.562016, policy: -7.167961, entropy: -0.056520, actor_norm: 0.090915, critic: 169.098312, critic_grad: 1009.530151, value: 164.790771, critic_norm: 4.307538, value_mean: 101.535873, advantage: -12.808960
101
actor: 1.844214, actor_grad: 1.664845, policy mean: 0.648744, policy: 1.803526, entropy: -0.050643, actor_norm: 0.091332, critic: 14.072802, critic_grad: 217.147430, value: 10.146797, critic_norm: 3.926004, value_mean: 88.535828, advantage: 3.032764
101
actor: 6.940520, actor_grad: 4.313326, policy mean: 0.511060, policy: 6.904555, entropy: -0.053425, actor_norm: 0.089390, critic: 184.966843, critic_grad: 1080.722046, value: 180.960144, critic_norm: 4.006695, value_mean: 93.403091, advantage: 13.420697
101
actor: 3.541131, actor_grad: 5.354963, policy mean: 0.557359, policy: 3.503592, entropy: -0.051032, actor_norm: 0.088572, critic: 43.251163, critic_grad: 536.614258, value: 38.761177, critic_norm: 4.489987, value_mean: 104.113136, advantage: 5.966350
101
actor: -5.311877, actor_grad: 1.934936, policy mean: 0.489511, policy: -5.344501, entropy: -0.058540, actor_norm: 0.091164, critic: 125.416840, critic_grad: 920.865967, value: 120.893234, critic_norm: 4.523606, value_mean: 107.975677, advantage: -10.969036
101
actor: -5.912608, actor_grad: 4.472682, policy mean: 0.748188, policy: -5.969304, entropy: -0.038409, actor_norm: 0.095105, critic: 76.699394, critic_grad: 659.297119, value: 72.510094, critic_norm: 4.189300, value_mean: 96.803513, advantage: -8.212671
101
actor: 5.201195, actor_grad: 1.939873, policy mean: 0.705468, policy: 5.153058, entropy: -0.048659, actor_norm: 0.096795, critic: 56.896500, critic_grad: 555.528931, value: 52.885841, critic_norm: 4.010660, value_mean: 85.635468, advantage: 7.252380
101
actor: 6.568959, actor_grad: 3.428020, policy mean: 0.558726, policy: 6.526602, entropy: -0.053414, actor_norm: 0.095772, critic: 136.279373, critic_grad: 911.903809, value: 132.101440, critic_norm: 4.177936, value_mean: 97.573708, advantage: 11.414682
101
actor: -0.116854, actor_grad: 0.325501, policy mean: 0.499363, policy: -0.152176, entropy: -0.058201, actor_norm: 0.093523, critic: 5.365396, critic_grad: 27.467192, value: 0.855193, critic_norm: 4.510203, value_mean: 103.381058, advantage: -0.318817
101
actor: -6.773203, actor_grad: 1.221512, policy mean: 0.626850, policy: -6.808662, entropy: -0.053849, actor_norm: 0.089308, critic: 121.874542, critic_grad: 863.494507, value: 117.434258, critic_norm: 4.440281, value_mean: 104.131134, advantage: -10.778899
66
actor: -335.835663, actor_grad: 509.563629, policy mean: 3.481549, policy: -335.927582, entropy: -0.000432, actor_norm: 0.092334, critic: 9493.765625, critic_grad: 12378.215820, value: 9489.761719, critic_norm: 4.003812, value_mean: 98.906036, advantage: -97.411041
101
actor: 6.263819, actor_grad: 6.701125, policy mean: 0.552168, policy: 6.226611, entropy: -0.054781, actor_norm: 0.091990, critic: 135.972305, critic_grad: 1277.384155, value: 131.724487, critic_norm: 4.247819, value_mean: 87.504158, advantage: 11.367567
101
actor: -7.529289, actor_grad: 4.413292, policy mean: 0.549097, policy: -7.569525, entropy: -0.051431, actor_norm: 0.091667, critic: 194.032654, critic_grad: 1174.239746, value: 189.342209, critic_norm: 4.690445, value_mean: 112.325302, advantage: -13.752393
101
actor: -3.389162, actor_grad: 6.015747, policy mean: 0.674925, policy: -3.430933, entropy: -0.048151, actor_norm: 0.089922, critic: 29.606155, critic_grad: 455.778595, value: 25.119110, critic_norm: 4.487046, value_mean: 109.644936, advantage: -4.978002
101
actor: -0.121648, actor_grad: 1.037299, policy mean: 0.548585, policy: -0.157117, entropy: -0.055631, actor_norm: 0.091100, critic: 7.015869, critic_grad: 43.721233, value: 2.613843, critic_norm: 4.402026, value_mean: 117.632217, advantage: -0.287853
101
actor: -3.083306, actor_grad: 1.735740, policy mean: 0.606240, policy: -3.123788, entropy: -0.054055, actor_norm: 0.094537, critic: 30.983828, critic_grad: 549.321655, value: 25.942360, critic_norm: 5.041468, value_mean: 141.405090, advantage: -5.025041
101
actor: 4.344658, actor_grad: 7.119478, policy mean: 0.578799, policy: 4.293138, entropy: -0.050008, actor_norm: 0.101528, critic: 59.084229, critic_grad: 819.606750, value: 54.465775, critic_norm: 4.618455, value_mean: 130.783524, advantage: 7.339238
101
actor: -6.320520, actor_grad: 13.801365, policy mean: 0.799240, policy: -6.392700, entropy: -0.035149, actor_norm: 0.107329, critic: 82.095398, critic_grad: 1103.614136, value: 76.541718, critic_norm: 5.553682, value_mean: 170.673355, advantage: -7.643339
101
actor: -4.721865, actor_grad: 6.237159, policy mean: 0.523389, policy: -4.783057, entropy: -0.049163, actor_norm: 0.110355, critic: 127.681137, critic_grad: 2044.158691, value: 122.708054, critic_norm: 4.973086, value_mean: 184.652481, advantage: -9.922379
101
actor: -7.193723, actor_grad: 10.362340, policy mean: 0.466149, policy: -7.269928, entropy: -0.048920, actor_norm: 0.125125, critic: 244.716782, critic_grad: 1886.504761, value: 238.649857, critic_norm: 6.066930, value_mean: 171.117477, advantage: -15.402859
101
actor: -9.750817, actor_grad: 20.525061, policy mean: 0.393709, policy: -9.847281, entropy: -0.047021, actor_norm: 0.143484, critic: 690.682373, critic_grad: 5740.449219, value: 683.899048, critic_norm: 6.783303, value_mean: 267.134094, advantage: -25.422470
101
actor: 4.608167, actor_grad: 12.920013, policy mean: 0.706903, policy: 4.494895, entropy: -0.036089, actor_norm: 0.149361, critic: 54.369534, critic_grad: 1060.129028, value: 47.642410, critic_norm: 6.727124, value_mean: 220.730194, advantage: 6.236135
101
actor: -10.017989, actor_grad: 19.071720, policy mean: 0.426971, policy: -10.138604, entropy: -0.040075, actor_norm: 0.160689, critic: 587.865356, critic_grad: 5875.504883, value: 579.708801, critic_norm: 8.156532, value_mean: 332.992554, advantage: -23.573753
101
actor: -36.309139, actor_grad: 126.787018, policy mean: 0.675009, policy: -36.463234, entropy: -0.034381, actor_norm: 0.188477, critic: 2949.562988, critic_grad: 16937.902344, value: 2938.666748, critic_norm: 10.896142, value_mean: 462.164124, advantage: -54.096245
30
actor: -281.531036, actor_grad: 732.350586, policy mean: 0.842713, policy: -281.708008, entropy: -0.022017, actor_norm: 0.198980, critic: 109031.742188, critic_grad: 63860.464844, value: 109020.023438, critic_norm: 11.722091, value_mean: 342.084625, advantage: -327.983490
101
actor: 2.413579, actor_grad: 8.482687, policy mean: 0.418709, policy: 2.229223, entropy: -0.032205, actor_norm: 0.216562, critic: 51.312511, critic_grad: 1448.131592, value: 40.110905, critic_norm: 11.201607, value_mean: 442.549561, advantage: 5.297897
101
actor: 65.004578, actor_grad: 328.068970, policy mean: 1.182655, policy: 64.780563, entropy: -0.020814, actor_norm: 0.244829, critic: 4467.669922, critic_grad: 18944.687500, value: 4457.406250, critic_norm: 10.263770, value_mean: 392.660492, advantage: 64.073936
101
actor: -9.880664, actor_grad: 92.116676, policy mean: 0.442735, policy: -10.132973, entropy: -0.024330, actor_norm: 0.276639, critic: 548.770203, critic_grad: 6931.854980, value: 538.659973, critic_norm: 10.110222, value_mean: 413.268768, advantage: -23.039494
68
actor: -2228.408447, actor_grad: 4450.635742, policy mean: 6.067240, policy: -2228.710449, entropy: -0.007728, actor_norm: 0.309852, critic: 133610.531250, critic_grad: 91362.289062, value: 133600.859375, critic_norm: 9.672667, value_mean: 367.925751, advantage: -365.450623
101
actor: 16.843403, actor_grad: 250.233963, policy mean: 0.378881, policy: 16.505039, entropy: -0.017993, actor_norm: 0.356356, critic: 2035.856689, critic_grad: 11259.134766, value: 2028.016724, critic_norm: 7.839916, value_mean: 259.545685, advantage: 43.682800
101
actor: 10.321777, actor_grad: 236.834946, policy mean: 0.399324, policy: 9.943453, entropy: -0.017369, actor_norm: 0.395694, critic: 513.802979, critic_grad: 4104.348145, value: 507.572449, critic_norm: 6.230513, value_mean: 149.633636, advantage: 20.010845
101
actor: 21.713823, actor_grad: 98.121773, policy mean: 0.963010, policy: 21.284174, entropy: -0.010113, actor_norm: 0.439762, critic: 256.548187, critic_grad: 2060.470703, value: 251.372223, critic_norm: 5.175954, value_mean: 74.108994, advantage: 14.287440
101
actor: 4.852996, actor_grad: 82.415695, policy mean: 0.402543, policy: 4.394081, entropy: -0.008867, actor_norm: 0.467783, critic: 168.306381, critic_grad: 1102.371460, value: 163.465729, critic_norm: 4.840654, value_mean: 17.583481, advantage: 11.924032
101
actor: -12.148471, actor_grad: 54.747219, policy mean: 1.188037, policy: -12.556150, entropy: -0.010120, actor_norm: 0.417799, critic: 122.227661, critic_grad: 775.925781, value: 117.468323, critic_norm: 4.759338, value_mean: 55.266479, advantage: -10.751849
101
actor: -0.580763, actor_grad: 2.439169, policy mean: 0.117388, policy: -0.911123, entropy: -0.015204, actor_norm: 0.345565, critic: 57.222103, critic_grad: 609.206116, value: 51.600952, critic_norm: 5.621151, value_mean: 113.009712, advantage: -7.066815
101
actor: -5.730482, actor_grad: 9.969166, policy mean: 0.476314, policy: -6.031000, entropy: -0.014704, actor_norm: 0.315223, critic: 161.320328, critic_grad: 1038.524292, value: 156.077881, critic_norm: 5.242453, value_mean: 109.559807, advantage: -12.485397
101
actor: 0.767896, actor_grad: 0.348298, policy mean: 0.138259, policy: 0.506549, entropy: -0.023060, actor_norm: 0.284408, critic: 17.400000, critic_grad: 278.201782, value: 12.472593, critic_norm: 4.927407, value_mean: 102.721481, advantage: 3.518868
101
actor: -9.776531, actor_grad: 53.836132, policy mean: 0.560084, policy: -10.004601, entropy: -0.023907, actor_norm: 0.251976, critic: 319.979095, critic_grad: 2349.145752, value: 313.894012, critic_norm: 6.085082, value_mean: 172.458206, advantage: -17.646130
101
actor: -7.768082, actor_grad: 27.318399, policy mean: 0.363246, policy: -7.976571, entropy: -0.030767, actor_norm: 0.239256, critic: 475.140991, critic_grad: 4479.886230, value: 467.364075, critic_norm: 7.776913, value_mean: 248.665161, advantage: -21.331184
101
actor: 12.721589, actor_grad: 86.673691, policy mean: 0.445496, policy: 12.488158, entropy: -0.025664, actor_norm: 0.259096, critic: 787.819641, critic_grad: 4509.632812, value: 782.028442, critic_norm: 5.791191, value_mean: 158.943314, advantage: 27.382065
101
actor: 5.081697, actor_grad: 5.786814, policy mean: 0.398934, policy: 4.852693, entropy: -0.027532, actor_norm: 0.256536, critic: 167.561142, critic_grad: 1409.960693, value: 162.892334, critic_norm: 4.668807, value_mean: 98.825523, advantage: 12.594124
101
actor: 1.888690, actor_grad: 3.478107, policy mean: 0.430876, policy: 1.680196, entropy: -0.031206, actor_norm: 0.239700, critic: 23.664957, critic_grad: 390.938324, value: 19.301914, critic_norm: 4.363043, value_mean: 82.034744, advantage: 4.169867
101
actor: -6.717570, actor_grad: 24.741898, policy mean: 0.537420, policy: -6.889046, entropy: -0.032416, actor_norm: 0.203892, critic: 168.870346, critic_grad: 1160.445068, value: 164.479858, critic_norm: 4.390491, value_mean: 85.629692, advantage: -12.800720
101
actor: -0.959728, actor_grad: 1.395457, policy mean: 0.358655, policy: -1.108724, entropy: -0.035418, actor_norm: 0.184414, critic: 13.981535, critic_grad: 225.011200, value: 9.711870, critic_norm: 4.269664, value_mean: 97.188126, advantage: -2.991345
101
actor: 20.398243, actor_grad: 46.717945, policy mean: 1.763924, policy: 20.226189, entropy: -0.010523, actor_norm: 0.182578, critic: 151.088486, critic_grad: 1326.853760, value: 146.543472, critic_norm: 4.545011, value_mean: 124.144257, advantage: 11.994858
101
actor: 5.098044, actor_grad: 20.151014, policy mean: 0.392917, policy: 4.947279, entropy: -0.037347, actor_norm: 0.188112, critic: 172.362427, critic_grad: 1366.497925, value: 167.734665, critic_norm: 4.627767, value_mean: 121.691269, advantage: 12.848448
101
actor: -2.832917, actor_grad: 4.572866, policy mean: 0.568712, policy: -2.998641, entropy: -0.028968, actor_norm: 0.194692, critic: 35.653118, critic_grad: 600.045410, value: 30.500988, critic_norm: 5.152129, value_mean: 143.798904, advantage: -5.484227
101
actor: -27.728193, actor_grad: 98.766975, policy mean: 1.447432, policy: -27.903362, entropy: -0.011996, actor_norm: 0.187164, critic: 379.717010, critic_grad: 3330.754150, value: 373.713989, critic_norm: 6.003031, value_mean: 204.049164, advantage: -18.668427
101
actor: 7.375709, actor_grad: 42.290230, policy mean: 0.353512, policy: 7.208277, entropy: -0.033890, actor_norm: 0.201322, critic: 419.670471, critic_grad: 2390.240479, value: 414.679474, critic_norm: 4.991007, value_mean: 141.098495, advantage: 20.333143
101
actor: -2.046851, actor_grad: 12.184711, policy mean: 0.600256, policy: -2.220083, entropy: -0.028436, actor_norm: 0.201668, critic: 21.266827, critic_grad: 786.552246, value: 15.120477, critic_norm: 6.146349, value_mean: 219.995544, advantage: -3.791933
101
actor: -6.178481, actor_grad: 44.006191, policy mean: 0.566555, policy: -6.361815, entropy: -0.025559, actor_norm: 0.208894, critic: 150.801804, critic_grad: 2940.623291, value: 144.407837, critic_norm: 6.393966, value_mean: 253.758575, advantage: -11.651918
101
actor: 9.277130, actor_grad: 10.556980, policy mean: 0.317681, policy: 9.096614, entropy: -0.031201, actor_norm: 0.211716, critic: 984.375000, critic_grad: 4900.709961, value: 978.894043, critic_norm: 5.480930, value_mean: 161.403839, advantage: 30.935535
101
actor: 3.170302, actor_grad: 32.990284, policy mean: 0.270413, policy: 2.987292, entropy: -0.029227, actor_norm: 0.212237, critic: 125.444214, critic_grad: 1518.958740, value: 120.721046, critic_norm: 4.723166, value_mean: 114.862442, advantage: 10.694883
101
actor: 5.880353, actor_grad: 27.963921, policy mean: 0.464983, policy: 5.693496, entropy: -0.025954, actor_norm: 0.212811, critic: 144.541245, critic_grad: 1232.575195, value: 139.991974, critic_norm: 4.549268, value_mean: 54.888657, advantage: 11.805847
101
actor: -1.497595, actor_grad: 1.048904, policy mean: 0.248618, policy: -1.662442, entropy: -0.030405, actor_norm: 0.195253, critic: 53.626335, critic_grad: 585.684998, value: 49.256828, critic_norm: 4.369508, value_mean: 80.840012, advantage: -6.966618
101
actor: 2.517943, actor_grad: 2.257077, policy mean: 0.439831, policy: 2.362685, entropy: -0.030134, actor_norm: 0.185392, critic: 33.000137, critic_grad: 391.741058, value: 28.812225, critic_norm: 4.187912, value_mean: 73.785721, advantage: 5.355047
101
actor: 10.506250, actor_grad: 10.785121, policy mean: 0.651754, policy: 10.372407, entropy: -0.031811, actor_norm: 0.165654, critic: 252.050079, critic_grad: 1114.342651, value: 247.923965, critic_norm: 4.126122, value_mean: 86.549728, advantage: 15.699212
101
actor: 2.208450, actor_grad: 2.558647, policy mean: 0.433472, policy: 2.100651, entropy: -0.040969, actor_norm: 0.148768, critic: 27.663408, critic_grad: 467.533508, value: 23.348133, critic_norm: 4.315275, value_mean: 93.120018, advantage: 4.790200
101
actor: -8.022193, actor_grad: 5.833210, policy mean: 0.493423, policy: -8.111319, entropy: -0.044737, actor_norm: 0.133862, critic: 273.124725, critic_grad: 1353.090942, value: 268.559753, critic_norm: 4.564964, value_mean: 109.429985, advantage: -16.351837
101
actor: -0.246570, actor_grad: 0.529034, policy mean: 0.489836, policy: -0.326772, entropy: -0.047064, actor_norm: 0.127265, critic: 5.375881, critic_grad: 32.556004, value: 1.384920, critic_norm: 3.990961, value_mean: 91.665779, advantage: -0.364233
101
actor: 7.986468, actor_grad: 3.822542, policy mean: 0.618136, policy: 7.909577, entropy: -0.040296, actor_norm: 0.117187, critic: 163.194519, critic_grad: 923.228027, value: 159.169128, critic_norm: 4.025389, value_mean: 92.134773, advantage: 12.559805
101
actor: 4.374270, actor_grad: 4.017980, policy mean: 0.551341, policy: 4.312401, entropy: -0.047158, actor_norm: 0.109027, critic: 68.909622, critic_grad: 629.844604, value: 64.729477, critic_norm: 4.180143, value_mean: 98.029518, advantage: 7.989183
101
actor: -3.371855, actor_grad: 2.880453, policy mean: 0.616922, policy: -3.426088, entropy: -0.046613, actor_norm: 0.100846, critic: 37.278706, critic_grad: 464.161377, value: 32.762245, critic_norm: 4.516461, value_mean: 113.511948, advantage: -5.352058
101
actor: -7.711684, actor_grad: 0.769247, policy mean: 0.523492, policy: -7.750886, entropy: -0.055461, actor_norm: 0.094663, critic: 232.716660, critic_grad: 1284.895752, value: 228.369934, critic_norm: 4.346725, value_mean: 109.934296, advantage: -15.048515
101
actor: 2.411593, actor_grad: 2.575196, policy mean: 0.531378, policy: 2.366483, entropy: -0.053458, actor_norm: 0.098568, critic: 24.738018, critic_grad: 420.393372, value: 20.856926, critic_norm: 3.881092, value_mean: 94.668945, advantage: 4.524292
101
actor: 8.166630, actor_grad: 1.668831, policy mean: 0.598315, policy: 8.117337, entropy: -0.049824, actor_norm: 0.099117, critic: 199.059875, critic_grad: 1079.300049, value: 194.922852, critic_norm: 4.137017, value_mean: 98.186188, advantage: 13.855567
101
actor: -2.145131, actor_grad: 0.954374, policy mean: 0.628002, policy: -2.193167, entropy: -0.047772, actor_norm: 0.095808, critic: 17.238543, critic_grad: 317.672363, value: 12.765960, critic_norm: 4.472583, value_mean: 112.796280, advantage: -3.527188
101
actor: -6.704543, actor_grad: 0.641744, policy mean: 0.509092, policy: -6.748009, entropy: -0.055532, actor_norm: 0.098998, critic: 179.194626, critic_grad: 1073.732544, value: 174.733292, critic_norm: 4.461331, value_mean: 107.030052, advantage: -13.191078
101
actor: 0.072565, actor_grad: 0.289986, policy mean: 0.453032, policy: 0.026349, entropy: -0.054025, actor_norm: 0.100241, critic: 4.590417, critic_grad: 5.604268, value: 0.611069, critic_norm: 3.979348, value_mean: 97.618729, advantage: 0.001566
101
actor: 6.725562, actor_grad: 4.518704, policy mean: 0.436353, policy: 6.670972, entropy: -0.052304, actor_norm: 0.106894, critic: 236.848877, critic_grad: 1333.078369, value: 232.630829, critic_norm: 4.218048, value_mean: 103.572174, advantage: 15.165490
101
actor: -6.463383, actor_grad: 2.242245, policy mean: 0.549779, policy: -6.518364, entropy: -0.048983, actor_norm: 0.103964, critic: 145.999222, critic_grad: 1110.776001, value: 141.274536, critic_norm: 4.724686, value_mean: 123.087616, advantage: -11.881495
101
actor: -8.633711, actor_grad: 5.633884, policy mean: 0.511363, policy: -8.686728, entropy: -0.049964, actor_norm: 0.102981, critic: 294.926239, critic_grad: 1662.469482, value: 290.543335, critic_norm: 4.382901, value_mean: 115.265495, advantage: -17.005854
101
actor: 7.809831, actor_grad: 0.633169, policy mean: 0.499685, policy: 7.755534, entropy: -0.049734, actor_norm: 0.104031, critic: 245.754669, critic_grad: 1625.665894, value: 241.791183, critic_norm: 3.963485, value_mean: 98.472900, advantage: 15.544842
101
actor: 8.484267, actor_grad: 2.490234, policy mean: 0.683792, policy: 8.426250, entropy: -0.045092, actor_norm: 0.103108, critic: 162.790787, critic_grad: 1009.348633, value: 158.575226, critic_norm: 4.215560, value_mean: 105.436943, advantage: 12.517060
101
actor: -3.262278, actor_grad: 5.196187, policy mean: 0.636350, policy: -3.333000, entropy: -0.040962, actor_norm: 0.111684, critic: 33.019104, critic_grad: 585.650085, value: 28.520889, critic_norm: 4.498217, value_mean: 122.722778, advantage: -5.304915
101
actor: -7.451140, actor_grad: 9.736920, policy mean: 0.488440, policy: -7.518062, entropy: -0.050004, actor_norm: 0.116925, critic: 251.575089, critic_grad: 1465.886963, value: 247.088654, critic_norm: 4.486439, value_mean: 118.779251, advantage: -15.607630
101
actor: 3.882541, actor_grad: 5.543129, policy mean: 0.377698, policy: 3.811520, entropy: -0.048875, actor_norm: 0.119896, critic: 107.065063, critic_grad: 989.279968, value: 102.891266, critic_norm: 4.173800, value_mean: 104.938400, advantage: 10.134628
101
actor: 7.488221, actor_grad: 5.295145, policy mean: 0.524105, policy: 7.406534, entropy: -0.042563, actor_norm: 0.124251, critic: 208.071793, critic_grad: 1122.549683, value: 203.831879, critic_norm: 4.239908, value_mean: 102.869095, advantage: 14.199026
101
actor: -13.253215, actor_grad: 16.123741, policy mean: 0.624629, policy: -13.330065, entropy: -0.043865, actor_norm: 0.120716, critic: 441.933716, critic_grad: 2772.517578, value: 436.966187, critic_norm: 4.967523, value_mean: 150.917236, advantage: -20.807590
101
actor: 1.023341, actor_grad: 1.387664, policy mean: 0.484576, policy: 0.941953, entropy: -0.046522, actor_norm: 0.127911, critic: 9.808546, critic_grad: 171.947845, value: 5.293176, critic_norm: 4.515371, value_mean: 125.702408, advantage: 1.825464
101
actor: 15.430721, actor_grad: 7.735106, policy mean: 0.596625, policy: 15.346354, entropy: -0.041242, actor_norm: 0.125609, critic: 674.188416, critic_grad: 3949.968750, value: 669.395264, critic_norm: 4.793169, value_mean: 144.521698, advantage: 25.719351
101
actor: -9.864182, actor_grad: 3.934193, policy mean: 0.516074, policy: -9.949357, entropy: -0.044078, actor_norm: 0.129253, critic: 386.778107, critic_grad: 2061.607910, value: 381.908752, critic_norm: 4.869347, value_mean: 134.002777, advantage: -19.445496
101
actor: 6.952750, actor_grad: 5.981304, policy mean: 0.587101, policy: 6.856998, entropy: -0.036718, actor_norm: 0.132469, critic: 118.226425, critic_grad: 1626.850098, value: 113.151817, critic_norm: 5.074607, value_mean: 155.963715, advantage: 9.991971
101
actor: -1.897329, actor_grad: 2.984926, policy mean: 0.399374, policy: -1.990202, entropy: -0.044243, actor_norm: 0.137116, critic: 29.501442, critic_grad: 394.867706, value: 25.296206, critic_norm: 4.205236, value_mean: 102.803131, advantage: -5.005107
101
actor: 4.473938, actor_grad: 8.999934, policy mean: 0.430571, policy: 4.379210, entropy: -0.044999, actor_norm: 0.139728, critic: 107.559441, critic_grad: 927.132385, value: 103.425278, critic_norm: 4.134166, value_mean: 102.008591, advantage: 10.159970
101
actor: 2.958417, actor_grad: 8.887107, policy mean: 0.566277, policy: 2.858866, entropy: -0.040576, actor_norm: 0.140126, critic: 29.101030, critic_grad: 491.014587, value: 24.715544, critic_norm: 4.385486, value_mean: 113.351807, advantage: 4.242225
101
actor: 1.357071, actor_grad: 1.170873, policy mean: 0.473954, policy: 1.259624, entropy: -0.039990, actor_norm: 0.137437, critic: 11.879602, critic_grad: 304.727081, value: 7.892817, critic_norm: 3.986786, value_mean: 82.911034, advantage: 2.713196
101
actor: 2.147004, actor_grad: 1.095573, policy mean: 0.703112, policy: 2.057133, entropy: -0.040932, actor_norm: 0.130803, critic: 12.875460, critic_grad: 218.146103, value: 8.895447, critic_norm: 3.980013, value_mean: 93.208015, advantage: 2.921413
101
actor: 0.010000, actor_grad: 0.115688, policy mean: 0.558620, policy: -0.067368, entropy: -0.043292, actor_norm: 0.120660, critic: 4.170123, critic_grad: 17.728220, value: 0.197789, critic_norm: 3.972334, value_mean: 93.893059, advantage: -0.178466
101
actor: 6.683324, actor_grad: 10.334244, policy mean: 0.628336, policy: 6.613053, entropy: -0.042490, actor_norm: 0.112760, critic: 113.490044, critic_grad: 879.955688, value: 109.685135, critic_norm: 3.804907, value_mean: 87.236130, advantage: 10.464546
101
actor: 11.911568, actor_grad: 18.594910, policy mean: 0.756853, policy: 11.847754, entropy: -0.042399, actor_norm: 0.106213, critic: 246.703690, critic_grad: 1147.620239, value: 242.687439, critic_norm: 4.016253, value_mean: 89.483284, advantage: 15.569731
101
actor: 5.543942, actor_grad: 4.632809, policy mean: 0.526878, policy: 5.483252, entropy: -0.046078, actor_norm: 0.106769, critic: 111.831688, critic_grad: 1182.088867, value: 107.554512, critic_norm: 4.277176, value_mean: 104.140327, advantage: 10.284425
101
actor: -8.796728, actor_grad: 3.639327, policy mean: 0.545362, policy: -8.849356, entropy: -0.049613, actor_norm: 0.102240, critic: 267.568787, critic_grad: 1532.797119, value: 263.168274, critic_norm: 4.400516, value_mean: 112.827454, advantage: -16.172951
101
actor: 1.315084, actor_grad: 0.864427, policy mean: 0.555010, policy: 1.260850, entropy: -0.048978, actor_norm: 0.103213, critic: 11.253231, critic_grad: 198.014053, value: 7.425999, critic_norm: 3.827232, value_mean: 91.893570, advantage: 2.368281
101
actor: 6.560167, actor_grad: 13.603117, policy mean: 0.539121, policy: 6.505714, entropy: -0.049251, actor_norm: 0.103704, critic: 156.394470, critic_grad: 1050.169189, value: 152.227448, critic_norm: 4.167029, value_mean: 101.556335, advantage: 12.287661
101
actor: -4.137065, actor_grad: 5.920197, policy mean: 0.654194, policy: -4.196247, entropy: -0.042698, actor_norm: 0.101880, critic: 44.786301, critic_grad: 517.993835, value: 40.372925, critic_norm: 4.413374, value_mean: 109.007866, advantage: -6.304176
101
actor: -9.053506, actor_grad: 17.656919, policy mean: 0.644502, policy: -9.109163, entropy: -0.046065, actor_norm: 0.101722, critic: 203.810135, critic_grad: 1267.743774, value: 199.364227, critic_norm: 4.445910, value_mean: 114.092758, advantage: -14.054783
101
actor: 1.282456, actor_grad: 0.216229, policy mean: 0.540438, policy: 1.227784, entropy: -0.052955, actor_norm: 0.107627, critic: 11.290599, critic_grad: 181.061951, value: 7.361564, critic_norm: 3.929034, value_mean: 92.499680, advantage: 2.513878
101
actor: 12.482803, actor_grad: 24.372427, policy mean: 0.723710, policy: 12.410103, entropy: -0.039384, actor_norm: 0.112084, critic: 287.313416, critic_grad: 1817.856689, value: 283.188171, critic_norm: 4.125255, value_mean: 98.219086, advantage: 16.700291
101
actor: -0.740099, actor_grad: 1.050444, policy mean: 0.645554, policy: -0.807353, entropy: -0.040412, actor_norm: 0.107665, critic: 6.486665, critic_grad: 173.635574, value: 1.926955, critic_norm: 4.559710, value_mean: 121.738647, advantage: -1.358980
101
actor: -6.067526, actor_grad: 1.861813, policy mean: 0.451450, policy: -6.129772, entropy: -0.051218, actor_norm: 0.113463, critic: 186.385376, critic_grad: 1268.096069, value: 182.142578, critic_norm: 4.242794, value_mean: 105.963982, advantage: -13.453159
101
actor: 8.542212, actor_grad: 6.920823, policy mean: 0.971179, policy: 8.464096, entropy: -0.035041, actor_norm: 0.113156, critic: 82.008102, critic_grad: 638.977173, value: 78.116219, critic_norm: 3.891886, value_mean: 91.531525, advantage: 8.833903
101
actor: 13.091656, actor_grad: 36.082638, policy mean: 0.657825, policy: 13.026882, entropy: -0.048854, actor_norm: 0.113627, critic: 372.267731, critic_grad: 1952.944092, value: 368.059021, critic_norm: 4.208724, value_mean: 104.082695, advantage: 19.098314
101
actor: 0.253927, actor_grad: 1.147507, policy mean: 0.570325, policy: 0.184205, entropy: -0.044704, actor_norm: 0.114426, critic: 5.520391, critic_grad: 21.895573, value: 1.111181, critic_norm: 4.409209, value_mean: 112.198441, advantage: 0.174960
101
actor: -6.919580, actor_grad: 9.052594, policy mean: 0.462706, policy: -6.981237, entropy: -0.049800, actor_norm: 0.111458, critic: 235.337830, critic_grad: 1276.971069, value: 230.982300, critic_norm: 4.355527, value_mean: 108.440437, advantage: -15.182983
101
actor: 2.863271, actor_grad: 5.599317, policy mean: 0.504718, policy: 2.795362, entropy: -0.045787, actor_norm: 0.113695, critic: 34.731613, critic_grad: 408.539459, value: 30.801600, critic_norm: 3.930016, value_mean: 92.238403, advantage: 5.510860
101
actor: 7.441124, actor_grad: 0.900508, policy mean: 0.502183, policy: 7.374137, entropy: -0.047173, actor_norm: 0.114160, critic: 220.188141, critic_grad: 1082.336304, value: 216.186340, critic_norm: 4.001797, value_mean: 93.885880, advantage: 14.648026
101
actor: -1.371443, actor_grad: 0.425165, policy mean: 0.421035, policy: -1.430017, entropy: -0.051941, actor_norm: 0.110516, critic: 16.718624, critic_grad: 353.305847, value: 12.144416, critic_norm: 4.574208, value_mean: 117.267830, advantage: -3.419379
101
actor: -7.921583, actor_grad: 1.826889, policy mean: 0.445027, policy: -7.980374, entropy: -0.050794, actor_norm: 0.109584, critic: 335.962494, critic_grad: 1974.407593, value: 331.550751, critic_norm: 4.411737, value_mean: 116.202347, advantage: -18.072010
101
actor: 4.924052, actor_grad: 3.983976, policy mean: 0.426233, policy: 4.867955, entropy: -0.050863, actor_norm: 0.106959, critic: 134.224533, critic_grad: 1065.080811, value: 130.283295, critic_norm: 3.941236, value_mean: 94.518486, advantage: 11.411629
101
actor: 5.451238, actor_grad: 2.179937, policy mean: 0.446429, policy: 5.398326, entropy: -0.052044, actor_norm: 0.104956, critic: 143.104935, critic_grad: 1132.877808, value: 138.888550, critic_norm: 4.216384, value_mean: 104.915443, advantage: 11.719576
101
actor: -3.230910, actor_grad: 4.387723, policy mean: 0.701121, policy: -3.296403, entropy: -0.038888, actor_norm: 0.104381, critic: 26.024704, critic_grad: 394.417542, value: 21.626959, critic_norm: 4.397746, value_mean: 108.164352, advantage: -4.625898
101
actor: -6.496334, actor_grad: 1.377553, policy mean: 0.443353, policy: -6.556566, entropy: -0.047533, actor_norm: 0.107766, critic: 218.267365, critic_grad: 1730.148926, value: 214.226379, critic_norm: 4.040985, value_mean: 95.919769, advantage: -14.593707
101
actor: 3.787574, actor_grad: 2.717145, policy mean: 0.539137, policy: 3.728864, entropy: -0.048199, actor_norm: 0.106909, critic: 51.532299, critic_grad: 494.534729, value: 47.667534, critic_norm: 3.864766, value_mean: 89.060196, advantage: 6.898302
101
actor: 7.597653, actor_grad: 5.432928, policy mean: 0.467550, policy: 7.542371, entropy: -0.050690, actor_norm: 0.105973, critic: 267.394562, critic_grad: 1448.494629, value: 263.405945, critic_norm: 3.988617, value_mean: 89.164360, advantage: 16.210762
101
actor: -1.904567, actor_grad: 0.250429, policy mean: 0.558762, policy: -1.949882, entropy: -0.052090, actor_norm: 0.097404, critic: 16.564716, critic_grad: 281.608765, value: 12.196101, critic_norm: 4.368614, value_mean: 107.474182, advantage: -3.474034
101
actor: -7.608976, actor_grad: 3.223194, policy mean: 0.498745, policy: -7.649831, entropy: -0.054504, actor_norm: 0.095359, critic: 241.811142, critic_grad: 1246.677979, value: 237.391830, critic_norm: 4.419312, value_mean: 108.678574, advantage: -15.380902
101
actor: 0.020960, actor_grad: 0.056704, policy mean: 0.530376, policy: -0.010531, entropy: -0.056546, actor_norm: 0.088037, critic: 5.423877, critic_grad: 12.727340, value: 1.495381, critic_norm: 3.928495, value_mean: 89.429443, advantage: 0.058848
101
actor: 7.287874, actor_grad: 2.790491, policy mean: 0.509642, policy: 7.256617, entropy: -0.053436, actor_norm: 0.084694, critic: 207.489166, critic_grad: 1151.627686, value: 203.505417, critic_norm: 3.983745, value_mean: 92.480560, advantage: 14.218350
101
actor: 0.062284, actor_grad: 0.557528, policy mean: 0.556016, policy: 0.032145, entropy: -0.056340, actor_norm: 0.086479, critic: 5.748672, critic_grad: 24.650757, value: 1.450183, critic_norm: 4.298489, value_mean: 104.118225, advantage: -0.074852
101
actor: -6.540163, actor_grad: 8.689289, policy mean: 0.643934, policy: -6.573102, entropy: -0.049647, actor_norm: 0.082586, critic: 108.322433, critic_grad: 836.006775, value: 103.928345, critic_norm: 4.394087, value_mean: 108.237061, advantage: -10.193819
101
actor: -7.018411, actor_grad: 9.927964, policy mean: 0.585392, policy: -7.055317, entropy: -0.051769, actor_norm: 0.088676, critic: 148.278336, critic_grad: 994.783020, value: 143.956696, critic_norm: 4.321636, value_mean: 106.380989, advantage: -11.967778
101
actor: -4.337075, actor_grad: 5.460922, policy mean: 0.617948, policy: -4.374387, entropy: -0.049996, actor_norm: 0.087308, critic: 54.966213, critic_grad: 560.967834, value: 50.860706, critic_norm: 4.105506, value_mean: 98.705475, advantage: -7.054097
101
actor: 4.815507, actor_grad: 1.501631, policy mean: 0.562631, policy: 4.781914, entropy: -0.052891, actor_norm: 0.086484, critic: 75.953163, critic_grad: 639.049744, value: 72.061615, critic_norm: 3.891550, value_mean: 90.216286, advantage: 8.488575
101
actor: 9.308492, actor_grad: 2.174613, policy mean: 0.599497, policy: 9.271186, entropy: -0.050488, actor_norm: 0.087794, critic: 242.023987, critic_grad: 1326.280029, value: 238.088226, critic_norm: 3.935768, value_mean: 87.950401, advantage: 15.344555
101
actor: 4.684167, actor_grad: 4.950144, policy mean: 0.582123, policy: 4.649921, entropy: -0.053741, actor_norm: 0.087987, critic: 65.102898, critic_grad: 708.379761, value: 60.910744, critic_norm: 4.192151, value_mean: 103.951950, advantage: 7.640427
101
actor: -2.440623, actor_grad: 0.659508, policy mean: 0.541944, policy: -2.476201, entropy: -0.051881, actor_norm: 0.087459, critic: 28.743385, critic_grad: 392.841034, value: 24.268291, critic_norm: 4.475093, value_mean: 109.246445, advantage: -4.772733
101
actor: -7.249672, actor_grad: 2.077032, policy mean: 0.505199, policy: -7.280098, entropy: -0.058187, actor_norm: 0.088613, critic: 212.249863, critic_grad: 1298.022827, value: 207.798859, critic_norm: 4.451006, value_mean: 108.825768, advantage: -14.405671
101
actor: -3.799427, actor_grad: 3.773449, policy mean: 0.515682, policy: -3.829506, entropy: -0.057553, actor_norm: 0.087633, critic: 59.544842, critic_grad: 633.213684, value: 55.450157, critic_norm: 4.094684, value_mean: 100.373901, advantage: -7.342631
101
actor: 2.702193, actor_grad: 2.780923, policy mean: 0.573273, policy: 2.669321, entropy: -0.053831, actor_norm: 0.086703, critic: 25.764095, critic_grad: 343.951080, value: 21.870384, critic_norm: 3.893712, value_mean: 91.258621, advantage: 4.666218
101
actor: 8.810133, actor_grad: 4.745875, policy mean: 0.591580, policy: 8.777567, entropy: -0.052433, actor_norm: 0.084999, critic: 225.280701, critic_grad: 1158.001831, value: 221.372345, critic_norm: 3.908350, value_mean: 88.725227, advantage: 14.866556
101
actor: 4.228497, actor_grad: 4.846257, policy mean: 0.635561, policy: 4.190963, entropy: -0.048525, actor_norm: 0.086058, critic: 50.021816, critic_grad: 520.027649, value: 45.750782, critic_norm: 4.271034, value_mean: 101.673271, advantage: 6.711876
101
actor: -3.222669, actor_grad: 1.072433, policy mean: 0.489008, policy: -3.245929, entropy: -0.062375, actor_norm: 0.085634, critic: 48.737484, critic_grad: 577.884094, value: 44.270653, critic_norm: 4.466830, value_mean: 109.122017, advantage: -6.629348
101
actor: -7.655245, actor_grad: 2.122994, policy mean: 0.539596, policy: -7.683216, entropy: -0.057251, actor_norm: 0.085221, critic: 207.111923, critic_grad: 1239.900391, value: 202.629547, critic_norm: 4.482370, value_mean: 110.459290, advantage: -14.226692
101
actor: -7.688020, actor_grad: 10.683606, policy mean: 0.722555, policy: -7.741165, entropy: -0.037965, actor_norm: 0.091110, critic: 123.690941, critic_grad: 856.176147, value: 119.540054, critic_norm: 4.150886, value_mean: 99.943283, advantage: -10.866948
101
actor: 3.460280, actor_grad: 4.296030, policy mean: 0.540531, policy: 3.427052, entropy: -0.055692, actor_norm: 0.088920, critic: 45.438156, critic_grad: 590.548645, value: 41.480103, critic_norm: 3.958054, value_mean: 87.902267, advantage: 6.410839
101
actor: 6.601793, actor_grad: 1.028455, policy mean: 0.520140, policy: 6.570340, entropy: -0.056320, actor_norm: 0.087773, critic: 164.034012, critic_grad: 1060.453369, value: 160.154602, critic_norm: 3.879409, value_mean: 91.854904, advantage: 12.650559
101
actor: 4.230256, actor_grad: 1.564381, policy mean: 0.478137, policy: 4.203754, entropy: -0.059400, actor_norm: 0.085901, critic: 82.880585, critic_grad: 849.900696, value: 78.713608, critic_norm: 4.166977, value_mean: 100.473297, advantage: 8.843084
101
actor: -5.101110, actor_grad: 7.476906, policy mean: 0.750285, policy: -5.144957, entropy: -0.043710, actor_norm: 0.087556, critic: 51.648136, critic_grad: 752.516174, value: 47.155136, critic_norm: 4.493000, value_mean: 111.356346, advantage: -6.852562
101
actor: -6.355019, actor_grad: 1.877797, policy mean: 0.590800, policy: -6.389050, entropy: -0.053174, actor_norm: 0.087205, critic: 125.385033, critic_grad: 1024.683228, value: 121.010033, critic_norm: 4.375003, value_mean: 104.673096, advantage: -10.954364
101
actor: 0.308373, actor_grad: 0.173589, policy mean: 0.578447, policy: 0.275795, entropy: -0.053007, actor_norm: 0.085586, critic: 4.744542, critic_grad: 35.519623, value: 0.794376, critic_norm: 3.950166, value_mean: 93.502075, advantage: 0.462199
101
actor: 8.116276, actor_grad: 7.623156, policy mean: 0.590439, policy: 8.080511, entropy: -0.052070, actor_norm: 0.087834, critic: 191.660141, critic_grad: 979.970642, value: 187.862061, critic_norm: 3.798083, value_mean: 88.929192, advantage: 13.702386
101
actor: 4.851624, actor_grad: 4.368512, policy mean: 0.536926, policy: 4.819563, entropy: -0.054638, actor_norm: 0.086700, critic: 86.250786, critic_grad: 708.229431, value: 82.037582, critic_norm: 4.213206, value_mean: 99.122879, advantage: 8.998553
101
actor: -2.970703, actor_grad: 0.817896, policy mean: 0.549747, policy: -3.003930, entropy: -0.054420, actor_norm: 0.087647, critic: 36.108040, critic_grad: 651.511292, value: 31.554970, critic_norm: 4.553069, value_mean: 105.460342, advantage: -5.547818
101
actor: -6.639795, actor_grad: 6.185636, policy mean: 0.525485, policy: -6.672783, entropy: -0.054353, actor_norm: 0.087341, critic: 167.643906, critic_grad: 1084.920654, value: 163.257248, critic_norm: 4.386657, value_mean: 107.741692, advantage: -12.732214
101
actor: -0.374542, actor_grad: 0.259124, policy mean: 0.596947, policy: -0.410866, entropy: -0.051188, actor_norm: 0.087513, critic: 4.704602, critic_grad: 61.580074, value: 0.773013, critic_norm: 3.931589, value_mean: 90.911011, advantage: -0.746613
101
actor: 8.724689, actor_grad: 8.347935, policy mean: 0.594461, policy: 8.693697, entropy: -0.052586, actor_norm: 0.083578, critic: 216.620163, critic_grad: 1049.434204, value: 212.749649, critic_norm: 3.870513, value_mean: 89.561646, advantage: 14.561386
101
actor: 1.546179, actor_grad: 2.052557, policy mean: 0.616994, policy: 1.510192, entropy: -0.050450, actor_norm: 0.086437, critic: 12.156439, critic_grad: 211.598129, value: 7.791411, critic_norm: 4.365028, value_mean: 106.317780, advantage: 2.618745
101
actor: -6.639024, actor_grad: 4.364289, policy mean: 0.498483, policy: -6.666399, entropy: -0.056850, actor_norm: 0.084225, critic: 185.259171, critic_grad: 1188.344482, value: 180.741119, critic_norm: 4.518049, value_mean: 114.181885, advantage: -13.437643
101
actor: -8.846843, actor_grad: 17.917212, policy mean: 0.578203, policy: -8.879265, entropy: -0.053262, actor_norm: 0.085685, critic: 236.067688, critic_grad: 2421.125000, value: 231.666595, critic_norm: 4.401086, value_mean: 128.164398, advantage: -15.118011
101
actor: -3.356335, actor_grad: 4.815550, policy mean: 0.683379, policy: -3.405044, entropy: -0.040739, actor_norm: 0.089448, critic: 27.179424, critic_grad: 360.494781, value: 23.081356, critic_norm: 4.098067, value_mean: 95.500717, advantage: -4.753139
101
actor: 2.125510, actor_grad: 1.747921, policy mean: 0.552582, policy: 2.085947, entropy: -0.051604, actor_norm: 0.091167, critic: 19.310312, critic_grad: 332.497589, value: 15.203797, critic_norm: 4.106515, value_mean: 90.118362, advantage: 3.863921
101
actor: -0.017248, actor_grad: 0.343685, policy mean: 0.523783, policy: -0.058353, entropy: -0.053592, actor_norm: 0.094697, critic: 5.572740, critic_grad: 16.876545, value: 0.910584, critic_norm: 4.662156, value_mean: 124.362946, advantage: -0.058401
101
actor: 8.234785, actor_grad: 9.305798, policy mean: 0.512418, policy: 8.188265, entropy: -0.051820, actor_norm: 0.098341, critic: 256.375336, critic_grad: 1771.987671, value: 252.083221, critic_norm: 4.292118, value_mean: 115.652878, advantage: 15.761414
101
actor: -6.419343, actor_grad: 5.096465, policy mean: 0.471016, policy: -6.463492, entropy: -0.054602, actor_norm: 0.098751, critic: 197.066376, critic_grad: 1744.720459, value: 192.177979, critic_norm: 4.888402, value_mean: 138.293411, advantage: -13.781588
101
actor: -2.379143, actor_grad: 2.535501, policy mean: 0.582412, policy: -2.426444, entropy: -0.051151, actor_norm: 0.098453, critic: 22.036373, critic_grad: 505.515503, value: 17.728331, critic_norm: 4.308043, value_mean: 114.712029, advantage: -4.148654
101
actor: 6.639254, actor_grad: 9.478219, policy mean: 0.551882, policy: 6.586078, entropy: -0.049110, actor_norm: 0.102286, critic: 148.241364, critic_grad: 1094.086182, value: 144.368164, critic_norm: 3.873194, value_mean: 92.192734, advantage: 11.935762
101
actor: -0.915905, actor_grad: 1.226729, policy mean: 0.510871, policy: -0.966694, entropy: -0.049127, actor_norm: 0.099917, critic: 8.554759, critic_grad: 156.373230, value: 4.067863, critic_norm: 4.486896, value_mean: 109.967010, advantage: -1.791814
101
actor: -6.173982, actor_grad: 3.813107, policy mean: 0.501438, policy: -6.231210, entropy: -0.050103, actor_norm: 0.107331, critic: 160.163437, critic_grad: 1269.426392, value: 155.777069, critic_norm: 4.386368, value_mean: 110.251663, advantage: -12.465410
101
actor: -1.327471, actor_grad: 1.629109, policy mean: 0.437423, policy: -1.386593, entropy: -0.050026, actor_norm: 0.109148, critic: 14.976528, critic_grad: 240.525513, value: 10.998278, critic_norm: 3.978250, value_mean: 96.136490, advantage: -3.218344
101
actor: 6.206375, actor_grad: 2.920513, policy mean: 0.496515, policy: 6.144307, entropy: -0.044877, actor_norm: 0.106945, critic: 159.623367, critic_grad: 1006.903198, value: 155.789398, critic_norm: 3.833973, value_mean: 88.009697, advantage: 12.439155
101
actor: 3.540111, actor_grad: 1.178320, policy mean: 0.634383, policy: 3.482238, entropy: -0.045907, actor_norm: 0.103780, critic: 43.268909, critic_grad: 524.894653, value: 38.956123, critic_norm: 4.312787, value_mean: 105.219666, advantage: 5.927264
101
actor: -8.987603, actor_grad: 3.283859, policy mean: 0.564945, policy: -9.031938, entropy: -0.052147, actor_norm: 0.096482, critic: 259.297852, critic_grad: 1971.615967, value: 254.786423, critic_norm: 4.511442, value_mean: 115.465897, advantage: -15.946957
101
actor: -6.870729, actor_grad: 9.657613, policy mean: 0.611052, policy: -6.919692, entropy: -0.046936, actor_norm: 0.095898, critic: 137.912903, critic_grad: 982.563965, value: 133.748016, critic_norm: 4.164891, value_mean: 102.687531, advantage: -11.419703
101
actor: 5.409477, actor_grad: 1.076878, policy mean: 0.472311, policy: 5.365978, entropy: -0.053272, actor_norm: 0.096771, critic: 133.355209, critic_grad: 857.750977, value: 129.485107, critic_norm: 3.870109, value_mean: 91.804642, advantage: 11.368860
101
actor: 4.583668, actor_grad: 7.411083, policy mean: 0.563925, policy: 4.534656, entropy: -0.047640, actor_norm: 0.096652, critic: 68.375710, critic_grad: 894.626587, value: 63.903446, critic_norm: 4.472262, value_mean: 119.306023, advantage: 7.889894
101
actor: -6.550009, actor_grad: 5.085865, policy mean: 0.476795, policy: -6.593674, entropy: -0.054964, actor_norm: 0.098628, critic: 196.292801, critic_grad: 2168.239258, value: 191.712311, critic_norm: 4.580495, value_mean: 127.089996, advantage: -13.842281
101
actor: -1.263760, actor_grad: 1.915475, policy mean: 0.593060, policy: -1.311279, entropy: -0.049216, actor_norm: 0.096736, critic: 8.763613, critic_grad: 228.350204, value: 4.747972, critic_norm: 4.015640, value_mean: 99.534668, advantage: -2.039557
101
actor: 7.121590, actor_grad: 8.419964, policy mean: 0.533729, policy: 7.074874, entropy: -0.051554, actor_norm: 0.098269, critic: 185.986206, critic_grad: 1119.277466, value: 181.877991, critic_norm: 4.108220, value_mean: 99.490051, advantage: 13.418069
101
actor: -1.048300, actor_grad: 1.608735, policy mean: 0.653166, policy: -1.100425, entropy: -0.045884, actor_norm: 0.098009, critic: 10.047010, critic_grad: 163.744461, value: 5.659264, critic_norm: 4.387746, value_mean: 109.532501, advantage: -1.895837
101
actor: -8.514194, actor_grad: 12.951208, policy mean: 0.638146, policy: -8.566418, entropy: -0.047473, actor_norm: 0.099696, critic: 182.543945, critic_grad: 1356.199585, value: 178.244293, critic_norm: 4.299660, value_mean: 108.106270, advantage: -13.339013
101
actor: 0.743483, actor_grad: 1.772917, policy mean: 0.708847, policy: 0.679334, entropy: -0.036326, actor_norm: 0.100475, critic: 5.705483, critic_grad: 76.703789, value: 1.822711, critic_norm: 3.882772, value_mean: 88.604553, advantage: 0.983108
101
actor: 6.804555, actor_grad: 1.232102, policy mean: 0.489694, policy: 6.755740, entropy: -0.050807, actor_norm: 0.099622, critic: 192.488541, critic_grad: 1005.086426, value: 188.539886, critic_norm: 3.948661, value_mean: 92.580780, advantage: 13.691758
101
actor: -2.328474, actor_grad: 2.183736, policy mean: 0.471003, policy: -2.373665, entropy: -0.052902, actor_norm: 0.098094, critic: 30.273291, critic_grad: 478.125885, value: 25.789932, critic_norm: 4.483358, value_mean: 108.859116, advantage: -5.034252
101
actor: -5.814687, actor_grad: 2.578817, policy mean: 0.492662, policy: -5.855836, entropy: -0.054107, actor_norm: 0.095257, critic: 145.280869, critic_grad: 1012.703247, value: 141.026733, critic_norm: 4.254129, value_mean: 103.961281, advantage: -11.848673
101
actor: 1.695568, actor_grad: 0.312440, policy mean: 0.498367, policy: 1.647749, entropy: -0.045800, actor_norm: 0.093620, critic: 14.373771, critic_grad: 230.114594, value: 10.504490, critic_norm: 3.869281, value_mean: 91.610107, advantage: 3.187478
101
actor: 7.212360, actor_grad: 3.121656, policy mean: 0.532927, policy: 7.166219, entropy: -0.051037, actor_norm: 0.097178, critic: 182.479263, critic_grad: 981.434204, value: 178.560898, critic_norm: 3.918365, value_mean: 93.127312, advantage: 13.332134
101
actor: 4.321025, actor_grad: 2.503692, policy mean: 0.452536, policy: 4.283690, entropy: -0.056917, actor_norm: 0.094251, critic: 95.226204, critic_grad: 815.860229, value: 91.086143, critic_norm: 4.140059, value_mean: 95.375854, advantage: 9.467447
101
actor: -6.714415, actor_grad: 3.909070, policy mean: 0.529597, policy: -6.752311, entropy: -0.052906, actor_norm: 0.090802, critic: 167.413925, critic_grad: 1099.902222, value: 162.911163, critic_norm: 4.502755, value_mean: 112.537598, advantage: -12.761728
101
actor: -6.092876, actor_grad: 3.057330, policy mean: 0.551389, policy: -6.128622, entropy: -0.053396, actor_norm: 0.089142, critic: 129.049469, critic_grad: 858.601440, value: 124.842621, critic_norm: 4.206843, value_mean: 101.821472, advantage: -11.068644
101
actor: -0.484073, actor_grad: 0.234176, policy mean: 0.507645, policy: -0.508429, entropy: -0.058433, actor_norm: 0.082790, critic: 5.985194, critic_grad: 92.526550, value: 1.875990, critic_norm: 4.109204, value_mean: 101.772507, advantage: -1.053222
79
actor: -120.364220, actor_grad: 181.762650, policy mean: 1.276783, policy: -120.427986, entropy: -0.022185, actor_norm: 0.085954, critic: 9222.289062, critic_grad: 9226.962891, value: 9218.185547, critic_norm: 4.103514, value_mean: 103.542725, advantage: -95.904213
101
actor: -3.306097, actor_grad: 1.892925, policy mean: 0.560131, policy: -3.335434, entropy: -0.054698, actor_norm: 0.084035, critic: 39.360382, critic_grad: 533.800598, value: 34.984154, critic_norm: 4.376229, value_mean: 110.342789, advantage: -5.882812
101
actor: -6.652530, actor_grad: 3.508713, policy mean: 0.552951, policy: -6.679859, entropy: -0.055495, actor_norm: 0.082825, critic: 149.095963, critic_grad: 1033.098022, value: 144.699341, critic_norm: 4.396626, value_mean: 107.512192, advantage: -11.993689
101
actor: -5.016367, actor_grad: 1.729771, policy mean: 0.678048, policy: -5.050841, entropy: -0.048696, actor_norm: 0.083169, critic: 60.941921, critic_grad: 605.916382, value: 56.662144, critic_norm: 4.279776, value_mean: 105.419678, advantage: -7.412635
101
actor: -6.179287, actor_grad: 1.795624, policy mean: 0.561097, policy: -6.204448, entropy: -0.055845, actor_norm: 0.081006, critic: 126.955177, critic_grad: 969.227722, value: 122.732986, critic_norm: 4.222189, value_mean: 102.651764, advantage: -11.043834
101
actor: 1.731957, actor_grad: 1.100343, policy mean: 0.631138, policy: 1.701589, entropy: -0.051680, actor_norm: 0.082048, critic: 11.875476, critic_grad: 197.139282, value: 7.971085, critic_norm: 3.904392, value_mean: 91.170685, advantage: 2.750906
101
actor: 6.602998, actor_grad: 3.873396, policy mean: 0.568379, policy: 6.574211, entropy: -0.055875, actor_norm: 0.084662, critic: 138.220703, critic_grad: 920.235779, value: 134.258179, critic_norm: 3.962518, value_mean: 92.959969, advantage: 11.583394
101
actor: 6.447961, actor_grad: 3.741116, policy mean: 0.569796, policy: 6.412466, entropy: -0.051661, actor_norm: 0.087156, critic: 133.151901, critic_grad: 845.663757, value: 129.171738, critic_norm: 3.980170, value_mean: 95.821915, advantage: 11.347894
101
actor: 1.241299, actor_grad: 1.820777, policy mean: 0.588646, policy: 1.196071, entropy: -0.040919, actor_norm: 0.086147, critic: 9.101881, critic_grad: 148.376465, value: 4.904092, critic_norm: 4.197790, value_mean: 103.428398, advantage: 1.830113
101
actor: -4.245802, actor_grad: 2.951157, policy mean: 0.573358, policy: -4.277015, entropy: -0.053623, actor_norm: 0.084836, critic: 60.559540, critic_grad: 711.007141, value: 56.165596, critic_norm: 4.393945, value_mean: 110.755661, advantage: -7.487122
101
actor: -8.167315, actor_grad: 5.547272, policy mean: 0.553927, policy: -8.196602, entropy: -0.055744, actor_norm: 0.085031, critic: 223.802063, critic_grad: 1345.721069, value: 219.491806, critic_norm: 4.310253, value_mean: 108.145462, advantage: -14.758327
101
actor: 2.849302, actor_grad: 0.453914, policy mean: 0.521010, policy: 2.828832, entropy: -0.060036, actor_norm: 0.080507, critic: 34.647350, critic_grad: 425.661774, value: 30.719580, critic_norm: 3.927770, value_mean: 93.011169, advantage: 5.455627
101
actor: 5.952144, actor_grad: 2.234572, policy mean: 0.644505, policy: 5.925881, entropy: -0.051910, actor_norm: 0.078173, critic: 90.488213, critic_grad: 711.803589, value: 86.551933, critic_norm: 3.936279, value_mean: 95.898659, advantage: 9.273739
101
actor: 3.921402, actor_grad: 4.732212, policy mean: 0.778124, policy: 3.884732, entropy: -0.039925, actor_norm: 0.076595, critic: 33.335445, critic_grad: 443.844391, value: 29.143837, critic_norm: 4.191608, value_mean: 103.615265, advantage: 5.101177
101
actor: -2.870616, actor_grad: 0.805917, policy mean: 0.597392, policy: -2.890685, entropy: -0.056454, actor_norm: 0.076523, critic: 28.526859, critic_grad: 458.472839, value: 24.107914, critic_norm: 4.418945, value_mean: 115.187363, advantage: -4.885604
101
actor: 0.107676, actor_grad: 0.449996, policy mean: 0.527790, policy: 0.087002, entropy: -0.061036, actor_norm: 0.081711, critic: 4.254384, critic_grad: 14.631102, value: 0.284297, critic_norm: 3.970087, value_mean: 98.626205, advantage: 0.132412
101
actor: 6.386642, actor_grad: 0.827709, policy mean: 0.544345, policy: 6.368337, entropy: -0.060467, actor_norm: 0.078773, critic: 137.930069, critic_grad: 859.855591, value: 133.962845, critic_norm: 3.967224, value_mean: 93.922775, advantage: 11.560709
101
actor: 4.166824, actor_grad: 1.811447, policy mean: 0.537655, policy: 4.147840, entropy: -0.058742, actor_norm: 0.077726, critic: 63.482544, critic_grad: 592.334900, value: 59.379997, critic_norm: 4.102548, value_mean: 100.264908, advantage: 7.646401
101
actor: -2.247309, actor_grad: 0.478203, policy mean: 0.558469, policy: -2.266057, entropy: -0.057163, actor_norm: 0.075911, critic: 21.385994, critic_grad: 349.830566, value: 17.080364, critic_norm: 4.305629, value_mean: 107.955048, advantage: -4.037138
101
actor: -7.855382, actor_grad: 4.149764, policy mean: 0.648167, policy: -7.881938, entropy: -0.050018, actor_norm: 0.076574, critic: 156.504486, critic_grad: 1066.915527, value: 152.156799, critic_norm: 4.347682, value_mean: 107.676247, advantage: -12.305288
101
actor: -3.419857, actor_grad: 1.322319, policy mean: 0.524214, policy: -3.439482, entropy: -0.059013, actor_norm: 0.078638, critic: 47.888145, critic_grad: 503.493347, value: 43.729076, critic_norm: 4.159070, value_mean: 101.047249, advantage: -6.532228
101
actor: 2.616421, actor_grad: 2.474282, policy mean: 0.628679, policy: 2.582370, entropy: -0.049411, actor_norm: 0.083463, critic: 20.232244, critic_grad: 296.359344, value: 16.392307, critic_norm: 3.839937, value_mean: 91.032440, advantage: 3.998462
101
actor: 6.100195, actor_grad: 6.045425, policy mean: 0.606339, policy: 6.073639, entropy: -0.052849, actor_norm: 0.079405, critic: 103.987450, critic_grad: 747.750732, value: 100.086571, critic_norm: 3.900878, value_mean: 92.384216, advantage: 9.999993
101
actor: 7.136070, actor_grad: 2.680998, policy mean: 0.648543, policy: 7.110426, entropy: -0.053008, actor_norm: 0.078651, critic: 127.342033, critic_grad: 842.894104, value: 123.278503, critic_norm: 4.063529, value_mean: 95.899452, advantage: 11.022350
101
actor: -3.244932, actor_grad: 3.100847, policy mean: 0.581283, policy: -3.271218, entropy: -0.054405, actor_norm: 0.080692, critic: 36.704052, critic_grad: 712.781677, value: 32.380890, critic_norm: 4.323161, value_mean: 111.082779, advantage: -5.650537
101
actor: -5.555566, actor_grad: 5.233744, policy mean: 0.537984, policy: -5.581138, entropy: -0.055594, actor_norm: 0.081166, critic: 112.634811, critic_grad: 863.314453, value: 108.259804, critic_norm: 4.375007, value_mean: 109.693138, advantage: -10.397142
101
actor: -3.930412, actor_grad: 3.822961, policy mean: 0.578792, policy: -3.964339, entropy: -0.051728, actor_norm: 0.085655, critic: 50.199062, critic_grad: 520.958008, value: 46.141193, critic_norm: 4.057868, value_mean: 97.657883, advantage: -6.738750
101
actor: 4.619072, actor_grad: 2.358025, policy mean: 0.515046, policy: 4.591157, entropy: -0.058301, actor_norm: 0.086215, critic: 83.871933, critic_grad: 704.189514, value: 79.988403, critic_norm: 3.883532, value_mean: 92.790421, advantage: 8.921885
101
actor: 6.149201, actor_grad: 6.163473, policy mean: 0.527109, policy: 6.119875, entropy: -0.055303, actor_norm: 0.084629, critic: 142.415222, critic_grad: 928.214478, value: 138.438782, critic_norm: 3.976444, value_mean: 96.080765, advantage: 11.743796
101
actor: -0.961800, actor_grad: 1.549078, policy mean: 0.635224, policy: -1.001265, entropy: -0.048314, actor_norm: 0.087779, critic: 7.080356, critic_grad: 115.935280, value: 2.840362, critic_norm: 4.239994, value_mean: 101.802917, advantage: -1.393915
101
actor: -6.052356, actor_grad: 0.741905, policy mean: 0.527247, policy: -6.085562, entropy: -0.053865, actor_norm: 0.087071, critic: 139.845764, critic_grad: 1323.676880, value: 135.357849, critic_norm: 4.487911, value_mean: 125.906174, advantage: -11.554676
101
actor: -3.130177, actor_grad: 2.008400, policy mean: 0.682845, policy: -3.172441, entropy: -0.046851, actor_norm: 0.089115, critic: 36.629585, critic_grad: 612.646729, value: 32.606815, critic_norm: 4.022772, value_mean: 111.138016, advantage: -5.202010
101
actor: -0.256215, actor_grad: 0.471913, policy mean: 0.510886, policy: -0.295019, entropy: -0.053289, actor_norm: 0.092092, critic: 5.265531, critic_grad: 67.733780, value: 0.868759, critic_norm: 4.396771, value_mean: 121.374474, advantage: -0.603644
101
actor: 6.182135, actor_grad: 4.917556, policy mean: 0.604015, policy: 6.134833, entropy: -0.048846, actor_norm: 0.096147, critic: 112.782021, critic_grad: 810.947693, value: 108.591896, critic_norm: 4.190126, value_mean: 102.740303, advantage: 10.332535
101
actor: -5.404311, actor_grad: 0.427660, policy mean: 0.533892, policy: -5.453024, entropy: -0.050245, actor_norm: 0.098958, critic: 108.139252, critic_grad: 897.446716, value: 103.597137, critic_norm: 4.542111, value_mean: 117.448685, advantage: -10.164781
101
actor: -3.357028, actor_grad: 5.661633, policy mean: 0.555178, policy: -3.414228, entropy: -0.044993, actor_norm: 0.102193, critic: 49.174969, critic_grad: 587.145264, value: 44.907257, critic_norm: 4.267711, value_mean: 113.020126, advantage: -6.356646
101
actor: 3.301034, actor_grad: 1.396741, policy mean: 0.435446, policy: 3.245516, entropy: -0.053614, actor_norm: 0.109133, critic: 59.543739, critic_grad: 737.033752, value: 55.349876, critic_norm: 4.193864, value_mean: 111.577103, advantage: 7.405924
50
actor: -80.461548, actor_grad: 186.670990, policy mean: 0.732524, policy: -80.541519, entropy: -0.027927, actor_norm: 0.107897, critic: 12019.376953, critic_grad: 10187.307617, value: 12015.005859, critic_norm: 4.370626, value_mean: 118.410957, advantage: -109.428375
90
actor: -64.843925, actor_grad: 245.399811, policy mean: 0.758024, policy: -64.918961, entropy: -0.031712, actor_norm: 0.106750, critic: 7399.936035, critic_grad: 10084.515625, value: 7395.965820, critic_norm: 3.970083, value_mean: 98.133110, advantage: -85.693619
101
actor: 8.184918, actor_grad: 2.194580, policy mean: 0.445521, policy: 8.127252, entropy: -0.050989, actor_norm: 0.108656, critic: 339.050964, critic_grad: 1877.836426, value: 335.045898, critic_norm: 4.005056, value_mean: 88.817757, advantage: 18.250303
101
actor: 3.254532, actor_grad: 1.587013, policy mean: 0.442261, policy: 3.203875, entropy: -0.052560, actor_norm: 0.103218, critic: 59.148685, critic_grad: 583.984131, value: 55.272961, critic_norm: 3.875725, value_mean: 84.608978, advantage: 7.202052
101
actor: 3.953667, actor_grad: 3.222378, policy mean: 0.558470, policy: 3.899580, entropy: -0.047395, actor_norm: 0.101481, critic: 53.528370, critic_grad: 509.777008, value: 49.717888, critic_norm: 3.810481, value_mean: 67.694107, advantage: 7.007362
101
actor: -2.048556, actor_grad: 0.309572, policy mean: 0.564847, policy: -2.095283, entropy: -0.051029, actor_norm: 0.097756, critic: 19.441816, critic_grad: 302.119171, value: 15.155878, critic_norm: 4.285938, value_mean: 88.333649, advantage: -3.784629
101
actor: -2.982598, actor_grad: 2.168932, policy mean: 0.566418, policy: -3.030691, entropy: -0.052536, actor_norm: 0.100629, critic: 33.182842, critic_grad: 462.120178, value: 28.201796, critic_norm: 4.981047, value_mean: 111.672760, advantage: -5.274600
101
actor: 2.631459, actor_grad: 0.317700, policy mean: 0.508920, policy: 2.581858, entropy: -0.052331, actor_norm: 0.101933, critic: 31.860649, critic_grad: 412.959625, value: 27.643002, critic_norm: 4.217648, value_mean: 106.466515, advantage: 5.042240
101
actor: 1.475663, actor_grad: 3.301326, policy mean: 0.580559, policy: 1.414751, entropy: -0.044374, actor_norm: 0.105286, critic: 15.473057, critic_grad: 221.962448, value: 10.601660, critic_norm: 4.871397, value_mean: 137.459763, advantage: 2.131985
101
actor: 4.287059, actor_grad: 3.487054, policy mean: 0.625547, policy: 4.222205, entropy: -0.044285, actor_norm: 0.109139, critic: 45.564426, critic_grad: 796.397278, value: 40.678505, critic_norm: 4.885920, value_mean: 148.538971, advantage: 6.232840
101
actor: -8.389894, actor_grad: 11.798049, policy mean: 0.462854, policy: -8.455478, entropy: -0.048888, actor_norm: 0.114472, critic: 377.147736, critic_grad: 2779.754395, value: 370.769226, critic_norm: 6.378520, value_mean: 191.039307, advantage: -18.920340
101
actor: -8.426722, actor_grad: 12.458579, policy mean: 0.503655, policy: -8.512437, entropy: -0.042920, actor_norm: 0.128635, critic: 292.114929, critic_grad: 2903.465332, value: 285.891327, critic_norm: 6.223614, value_mean: 204.575974, advantage: -16.861843
101
actor: 4.427277, actor_grad: 11.973546, policy mean: 0.408838, policy: 4.326197, entropy: -0.044524, actor_norm: 0.145603, critic: 114.078751, critic_grad: 1660.593140, value: 108.447372, critic_norm: 5.631381, value_mean: 176.794388, advantage: 10.282063
36
actor: -631.006226, actor_grad: 630.537476, policy mean: 3.611772, policy: -631.167175, entropy: -0.000379, actor_norm: 0.161299, critic: 31102.523438, critic_grad: 23044.451172, value: 31096.359375, critic_norm: 6.163387, value_mean: 178.782257, advantage: -176.307129
101
actor: 1.933788, actor_grad: 0.709676, policy mean: 0.327204, policy: 1.807515, entropy: -0.040776, actor_norm: 0.167049, critic: 36.697842, critic_grad: 983.437073, value: 30.819302, critic_norm: 5.878541, value_mean: 188.501999, advantage: 5.064208
101
actor: 10.100734, actor_grad: 20.079721, policy mean: 0.559170, policy: 9.969925, entropy: -0.033170, actor_norm: 0.163979, critic: 329.881439, critic_grad: 2911.290771, value: 324.780396, critic_norm: 5.101030, value_mean: 138.523514, advantage: 17.283407
101
actor: 5.673885, actor_grad: 4.797123, policy mean: 0.816653, policy: 5.540576, entropy: -0.032221, actor_norm: 0.165530, critic: 48.538597, critic_grad: 719.267700, value: 44.079971, critic_norm: 4.458627, value_mean: 91.650391, advantage: 6.174153
101
actor: 0.230467, actor_grad: 1.568475, policy mean: 0.413133, policy: 0.108637, entropy: -0.038354, actor_norm: 0.160184, critic: 6.751460, critic_grad: 24.282703, value: 2.492155, critic_norm: 4.259305, value_mean: 68.213570, advantage: 0.068173
101
actor: 0.256041, actor_grad: 3.741919, policy mean: 0.500051, policy: 0.139908, entropy: -0.035910, actor_norm: 0.152043, critic: 14.914793, critic_grad: 131.100647, value: 10.719672, critic_norm: 4.195120, value_mean: 59.212097, advantage: 1.200776
101
actor: -1.980638, actor_grad: 3.876863, policy mean: 0.463138, policy: -2.081600, entropy: -0.040624, actor_norm: 0.141586, critic: 25.643398, critic_grad: 335.475739, value: 21.331743, critic_norm: 4.311655, value_mean: 72.951645, advantage: -4.568552
101
actor: -11.654058, actor_grad: 11.018508, policy mean: 0.570037, policy: -11.745459, entropy: -0.042298, actor_norm: 0.133699, critic: 451.104980, critic_grad: 1956.717163, value: 446.095276, critic_norm: 5.009708, value_mean: 126.993309, advantage: -21.093109
101
actor: -0.518906, actor_grad: 5.695510, policy mean: 0.948235, policy: -0.629764, entropy: -0.024044, actor_norm: 0.134901, critic: 29.394054, critic_grad: 191.346512, value: 22.873440, critic_norm: 6.520614, value_mean: 191.032440, advantage: -0.702948
101
actor: 11.017895, actor_grad: 4.378246, policy mean: 0.538318, policy: 10.919458, entropy: -0.039009, actor_norm: 0.137445, critic: 451.199738, critic_grad: 2623.071533, value: 445.565247, critic_norm: 5.634494, value_mean: 160.469559, advantage: 20.815666
101
actor: -2.885607, actor_grad: 2.663485, policy mean: 0.531316, policy: -2.981538, entropy: -0.042856, actor_norm: 0.138787, critic: 36.606789, critic_grad: 667.241028, value: 30.688248, critic_norm: 5.918542, value_mean: 165.859131, advantage: -5.515280
101
actor: -10.300631, actor_grad: 23.322741, policy mean: 0.552143, policy: -10.405264, entropy: -0.041619, actor_norm: 0.146252, critic: 361.645203, critic_grad: 3669.626221, value: 354.166382, critic_norm: 7.478835, value_mean: 250.580841, advantage: -18.801876
101
actor: 27.980753, actor_grad: 106.826683, policy mean: 0.712556, policy: 27.861782, entropy: -0.032810, actor_norm: 0.151781, critic: 1661.981201, critic_grad: 8792.415039, value: 1654.682129, critic_norm: 7.299030, value_mean: 249.094177, advantage: 40.090164
101
actor: 9.706056, actor_grad: 8.869999, policy mean: 0.685585, policy: 9.582675, entropy: -0.032416, actor_norm: 0.155797, critic: 184.234375, critic_grad: 1420.418091, value: 179.081268, critic_norm: 5.153109, value_mean: 135.892120, advantage: 13.359072
101
actor: 1.313498, actor_grad: 6.554160, policy mean: 0.474224, policy: 1.196726, entropy: -0.040258, actor_norm: 0.157030, critic: 14.819337, critic_grad: 300.764526, value: 9.061409, critic_norm: 5.757928, value_mean: 175.265167, advantage: 1.525308
28
actor: -107.839134, actor_grad: 196.989532, policy mean: 0.905160, policy: -107.973473, entropy: -0.024322, actor_norm: 0.158663, critic: 15362.918945, critic_grad: 12089.089844, value: 15357.563477, critic_norm: 5.355795, value_mean: 136.610672, advantage: -123.334923
46
actor: -137.714355, actor_grad: 230.038193, policy mean: 1.111795, policy: -137.852127, entropy: -0.022557, actor_norm: 0.160329, critic: 15021.193359, critic_grad: 12440.209961, value: 15016.165039, critic_norm: 5.028207, value_mean: 129.582855, advantage: -122.398315
101
actor: -1.787781, actor_grad: 1.516499, policy mean: 0.455407, policy: -1.907408, entropy: -0.036997, actor_norm: 0.156624, critic: 22.090092, critic_grad: 361.557983, value: 17.628799, critic_norm: 4.461293, value_mean: 91.893265, advantage: -4.185266
101
actor: 1.549801, actor_grad: 1.600955, policy mean: 0.334397, policy: 1.437426, entropy: -0.039235, actor_norm: 0.151610, critic: 24.116497, critic_grad: 391.436432, value: 19.744835, critic_norm: 4.371663, value_mean: 54.286011, advantage: 4.046943
101
actor: -2.234333, actor_grad: 7.600399, policy mean: 0.680071, policy: -2.344362, entropy: -0.034306, actor_norm: 0.144335, critic: 16.095272, critic_grad: 243.041809, value: 11.579626, critic_norm: 4.515646, value_mean: 53.685383, advantage: -3.294786
101
actor: -8.192790, actor_grad: 12.878322, policy mean: 0.602360, policy: -8.289550, entropy: -0.040390, actor_norm: 0.137150, critic: 193.233841, critic_grad: 1309.600952, value: 187.901978, critic_norm: 5.331867, value_mean: 128.342834, advantage: -13.700433
101
actor: -8.198424, actor_grad: 13.833112, policy mean: 0.583961, policy: -8.299506, entropy: -0.040099, actor_norm: 0.141181, critic: 204.173859, critic_grad: 1452.856323, value: 198.829391, critic_norm: 5.344469, value_mean: 138.643768, advantage: -14.064367
101
actor: -118.204117, actor_grad: 988.384399, policy mean: 0.579295, policy: -118.319748, entropy: -0.029094, actor_norm: 0.144722, critic: 41656.285156, critic_grad: 48435.679688, value: 41650.500000, critic_norm: 5.786755, value_mean: 207.032135, advantage: -204.071838
101
actor: 0.855162, actor_grad: 4.620831, policy mean: 0.487640, policy: 0.749210, entropy: -0.041567, actor_norm: 0.147520, critic: 26.551643, critic_grad: 454.226685, value: 20.751696, critic_norm: 5.799949, value_mean: 188.730194, advantage: 2.091986
101
actor: 3.935343, actor_grad: 1.226213, policy mean: 0.466846, policy: 3.817181, entropy: -0.037283, actor_norm: 0.155444, critic: 80.571228, critic_grad: 1396.691650, value: 75.588058, critic_norm: 4.983166, value_mean: 140.947968, advantage: 8.237653
101
actor: -5.766751, actor_grad: 4.018558, policy mean: 0.518278, policy: -5.882101, entropy: -0.038040, actor_norm: 0.153390, critic: 136.229568, critic_grad: 1187.245605, value: 131.519653, critic_norm: 4.709917, value_mean: 113.783951, advantage: -11.428030
101
actor: 1.873239, actor_grad: 11.459909, policy mean: 0.473794, policy: 1.760253, entropy: -0.037490, actor_norm: 0.150476, critic: 19.856436, critic_grad: 324.446686, value: 15.379631, critic_norm: 4.476805, value_mean: 66.822723, advantage: 2.911397
101
actor: -0.116293, actor_grad: 0.738980, policy mean: 0.427568, policy: -0.215218, entropy: -0.042873, actor_norm: 0.141798, critic: 5.567424, critic_grad: 53.454121, value: 0.899662, critic_norm: 4.667762, value_mean: 43.493469, advantage: -0.609855
101
actor: -8.475366, actor_grad: 16.455042, policy mean: 0.525744, policy: -8.566484, entropy: -0.045134, actor_norm: 0.136253, critic: 267.771942, critic_grad: 1402.054321, value: 262.761475, critic_norm: 5.010481, value_mean: 96.488228, advantage: -16.190859
101
actor: -7.238238, actor_grad: 19.248295, policy mean: 0.631546, policy: -7.336584, entropy: -0.038062, actor_norm: 0.136408, critic: 148.255554, critic_grad: 1415.606812, value: 142.632599, critic_norm: 5.622961, value_mean: 152.426788, advantage: -11.864904
101
actor: 7.193965, actor_grad: 4.074880, policy mean: 0.448794, policy: 7.102475, entropy: -0.047102, actor_norm: 0.138592, critic: 263.839935, critic_grad: 1529.102295, value: 259.326111, critic_norm: 4.513825, value_mean: 109.138718, advantage: 16.033028
101
actor: 9.590431, actor_grad: 8.414597, policy mean: 0.483034, policy: 9.499002, entropy: -0.046909, actor_norm: 0.138337, critic: 409.690643, critic_grad: 2520.380859, value: 405.154999, critic_norm: 4.535639, value_mean: 111.400253, advantage: 19.985125
101
actor: -1.256168, actor_grad: 2.876590, policy mean: 0.531927, policy: -1.344854, entropy: -0.044996, actor_norm: 0.133683, critic: 10.599001, critic_grad: 309.271484, value: 6.479864, critic_norm: 4.119137, value_mean: 90.093857, advantage: -2.488033
101
actor: -2.501627, actor_grad: 9.682954, policy mean: 0.610197, policy: -2.585952, entropy: -0.040494, actor_norm: 0.124819, critic: 22.318169, critic_grad: 388.942383, value: 18.524990, critic_norm: 3.793180, value_mean: 77.367294, advantage: -4.245088
101
actor: 6.956542, actor_grad: 6.345720, policy mean: 0.516376, policy: 6.885515, entropy: -0.049092, actor_norm: 0.120119, critic: 182.384445, critic_grad: 993.342468, value: 178.477173, critic_norm: 3.907279, value_mean: 92.906944, advantage: 13.286875
101
actor: -6.660166, actor_grad: 1.573023, policy mean: 0.628007, policy: -6.721084, entropy: -0.050998, actor_norm: 0.111916, critic: 120.262367, critic_grad: 1090.993530, value: 115.839661, critic_norm: 4.422705, value_mean: 111.034584, advantage: -10.756568
101
actor: -6.192295, actor_grad: 1.568187, policy mean: 0.562753, policy: -6.251447, entropy: -0.048453, actor_norm: 0.107605, critic: 130.559891, critic_grad: 883.263000, value: 126.607605, critic_norm: 3.952293, value_mean: 99.051979, advantage: -11.149251
101
actor: 6.380557, actor_grad: 3.934573, policy mean: 0.590337, policy: 6.323633, entropy: -0.045163, actor_norm: 0.102088, critic: 118.911095, critic_grad: 787.205261, value: 115.183517, critic_norm: 3.727578, value_mean: 90.968735, advantage: 10.731738
101
actor: 5.158525, actor_grad: 2.823757, policy mean: 0.499947, policy: 5.116439, entropy: -0.056043, actor_norm: 0.098130, critic: 107.908829, critic_grad: 756.544006, value: 104.083878, critic_norm: 3.824950, value_mean: 95.353821, advantage: 10.176611
101
actor: 2.507951, actor_grad: 1.137836, policy mean: 0.654523, policy: 2.456486, entropy: -0.047285, actor_norm: 0.098750, critic: 18.131865, critic_grad: 333.709656, value: 14.062206, critic_norm: 4.069659, value_mean: 104.996078, advantage: 3.673601
101
actor: -6.335206, actor_grad: 1.656269, policy mean: 0.597002, policy: -6.383385, entropy: -0.049150, actor_norm: 0.097329, critic: 117.613487, critic_grad: 1049.847168, value: 113.393692, critic_norm: 4.219796, value_mean: 109.343033, advantage: -10.624210
101
actor: -6.293970, actor_grad: 3.222338, policy mean: 0.588816, policy: -6.335521, entropy: -0.051869, actor_norm: 0.093420, critic: 120.860657, critic_grad: 915.661255, value: 116.741859, critic_norm: 4.118799, value_mean: 105.589523, advantage: -10.794101
101
actor: -1.007175, actor_grad: 0.970572, policy mean: 0.558847, policy: -1.047289, entropy: -0.054715, actor_norm: 0.094829, critic: 8.890745, critic_grad: 161.859650, value: 5.100552, critic_norm: 3.790193, value_mean: 92.055969, advantage: -1.849068
101
actor: 5.707557, actor_grad: 4.620087, policy mean: 0.510155, policy: 5.669961, entropy: -0.056450, actor_norm: 0.094047, critic: 128.782257, critic_grad: 994.957520, value: 125.087578, critic_norm: 3.694687, value_mean: 84.844322, advantage: 11.169419
101
actor: 7.483356, actor_grad: 11.118490, policy mean: 0.687200, policy: 7.428354, entropy: -0.038373, actor_norm: 0.093375, critic: 120.337196, critic_grad: 852.239502, value: 116.494499, critic_norm: 3.842697, value_mean: 92.234375, advantage: 10.781036
101
actor: 1.818085, actor_grad: 1.009634, policy mean: 0.548921, policy: 1.781054, entropy: -0.055483, actor_norm: 0.092514, critic: 14.851221, critic_grad: 296.503418, value: 10.767387, critic_norm: 4.083834, value_mean: 107.096657, advantage: 3.110242
101
actor: -4.456836, actor_grad: 0.723999, policy mean: 0.515905, policy: -4.486499, entropy: -0.057673, actor_norm: 0.087335, critic: 81.056068, critic_grad: 754.236450, value: 76.795242, critic_norm: 4.260829, value_mean: 113.041656, advantage: -8.721817
101
actor: -0.163222, actor_grad: 0.063116, policy mean: 0.538532, policy: -0.193449, entropy: -0.056799, actor_norm: 0.087026, critic: 4.300614, critic_grad: 27.403894, value: 0.458895, critic_norm: 3.841719, value_mean: 96.518097, advantage: -0.330306
101
actor: 5.036862, actor_grad: 1.894770, policy mean: 0.561873, policy: 5.001449, entropy: -0.053558, actor_norm: 0.088971, critic: 82.796005, critic_grad: 683.307678, value: 78.927086, critic_norm: 3.868920, value_mean: 95.611626, advantage: 8.876505
101
actor: 1.974015, actor_grad: 3.498853, policy mean: 0.680511, policy: 1.924982, entropy: -0.044782, actor_norm: 0.093815, critic: 12.725388, critic_grad: 268.416046, value: 8.413248, critic_norm: 4.312139, value_mean: 116.309845, advantage: 2.766439
101
actor: -4.192332, actor_grad: 5.591702, policy mean: 0.717421, policy: -4.237080, entropy: -0.046385, actor_norm: 0.091132, critic: 36.178104, critic_grad: 467.381378, value: 32.226536, critic_norm: 3.951568, value_mean: 96.306618, advantage: -5.650384
101
actor: 6.748705, actor_grad: 8.370853, policy mean: 0.840495, policy: 6.698293, entropy: -0.039129, actor_norm: 0.089541, critic: 67.926277, critic_grad: 563.299561, value: 64.219193, critic_norm: 3.707087, value_mean: 86.894363, advantage: 7.973629
101
actor: 6.841834, actor_grad: 4.292328, policy mean: 0.591606, policy: 6.803820, entropy: -0.049305, actor_norm: 0.087318, critic: 136.843704, critic_grad: 876.909607, value: 132.941208, critic_norm: 3.902492, value_mean: 98.603638, advantage: 11.518236
101
actor: 1.318416, actor_grad: 2.608925, policy mean: 0.720298, policy: 1.270429, entropy: -0.041131, actor_norm: 0.089119, critic: 9.003681, critic_grad: 191.055115, value: 4.969857, critic_norm: 4.033824, value_mean: 111.803139, advantage: 1.945325
101
actor: -4.289919, actor_grad: 0.512901, policy mean: 0.546865, policy: -4.325063, entropy: -0.053771, actor_norm: 0.088916, critic: 66.607903, critic_grad: 621.876587, value: 62.564091, critic_norm: 4.043812, value_mean: 102.435585, advantage: -7.896399
101
actor: -5.761516, actor_grad: 5.129138, policy mean: 0.490779, policy: -5.791121, entropy: -0.059891, actor_norm: 0.089498, critic: 142.961075, critic_grad: 998.562927, value: 138.803986, critic_norm: 4.157094, value_mean: 109.591385, advantage: -11.762057
101
actor: -1.207036, actor_grad: 0.445508, policy mean: 0.482228, policy: -1.246054, entropy: -0.054933, actor_norm: 0.093950, critic: 11.540956, critic_grad: 204.314667, value: 7.570037, critic_norm: 3.970920, value_mean: 100.242233, advantage: -2.641941
101
actor: 6.774020, actor_grad: 4.907130, policy mean: 0.588247, policy: 6.730930, entropy: -0.050421, actor_norm: 0.093511, critic: 136.342010, critic_grad: 1205.092163, value: 132.528152, critic_norm: 3.813858, value_mean: 91.323334, advantage: 11.471428
101
actor: 8.550735, actor_grad: 10.741899, policy mean: 0.498974, policy: 8.515522, entropy: -0.056321, actor_norm: 0.091534, critic: 296.867676, critic_grad: 2355.135010, value: 293.064117, critic_norm: 3.803559, value_mean: 93.631317, advantage: 17.084873
101
actor: 6.709247, actor_grad: 3.450264, policy mean: 0.613509, policy: 6.669339, entropy: -0.049839, actor_norm: 0.089747, critic: 123.484756, critic_grad: 944.156921, value: 119.640305, critic_norm: 3.844450, value_mean: 98.179153, advantage: 10.916096
101
actor: 1.921785, actor_grad: 1.621070, policy mean: 0.513860, policy: 1.889637, entropy: -0.057246, actor_norm: 0.089393, critic: 18.116283, critic_grad: 340.162903, value: 14.058226, critic_norm: 4.058057, value_mean: 107.338684, advantage: 3.690289
101
actor: -2.096702, actor_grad: 0.540619, policy mean: 0.594448, policy: -2.131026, entropy: -0.053110, actor_norm: 0.087433, critic: 17.047245, critic_grad: 294.448486, value: 12.905329, critic_norm: 4.141916, value_mean: 106.727188, advantage: -3.552980
101
actor: -6.853883, actor_grad: 3.706650, policy mean: 0.556267, policy: -6.893585, entropy: -0.050105, actor_norm: 0.089807, critic: 157.096588, critic_grad: 1295.503540, value: 152.893707, critic_norm: 4.202877, value_mean: 114.004395, advantage: -12.314435
101
actor: -5.144855, actor_grad: 3.900118, policy mean: 0.623889, policy: -5.182914, entropy: -0.050653, actor_norm: 0.088711, critic: 74.538864, critic_grad: 824.485474, value: 70.407837, critic_norm: 4.131025, value_mean: 110.109955, advantage: -8.317133
101
actor: -0.327040, actor_grad: 0.317946, policy mean: 0.578607, policy: -0.367485, entropy: -0.050739, actor_norm: 0.091184, critic: 5.217268, critic_grad: 44.002499, value: 1.370574, critic_norm: 3.846694, value_mean: 98.843430, advantage: -0.493922
101
actor: 3.921944, actor_grad: 1.223964, policy mean: 0.525435, policy: 3.883768, entropy: -0.054409, actor_norm: 0.092584, critic: 59.227249, critic_grad: 591.259216, value: 55.365456, critic_norm: 3.861795, value_mean: 97.613190, advantage: 7.399044
101
actor: 4.508498, actor_grad: 4.568081, policy mean: 0.501038, policy: 4.470756, entropy: -0.056409, actor_norm: 0.094152, critic: 83.409622, critic_grad: 671.034485, value: 79.607010, critic_norm: 3.802610, value_mean: 96.302887, advantage: 8.915132
101
actor: 5.692061, actor_grad: 11.424947, policy mean: 0.644003, policy: 5.640770, entropy: -0.045249, actor_norm: 0.096540, critic: 79.197586, critic_grad: 816.641968, value: 75.176666, critic_norm: 4.020916, value_mean: 106.362587, advantage: 8.601744
101
actor: -3.989413, actor_grad: 0.640393, policy mean: 0.537408, policy: -4.029173, entropy: -0.053875, actor_norm: 0.093634, critic: 60.587803, critic_grad: 904.619080, value: 56.357296, critic_norm: 4.230508, value_mean: 120.592117, advantage: -7.464326
101
actor: -6.834456, actor_grad: 8.012683, policy mean: 0.600512, policy: -6.875202, entropy: -0.051883, actor_norm: 0.092629, critic: 135.547775, critic_grad: 1150.566284, value: 131.285309, critic_norm: 4.262460, value_mean: 118.646683, advantage: -11.454126
101
actor: -6.569605, actor_grad: 0.991318, policy mean: 0.517899, policy: -6.609133, entropy: -0.055428, actor_norm: 0.094956, critic: 165.777069, critic_grad: 1506.383911, value: 161.579575, critic_norm: 4.197495, value_mean: 124.504501, advantage: -12.658959
88
actor: -80.353790, actor_grad: 162.090729, policy mean: 0.717768, policy: -80.418297, entropy: -0.035849, actor_norm: 0.100360, critic: 12443.881836, critic_grad: 10895.336914, value: 12439.632812, critic_norm: 4.248940, value_mean: 122.780884, advantage: -111.188637
101
actor: 4.884308, actor_grad: 11.246525, policy mean: 0.694076, policy: 4.831486, entropy: -0.049809, actor_norm: 0.102630, critic: 49.766808, critic_grad: 923.665833, value: 45.623070, critic_norm: 4.143736, value_mean: 128.804016, advantage: 6.746810
101
actor: -5.016963, actor_grad: 12.568815, policy mean: 0.506309, policy: -5.075501, entropy: -0.049088, actor_norm: 0.107627, critic: 106.982758, critic_grad: 1982.601562, value: 102.079170, critic_norm: 4.903585, value_mean: 185.990936, advantage: -9.967471
101
actor: 4.923570, actor_grad: 0.787439, policy mean: 0.458152, policy: 4.864755, entropy: -0.050713, actor_norm: 0.109528, critic: 118.442207, critic_grad: 1027.023438, value: 114.194794, critic_norm: 4.247414, value_mean: 117.426834, advantage: 10.679529
101
actor: -6.095402, actor_grad: 1.469213, policy mean: 0.591014, policy: -6.161185, entropy: -0.046344, actor_norm: 0.112127, critic: 113.285881, critic_grad: 1116.313721, value: 108.289383, critic_norm: 4.996495, value_mean: 148.562637, advantage: -10.372229
101
actor: -3.219097, actor_grad: 2.979697, policy mean: 0.537618, policy: -3.289797, entropy: -0.044888, actor_norm: 0.115588, critic: 46.183300, critic_grad: 941.608704, value: 41.076218, critic_norm: 5.107084, value_mean: 178.346497, advantage: -6.353950
101
actor: -9.928582, actor_grad: 13.654097, policy mean: 0.460720, policy: -10.009247, entropy: -0.041213, actor_norm: 0.121878, critic: 484.935364, critic_grad: 3945.083496, value: 478.989899, critic_norm: 5.945465, value_mean: 222.701843, advantage: -21.683048
101
actor: -0.621477, actor_grad: 1.017791, policy mean: 0.483260, policy: -0.710739, entropy: -0.041922, actor_norm: 0.131183, critic: 8.612574, critic_grad: 242.922836, value: 3.408301, critic_norm: 5.204272, value_mean: 183.334015, advantage: -1.628417
101
actor: 1.795297, actor_grad: 4.409703, policy mean: 0.468849, policy: 1.703341, entropy: -0.039842, actor_norm: 0.131798, critic: 27.862936, critic_grad: 578.746887, value: 22.490192, critic_norm: 5.372744, value_mean: 180.943756, advantage: 3.535496
101
actor: 8.089521, actor_grad: 14.038507, policy mean: 0.625582, policy: 7.987533, entropy: -0.035742, actor_norm: 0.137730, critic: 155.653397, critic_grad: 1444.329956, value: 151.124481, critic_norm: 4.528911, value_mean: 127.474678, advantage: 12.078870
101
actor: 4.942666, actor_grad: 4.835145, policy mean: 0.361007, policy: 4.843985, entropy: -0.041339, actor_norm: 0.140020, critic: 190.198593, critic_grad: 1689.732056, value: 185.855484, critic_norm: 4.343112, value_mean: 106.494812, advantage: 13.398520
101
actor: 4.838733, actor_grad: 5.842908, policy mean: 0.414820, policy: 4.735092, entropy: -0.038222, actor_norm: 0.141863, critic: 137.277206, critic_grad: 1026.237793, value: 132.894455, critic_norm: 4.382752, value_mean: 69.352890, advantage: 11.415771
101
actor: 13.044780, actor_grad: 62.894924, policy mean: 0.718532, policy: 12.934587, entropy: -0.031498, actor_norm: 0.141690, critic: 332.324310, critic_grad: 1994.275146, value: 327.502838, critic_norm: 4.821465, value_mean: 25.711210, advantage: 18.007307
101
actor: -6.457093, actor_grad: 3.651237, policy mean: 0.557841, policy: -6.552696, entropy: -0.041050, actor_norm: 0.136653, critic: 138.208527, critic_grad: 976.869507, value: 133.464355, critic_norm: 4.744169, value_mean: 95.699669, advantage: -11.483836
101
actor: -4.739953, actor_grad: 8.481081, policy mean: 0.611436, policy: -4.844432, entropy: -0.045037, actor_norm: 0.149517, critic: 68.037094, critic_grad: 656.618896, value: 63.326595, critic_norm: 4.710502, value_mean: 110.489304, advantage: -7.864418
101
actor: 9.608133, actor_grad: 22.179806, policy mean: 0.464625, policy: 9.514142, entropy: -0.045460, actor_norm: 0.139451, critic: 416.011810, critic_grad: 2434.424316, value: 411.550659, critic_norm: 4.461142, value_mean: 109.543030, advantage: 20.170511
101
actor: -1.585273, actor_grad: 2.068095, policy mean: 0.427187, policy: -1.673173, entropy: -0.047305, actor_norm: 0.135204, critic: 21.153744, critic_grad: 374.229950, value: 16.623100, critic_norm: 4.530643, value_mean: 114.430824, advantage: -4.001972
101
actor: -5.246903, actor_grad: 9.601864, policy mean: 0.466626, policy: -5.330564, entropy: -0.048231, actor_norm: 0.131892, critic: 135.377380, critic_grad: 997.698547, value: 131.135666, critic_norm: 4.241707, value_mean: 104.857208, advantage: -11.381760
101
actor: 6.222532, actor_grad: 3.272150, policy mean: 0.458574, policy: 6.139211, entropy: -0.046514, actor_norm: 0.129836, critic: 182.028885, critic_grad: 1017.437805, value: 177.984772, critic_norm: 4.044118, value_mean: 91.886139, advantage: 13.313900
101
actor: -1.872650, actor_grad: 0.501014, policy mean: 0.673215, policy: -1.945020, entropy: -0.046998, actor_norm: 0.119367, critic: 15.616449, critic_grad: 312.043213, value: 11.359753, critic_norm: 4.256697, value_mean: 104.219772, advantage: -3.306743
101
actor: -8.870107, actor_grad: 11.653838, policy mean: 0.669557, policy: -8.941698, entropy: -0.042634, actor_norm: 0.114225, critic: 193.109558, critic_grad: 1160.803955, value: 188.953079, critic_norm: 4.156474, value_mean: 102.947098, advantage: -13.686820
101
actor: 2.924895, actor_grad: 1.663857, policy mean: 0.632482, policy: 2.862115, entropy: -0.045629, actor_norm: 0.108408, critic: 24.477333, critic_grad: 329.100494, value: 20.679968, critic_norm: 3.797365, value_mean: 90.703812, advantage: 4.534889
101
actor: 6.420522, actor_grad: 4.991224, policy mean: 0.592623, policy: 6.363513, entropy: -0.047190, actor_norm: 0.104199, critic: 123.502670, critic_grad: 814.872559, value: 119.627266, critic_norm: 3.875407, value_mean: 94.666267, advantage: 10.920892
101
actor: 5.993237, actor_grad: 2.563353, policy mean: 0.492499, policy: 5.945461, entropy: -0.053421, actor_norm: 0.101197, critic: 151.849625, critic_grad: 1081.584473, value: 147.888977, critic_norm: 3.960642, value_mean: 103.863846, advantage: 12.136658
101
actor: -1.208621, actor_grad: 2.085906, policy mean: 0.544797, policy: -1.263466, entropy: -0.045884, actor_norm: 0.100729, critic: 9.574146, critic_grad: 190.159164, value: 5.337640, critic_norm: 4.236507, value_mean: 113.560753, advantage: -2.195293
101
actor: -5.896372, actor_grad: 3.515285, policy mean: 0.647778, policy: -5.949747, entropy: -0.046456, actor_norm: 0.099831, critic: 93.172798, critic_grad: 855.533020, value: 88.955849, critic_norm: 4.216952, value_mean: 114.191101, advantage: -9.331226
101
actor: -7.724795, actor_grad: 1.018218, policy mean: 0.501158, policy: -7.780165, entropy: -0.050395, actor_norm: 0.105764, critic: 249.979187, critic_grad: 1580.476074, value: 245.634033, critic_norm: 4.345148, value_mean: 126.291389, advantage: -15.613373
101
actor: 1.423419, actor_grad: 3.910948, policy mean: 0.639286, policy: 1.355517, entropy: -0.038288, actor_norm: 0.106189, critic: 13.661789, critic_grad: 220.652451, value: 9.280289, critic_norm: 4.381500, value_mean: 140.759888, advantage: 2.104437
101
actor: -5.772458, actor_grad: 3.427368, policy mean: 0.385287, policy: -5.824695, entropy: -0.054279, actor_norm: 0.106516, critic: 247.679504, critic_grad: 2336.147217, value: 242.587860, critic_norm: 5.091640, value_mean: 178.249603, advantage: -15.170349
101
actor: -8.012745, actor_grad: 9.838301, policy mean: 0.539906, policy: -8.080071, entropy: -0.044935, actor_norm: 0.112262, critic: 227.695038, critic_grad: 2312.423096, value: 222.251572, critic_norm: 5.443460, value_mean: 199.465439, advantage: -14.900455
101
actor: -27.912991, actor_grad: 92.827751, policy mean: 0.874690, policy: -27.999950, entropy: -0.035064, actor_norm: 0.122024, critic: 1127.214966, critic_grad: 6882.523926, value: 1120.030396, critic_norm: 7.184621, value_mean: 288.381531, advantage: -33.057041
101
actor: -4.765486, actor_grad: 11.845369, policy mean: 0.514360, policy: -4.846830, entropy: -0.041887, actor_norm: 0.123231, critic: 138.951599, critic_grad: 2705.072510, value: 130.943390, critic_norm: 8.008202, value_mean: 351.247253, advantage: -9.829331
101
actor: -9.665717, actor_grad: 6.537887, policy mean: 0.448531, policy: -9.751154, entropy: -0.040156, actor_norm: 0.125592, critic: 619.583435, critic_grad: 5793.523438, value: 609.638428, critic_norm: 9.945013, value_mean: 376.412170, advantage: -22.364212
95
actor: -329.422394, actor_grad: 290.150024, policy mean: 0.632290, policy: -329.514587, entropy: -0.037787, actor_norm: 0.129987, critic: 273247.906250, critic_grad: 177682.953125, value: 273236.406250, critic_norm: 11.498759, value_mean: 533.530396, advantage: -519.020630
101
actor: -5.177105, actor_grad: 29.239111, policy mean: 0.611977, policy: -5.267954, entropy: -0.040777, actor_norm: 0.131626, critic: 79.812057, critic_grad: 3204.066650, value: 66.273109, critic_norm: 13.538952, value_mean: 641.397156, advantage: -7.945656
101
actor: -44.783409, actor_grad: 176.318100, policy mean: 0.666915, policy: -44.881416, entropy: -0.036484, actor_norm: 0.134490, critic: 4474.665039, critic_grad: 35422.949219, value: 4458.403809, critic_norm: 16.261333, value_mean: 851.644653, advantage: -66.735229
101
actor: -2.630276, actor_grad: 3.201300, policy mean: 0.347316, policy: -2.727559, entropy: -0.041714, actor_norm: 0.138998, critic: 155.606506, critic_grad: 4055.560547, value: 137.768280, critic_norm: 17.838221, value_mean: 790.482178, advantage: -8.138119
101
actor: -27.187807, actor_grad: 5.689556, policy mean: 0.436128, policy: -27.290316, entropy: -0.038481, actor_norm: 0.140989, critic: 4307.870605, critic_grad: 35685.664062, value: 4287.291992, critic_norm: 20.578587, value_mean: 996.097961, advantage: -65.004082
34
actor: -60.065601, actor_grad: 496.245728, policy mean: 0.081313, policy: -60.184029, entropy: -0.024102, actor_norm: 0.142529, critic: 541097.062500, critic_grad: 248267.453125, value: 541074.937500, critic_norm: 22.100859, value_mean: 737.050049, advantage: -735.555054
46
actor: -728.871643, actor_grad: 1716.104858, policy mean: 0.866883, policy: -728.982849, entropy: -0.031923, actor_norm: 0.143130, critic: 650479.250000, critic_grad: 302227.031250, value: 650456.437500, critic_norm: 22.839756, value_mean: 812.315796, advantage: -805.131226
95
actor: -1013.109070, actor_grad: 3454.820801, policy mean: 0.811704, policy: -1013.229553, entropy: -0.029280, actor_norm: 0.149770, critic: 1613392.500000, critic_grad: 925535.187500, value: 1613368.375000, critic_norm: 24.121576, value_mean: 1280.371338, advantage: -1265.861572
31
actor: -489.613190, actor_grad: 1400.875488, policy mean: 0.755901, policy: -489.744354, entropy: -0.025864, actor_norm: 0.157043, critic: 396383.781250, critic_grad: 187926.890625, value: 396360.312500, critic_norm: 23.469702, value_mean: 640.457642, advantage: -625.947815
101
actor: 80.526855, actor_grad: 464.967163, policy mean: 0.884558, policy: 80.399551, entropy: -0.030631, actor_norm: 0.157936, critic: 9228.527344, critic_grad: 42921.839844, value: 9207.818359, critic_norm: 20.709423, value_mean: 778.907593, advantage: 91.379654
101
actor: 33.265163, actor_grad: 107.969772, policy mean: 0.395406, policy: 33.134567, entropy: -0.034307, actor_norm: 0.164901, critic: 7838.863770, critic_grad: 33482.128906, value: 7821.187500, critic_norm: 17.676174, value_mean: 599.727844, advantage: 85.687546
101
actor: 22.409286, actor_grad: 115.076416, policy mean: 0.322436, policy: 22.255737, entropy: -0.028498, actor_norm: 0.182046, critic: 4494.950195, critic_grad: 21933.884766, value: 4480.770508, critic_norm: 14.179752, value_mean: 414.450684, advantage: 63.932217
101
actor: 8.548787, actor_grad: 70.349396, policy mean: 0.353953, policy: 8.359236, entropy: -0.022749, actor_norm: 0.212300, critic: 828.263733, critic_grad: 6054.213867, value: 816.955933, critic_norm: 11.307810, value_mean: 227.604889, advantage: 24.983284
101
actor: 4.754470, actor_grad: 10.309884, policy mean: 0.369384, policy: 4.541945, entropy: -0.022686, actor_norm: 0.235210, critic: 182.989380, critic_grad: 1563.839966, value: 173.692459, critic_norm: 9.296914, value_mean: 124.592438, advantage: 11.198805
101
actor: 0.479195, actor_grad: 1.453006, policy mean: 0.150576, policy: 0.264339, entropy: -0.020701, actor_norm: 0.235558, critic: 16.708946, critic_grad: 168.317627, value: 9.862157, critic_norm: 6.846789, value_mean: 88.132416, advantage: 1.466827
101
actor: 0.901894, actor_grad: 4.623999, policy mean: 0.477247, policy: 0.689247, entropy: -0.020894, actor_norm: 0.233541, critic: 12.403149, critic_grad: 127.920731, value: 6.692098, critic_norm: 5.711051, value_mean: 62.416931, advantage: 1.243580
101
actor: -3.494462, actor_grad: 16.877686, policy mean: 0.858317, policy: -3.710547, entropy: -0.020634, actor_norm: 0.236719, critic: 22.458206, critic_grad: 354.563141, value: 17.522026, critic_norm: 4.936179, value_mean: 45.862831, advantage: -4.128695
101
actor: -2.423308, actor_grad: 5.323841, policy mean: 0.503706, policy: -2.620461, entropy: -0.023404, actor_norm: 0.220557, critic: 32.755028, critic_grad: 371.925140, value: 28.124424, critic_norm: 4.630602, value_mean: 87.099777, advantage: -5.195540
101
actor: -0.687587, actor_grad: 3.828295, policy mean: 0.101221, policy: -0.873376, entropy: -0.023215, actor_norm: 0.209004, critic: 72.186386, critic_grad: 619.594727, value: 67.603714, critic_norm: 4.582671, value_mean: 79.811020, advantage: -8.143160
101
actor: -0.809202, actor_grad: 1.834136, policy mean: 0.514464, policy: -0.993616, entropy: -0.025081, actor_norm: 0.209495, critic: 9.121716, critic_grad: 155.563126, value: 4.866600, critic_norm: 4.255115, value_mean: 77.058456, advantage: -2.133558
101
actor: -5.510532, actor_grad: 8.689045, policy mean: 0.440877, policy: -5.668299, entropy: -0.038503, actor_norm: 0.196269, critic: 170.681107, critic_grad: 1567.558716, value: 165.545288, critic_norm: 5.135812, value_mean: 152.092270, advantage: -12.760783
101
actor: 0.928988, actor_grad: 3.689372, policy mean: 0.854687, policy: 0.748535, entropy: -0.023199, actor_norm: 0.203652, critic: 28.332420, critic_grad: 133.962585, value: 22.562700, critic_norm: 5.769721, value_mean: 186.256012, advantage: 0.049024
101
actor: -5.008286, actor_grad: 2.897073, policy mean: 0.524043, policy: -5.187902, entropy: -0.035910, actor_norm: 0.215527, critic: 112.382072, critic_grad: 1533.212280, value: 105.951447, critic_norm: 6.430628, value_mean: 213.264313, advantage: -9.903616
101
actor: -3.643598, actor_grad: 17.220886, policy mean: 0.724972, policy: -3.832526, entropy: -0.025992, actor_norm: 0.214920, critic: 38.655834, critic_grad: 1227.562988, value: 32.199661, critic_norm: 6.456172, value_mean: 238.302322, advantage: -5.605607
101
actor: 8.191926, actor_grad: 3.941004, policy mean: 0.587952, policy: 8.007716, entropy: -0.034247, actor_norm: 0.218457, critic: 209.653870, critic_grad: 2486.499268, value: 203.577362, critic_norm: 6.076511, value_mean: 179.060852, advantage: 13.910124
101
actor: 4.253313, actor_grad: 11.588011, policy mean: 0.781700, policy: 4.062903, entropy: -0.029111, actor_norm: 0.219522, critic: 66.944473, critic_grad: 1202.482178, value: 61.131237, critic_norm: 5.813236, value_mean: 138.747101, advantage: 7.134457
101
actor: 3.688431, actor_grad: 3.508954, policy mean: 0.490118, policy: 3.507187, entropy: -0.034797, actor_norm: 0.216041, critic: 48.152798, critic_grad: 862.063110, value: 42.502781, critic_norm: 5.650016, value_mean: 107.109077, advantage: 5.994100
101
actor: 2.602984, actor_grad: 5.653830, policy mean: 0.567772, policy: 2.423579, entropy: -0.034693, actor_norm: 0.214098, critic: 22.139551, critic_grad: 423.114258, value: 16.621691, critic_norm: 5.517860, value_mean: 90.443039, advantage: 3.910433
101
actor: 1.694870, actor_grad: 5.961277, policy mean: 0.365627, policy: 1.517712, entropy: -0.032988, actor_norm: 0.210146, critic: 22.677975, critic_grad: 385.491089, value: 17.324736, critic_norm: 5.353239, value_mean: 82.614891, advantage: 4.057711
101
actor: 0.816970, actor_grad: 2.541162, policy mean: 0.277212, policy: 0.649237, entropy: -0.038191, actor_norm: 0.205924, critic: 11.155188, critic_grad: 263.400421, value: 5.915364, critic_norm: 5.239823, value_mean: 59.424286, advantage: 2.166043
101
actor: 0.257549, actor_grad: 0.160095, policy mean: 0.490635, policy: 0.089204, entropy: -0.034678, actor_norm: 0.203023, critic: 5.788024, critic_grad: 16.947348, value: 0.640552, critic_norm: 5.147472, value_mean: 72.233093, advantage: 0.165923
101
actor: -0.312285, actor_grad: 0.629344, policy mean: 0.475597, policy: -0.481853, entropy: -0.032698, actor_norm: 0.202267, critic: 7.307055, critic_grad: 68.489281, value: 2.194671, critic_norm: 5.112383, value_mean: 65.062569, advantage: -0.826040
101
actor: 0.076178, actor_grad: 0.512848, policy mean: 0.232550, policy: -0.086134, entropy: -0.039501, actor_norm: 0.201814, critic: 5.526567, critic_grad: 33.505241, value: 0.437965, critic_norm: 5.088601, value_mean: 61.075359, advantage: -0.341024

In [3]:
import pandas as pd
import seaborn as sns
%matplotlib inline

In [4]:
tmp = pd.DataFrame(GD)
tmp.iloc[:500,:1].plot(figsize=(16,6))


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x30003c50>

In [5]:
tmp = pd.DataFrame(GD)
tmp.plot(figsize=(16,6))


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x302a76a0>

In [ ]: