[177] reward: -0.00, reward 100-step MA: 0.98, action: [-0.21955845], td-error: 0.0000: 2%|▏ | 178/8000 [00:00<09:58, 13.08it/s] | 1/8000 [00:00<41:30, 3.21it/s]
Total episode reward: 96.4231978292. Finished in 130 steps.
[307] reward: -0.09, reward 100-step MA: -0.06, action: [ 0.96571648], td-error: 0.0000: 4%|▍ | 308/8000 [00:05<08:25, 15.21it/s]256] reward: -0.11, reward 100-step MA: -0.01, action: [ 1.03491187], td-error: 0.0000: 3%|▎ | 257/8000 [00:01<05:12, 24.76it/s]
Total episode reward: 92.7599452618. Finished in 181 steps.
[2159] reward: 99.91, reward 100-step MA: 0.96, action: [-0.97013456], td-error: 98.2218: 27%|██▋ | 2160/8000 [02:57<09:11, 10.59it/s]33] reward: -0.09, reward 100-step MA: 0.92, action: [ 0.97408241], td-error: 0.0000: 4%|▍ | 334/8000 [00:08<09:56, 12.85it/s]
Total episode reward: 8.25764464207. Finished in 1849 steps.
[2839] reward: 99.92, reward 100-step MA: 0.92, action: [-0.87456381], td-error: 98.6304: 36%|███▌ | 2840/8000 [04:01<08:07, 10.59it/s][2163] reward: -0.09, reward 100-step MA: 0.95, action: [ 0.95159531], td-error: 0.0000: 27%|██▋ | 2164/8000 [02:58<09:07, 10.66it/s]
Total episode reward: 48.8485913878. Finished in 680 steps.
[2943] reward: -0.00, reward 100-step MA: 0.92, action: [ 0.11740303], td-error: 0.0000: 37%|███▋ | 2944/8000 [04:11<07:44, 10.90it/s] [2843] reward: -0.03, reward 100-step MA: 0.92, action: [ 0.50309128], td-error: 0.0000: 36%|███▌ | 2844/8000 [04:01<08:04, 10.65it/s]
Total episode reward: 92.158596952. Finished in 103 steps.
[3069] reward: -0.06, reward 100-step MA: 0.92, action: [ 0.80320817], td-error: 0.0000: 38%|███▊ | 3070/8000 [04:22<07:48, 10.53it/s] 2947] reward: -0.00, reward 100-step MA: 0.93, action: [ 0.13784266], td-error: 0.0000: 37%|███▋ | 2948/8000 [04:11<07:44, 10.87it/s]
Total episode reward: 91.1983401741. Finished in 126 steps.
[3181] reward: 99.97, reward 100-step MA: 0.93, action: [ 0.50303936], td-error: 99.8536: 40%|███▉ | 3182/8000 [04:33<07:23, 10.86it/s]3073] reward: -0.07, reward 100-step MA: 0.92, action: [ 0.8408705], td-error: 0.0000: 38%|███▊ | 3074/8000 [04:23<07:38, 10.75it/s]
Total episode reward: 91.625918678. Finished in 113 steps.
[3297] reward: -0.00, reward 100-step MA: 0.93, action: [-0.10780644], td-error: 0.0000: 41%|████ | 3298/8000 [04:44<07:16, 10.78it/s] [3185] reward: -0.04, reward 100-step MA: 0.93, action: [ 0.66720861], td-error: 0.0000: 40%|███▉ | 3186/8000 [04:33<07:23, 10.85it/s]
Total episode reward: 91.7372396827. Finished in 115 steps.
[3411] reward: 99.98, reward 100-step MA: 0.94, action: [ 0.48037547], td-error: 100.2430: 43%|████▎ | 3412/8000 [04:54<07:25, 10.31it/s]301] reward: -0.00, reward 100-step MA: 0.93, action: [ 0.05319721], td-error: 0.0000: 41%|████▏ | 3302/8000 [04:44<07:19, 10.69it/s]
Total episode reward: 93.9884714781. Finished in 115 steps.
[3559] reward: -0.03, reward 100-step MA: 0.94, action: [ 0.53071296], td-error: 0.0000: 44%|████▍ | 3560/8000 [05:09<07:03, 10.49it/s] [3415] reward: -0.09, reward 100-step MA: 0.94, action: [-0.96730715], td-error: 0.8575: 43%|████▎ | 3416/8000 [04:55<07:15, 10.53it/s]
Total episode reward: 92.5132563255. Finished in 147 steps.
[3669] reward: -0.07, reward 100-step MA: 0.93, action: [ 0.84691906], td-error: 0.0000: 46%|████▌ | 3670/8000 [05:20<06:44, 10.70it/s] 3563] reward: -0.06, reward 100-step MA: 0.94, action: [ 0.78599143], td-error: 0.0000: 45%|████▍ | 3564/8000 [05:09<07:01, 10.52it/s]
Total episode reward: 92.693000711. Finished in 110 steps.
[3815] reward: 99.99, reward 100-step MA: 0.96, action: [ 0.34155673], td-error: 100.4361: 48%|████▊ | 3816/8000 [05:33<06:35, 10.59it/s]673] reward: -0.08, reward 100-step MA: 0.93, action: [ 0.90532643], td-error: 0.0000: 46%|████▌ | 3674/8000 [05:20<06:48, 10.60it/s]
Total episode reward: 92.2462467649. Finished in 147 steps.
[3913] reward: 99.99, reward 100-step MA: 1.95, action: [ 0.36616325], td-error: 100.3555: 49%|████▉ | 3914/8000 [05:43<06:33, 10.38it/s][3819] reward: -0.00, reward 100-step MA: 0.97, action: [-0.21762282], td-error: 0.0000: 48%|████▊ | 3820/8000 [05:34<06:31, 10.68it/s]
Total episode reward: 95.0193921351. Finished in 98 steps.
[3997] reward: 99.98, reward 100-step MA: 1.96, action: [ 0.46925616], td-error: 100.3180: 50%|████▉ | 3998/8000 [05:51<06:20, 10.51it/s][3917] reward: -0.09, reward 100-step MA: 0.95, action: [-0.96975958], td-error: 0.5279: 49%|████▉ | 3918/8000 [05:43<06:27, 10.54it/s]
Total episode reward: 95.9540731023. Finished in 84 steps.
[4151] reward: -0.00, reward 100-step MA: 0.97, action: [-0.13247102], td-error: 0.0000: 52%|█████▏ | 4152/8000 [06:05<06:07, 10.47it/s] [4001] reward: -0.01, reward 100-step MA: 1.96, action: [-0.38163775], td-error: 0.0875: 50%|█████ | 4002/8000 [05:51<06:27, 10.32it/s]
Total episode reward: 97.2207880179. Finished in 153 steps.
[4249] reward: -0.01, reward 100-step MA: 1.95, action: [ 0.30502889], td-error: 0.0000: 53%|█████▎ | 4250/8000 [06:14<06:01, 10.38it/s][4155] reward: -0.01, reward 100-step MA: 0.97, action: [-0.27297813], td-error: 0.0000: 52%|█████▏ | 4156/8000 [06:05<06:03, 10.59it/s]
Total episode reward: 95.1885289408. Finished in 98 steps.
[4371] reward: 99.90, reward 100-step MA: 0.93, action: [-1.00593388], td-error: 100.8507: 55%|█████▍ | 4372/8000 [06:26<05:50, 10.35it/s]253] reward: -0.02, reward 100-step MA: 0.95, action: [ 0.39229238], td-error: 0.0000: 53%|█████▎ | 4254/8000 [06:15<05:54, 10.58it/s]
Total episode reward: 92.7324281326. Finished in 123 steps.
[4457] reward: -0.01, reward 100-step MA: 1.95, action: [-0.3517307], td-error: 0.0000: 56%|█████▌ | 4458/8000 [06:34<05:40, 10.42it/s] [4375] reward: -0.07, reward 100-step MA: 0.93, action: [-0.83842325], td-error: 0.9273: 55%|█████▍ | 4376/8000 [06:26<05:50, 10.33it/s]
Total episode reward: 95.8260672685. Finished in 85 steps.
[4755] reward: -0.04, reward 100-step MA: 0.95, action: [-0.63292646], td-error: 0.2361: 59%|█████▉ | 4756/8000 [07:03<05:11, 10.40it/s] 4461] reward: -0.01, reward 100-step MA: 1.95, action: [-0.36525297], td-error: 0.0000: 56%|█████▌ | 4462/8000 [06:35<05:39, 10.41it/s]
Total episode reward: 93.035235595. Finished in 298 steps.
[4841] reward: 99.98, reward 100-step MA: 1.94, action: [ 0.39141309], td-error: 100.3855: 61%|██████ | 4842/8000 [07:11<04:56, 10.66it/s]759] reward: -0.09, reward 100-step MA: 0.95, action: [-0.96538091], td-error: 0.2859: 60%|█████▉ | 4760/8000 [07:03<05:08, 10.52it/s]
Total episode reward: 94.8312813484. Finished in 87 steps.
[4925] reward: 99.98, reward 100-step MA: 1.96, action: [ 0.49004173], td-error: 100.3243: 62%|██████▏ | 4926/8000 [07:19<04:47, 10.71it/s][4845] reward: -0.09, reward 100-step MA: 1.94, action: [-0.93365896], td-error: 0.6885: 61%|██████ | 4846/8000 [07:11<04:54, 10.70it/s]
Total episode reward: 96.0140516063. Finished in 84 steps.
[5079] reward: 99.98, reward 100-step MA: 0.95, action: [ 0.39531633], td-error: 100.3240: 64%|██████▎ | 5080/8000 [07:33<04:38, 10.49it/s][4929] reward: -0.04, reward 100-step MA: 1.96, action: [-0.64737892], td-error: 0.0000: 62%|██████▏ | 4930/8000 [07:19<04:48, 10.65it/s]
Total episode reward: 92.9024497463. Finished in 154 steps.
[5163] reward: 99.97, reward 100-step MA: 1.95, action: [ 0.55646342], td-error: 100.2253: 65%|██████▍ | 5164/8000 [07:41<04:31, 10.45it/s][5083] reward: -0.10, reward 100-step MA: 0.95, action: [-1.01687372], td-error: 0.0000: 64%|██████▎ | 5084/8000 [07:34<04:44, 10.26it/s]
Total episode reward: 95.4212130095. Finished in 84 steps.
[5247] reward: -0.06, reward 100-step MA: 1.95, action: [-0.7753799], td-error: 0.4614: 66%|██████▌ | 5248/8000 [07:49<04:20, 10.57it/s] [5167] reward: -0.09, reward 100-step MA: 1.95, action: [-0.96202737], td-error: 0.0000: 65%|██████▍ | 5168/8000 [07:42<04:24, 10.69it/s]
Total episode reward: 95.6067360548. Finished in 83 steps.
[5329] reward: 99.98, reward 100-step MA: 1.95, action: [ 0.39217404], td-error: 100.3029: 67%|██████▋ | 5330/8000 [07:57<04:16, 10.42it/s]51] reward: -0.11, reward 100-step MA: 1.95, action: [-1.02692771], td-error: 0.1899: 66%|██████▌ | 5252/8000 [07:50<04:23, 10.43it/s]
Total episode reward: 95.542213514. Finished in 83 steps.
[5413] reward: -0.08, reward 100-step MA: 1.95, action: [-0.89246649], td-error: 0.2957: 68%|██████▊ | 5414/8000 [08:05<04:00, 10.73it/s] [5333] reward: -0.08, reward 100-step MA: 1.95, action: [-0.90938842], td-error: 0.1973: 67%|██████▋ | 5334/8000 [07:58<04:14, 10.47it/s]
Total episode reward: 95.2691523042. Finished in 83 steps.
[5495] reward: -0.10, reward 100-step MA: 1.94, action: [-1.0229435], td-error: 0.0000: 69%|██████▊ | 5496/8000 [08:13<04:01, 10.37it/s] [5417] reward: -0.11, reward 100-step MA: 1.95, action: [-1.02978289], td-error: 0.0000: 68%|██████▊ | 5418/8000 [08:06<04:00, 10.71it/s]
Total episode reward: 94.5354219415. Finished in 82 steps.
[5579] reward: 99.96, reward 100-step MA: 1.94, action: [ 0.6225996], td-error: 100.0896: 70%|██████▉ | 5580/8000 [08:21<03:55, 10.29it/s]5499] reward: -0.10, reward 100-step MA: 1.94, action: [-0.99046183], td-error: 0.0000: 69%|██████▉ | 5500/8000 [08:13<03:56, 10.56it/s]
Total episode reward: 94.4167161511. Finished in 85 steps.
[5659] reward: -0.13, reward 100-step MA: 1.94, action: [-1.13845289], td-error: 0.0000: 71%|███████ | 5660/8000 [08:29<03:45, 10.36it/s] [5583] reward: -0.12, reward 100-step MA: 1.94, action: [-1.07340693], td-error: 0.0000: 70%|██████▉ | 5584/8000 [08:21<03:53, 10.33it/s]
Total episode reward: 95.1464687986. Finished in 79 steps.
[5747] reward: -0.09, reward 100-step MA: 1.94, action: [-0.9601081], td-error: 0.0311: 72%|███████▏ | 5748/8000 [08:37<03:32, 10.59it/s] [5663] reward: -0.12, reward 100-step MA: 1.94, action: [-1.0841912], td-error: 0.0000: 71%|███████ | 5664/8000 [08:29<03:43, 10.44it/s]
Total episode reward: 94.2133339555. Finished in 88 steps.
[5827] reward: 99.98, reward 100-step MA: 1.94, action: [ 0.46903309], td-error: 100.1296: 73%|███████▎ | 5828/8000 [08:45<03:24, 10.61it/s]751] reward: -0.09, reward 100-step MA: 1.94, action: [-0.94492143], td-error: 0.0000: 72%|███████▏ | 5752/8000 [08:37<03:35, 10.44it/s]
Total episode reward: 94.9310086949. Finished in 81 steps.
[5907] reward: 99.97, reward 100-step MA: 1.94, action: [ 0.51486492], td-error: 100.0965: 74%|███████▍ | 5908/8000 [08:52<03:21, 10.36it/s][5831] reward: -0.09, reward 100-step MA: 1.94, action: [-0.95784825], td-error: 0.0000: 73%|███████▎ | 5832/8000 [08:45<03:23, 10.67it/s]
Total episode reward: 94.7047823062. Finished in 80 steps.
[5987] reward: -0.04, reward 100-step MA: 1.94, action: [-0.65630955], td-error: 0.0000: 75%|███████▍ | 5988/8000 [09:00<03:17, 10.16it/s] [5911] reward: -0.12, reward 100-step MA: 1.94, action: [-1.10545516], td-error: 0.0000: 74%|███████▍ | 5912/8000 [08:53<03:21, 10.34it/s]
Total episode reward: 94.6738877204. Finished in 79 steps.
[6067] reward: -0.09, reward 100-step MA: 1.94, action: [-0.94778693], td-error: 0.0000: 76%|███████▌ | 6068/8000 [09:08<03:06, 10.35it/s][5991] reward: -0.12, reward 100-step MA: 1.94, action: [-1.08704591], td-error: 0.0421: 75%|███████▍ | 5992/8000 [09:00<03:18, 10.14it/s]
Total episode reward: 94.5869934792. Finished in 80 steps.
[6149] reward: 99.96, reward 100-step MA: 1.94, action: [-0.65713215], td-error: 99.8057: 77%|███████▋ | 6150/8000 [09:16<02:55, 10.52it/s]6071] reward: -0.10, reward 100-step MA: 1.94, action: [-1.00193417], td-error: 0.0407: 76%|███████▌ | 6072/8000 [09:08<03:09, 10.20it/s]
Total episode reward: 94.2203140627. Finished in 83 steps.
[6229] reward: 99.93, reward 100-step MA: 1.93, action: [-0.85575598], td-error: 99.8588: 78%|███████▊ | 6230/8000 [09:23<02:45, 10.69it/s][6153] reward: -0.10, reward 100-step MA: 1.93, action: [-0.9988296], td-error: 0.0971: 77%|███████▋ | 6154/8000 [09:16<02:53, 10.61it/s]
Total episode reward: 94.2034539632. Finished in 80 steps.
[6307] reward: 99.89, reward 100-step MA: 1.93, action: [-1.04406977], td-error: 99.8618: 79%|███████▉ | 6308/8000 [09:31<02:42, 10.43it/s][6233] reward: -0.09, reward 100-step MA: 1.93, action: [-0.93090022], td-error: 0.1263: 78%|███████▊ | 6234/8000 [09:24<02:48, 10.49it/s]
Total episode reward: 94.2401164427. Finished in 78 steps.
[6385] reward: -0.09, reward 100-step MA: 1.93, action: [-0.96210933], td-error: 0.0000: 80%|███████▉ | 6386/8000 [09:38<02:31, 10.62it/s] [6311] reward: -0.11, reward 100-step MA: 1.93, action: [-1.06415105], td-error: 0.0000: 79%|███████▉ | 6312/8000 [09:31<02:44, 10.26it/s]
Total episode reward: 94.6304114332. Finished in 77 steps.
[6461] reward: 100.00, reward 100-step MA: 1.94, action: [-0.04223126], td-error: 99.8767: 81%|████████ | 6462/8000 [09:46<02:33, 10.04it/s]389] reward: -0.08, reward 100-step MA: 1.93, action: [-0.91193974], td-error: 0.0000: 80%|███████▉ | 6390/8000 [09:39<02:33, 10.49it/s]
Total episode reward: 94.9238486185. Finished in 77 steps.
[6545] reward: -0.12, reward 100-step MA: 1.94, action: [-1.09871626], td-error: 0.0357: 82%|████████▏ | 6546/8000 [09:54<02:19, 10.44it/s] [6465] reward: -0.10, reward 100-step MA: 1.94, action: [-1.0034076], td-error: 0.0325: 81%|████████ | 6466/8000 [09:46<02:27, 10.37it/s]
Total episode reward: 95.0166638492. Finished in 83 steps.
[6625] reward: -0.06, reward 100-step MA: 1.94, action: [-0.77199543], td-error: 0.5316: 83%|████████▎ | 6626/8000 [10:01<02:10, 10.49it/s][6549] reward: -0.08, reward 100-step MA: 1.94, action: [-0.89217454], td-error: 0.1115: 82%|████████▏ | 6550/8000 [09:54<02:18, 10.46it/s]
Total episode reward: 94.5468855464. Finished in 80 steps.
[6699] reward: 99.89, reward 100-step MA: 1.93, action: [-1.04121447], td-error: 99.7117: 84%|████████▍ | 6700/8000 [10:08<02:04, 10.45it/s]6629] reward: -0.11, reward 100-step MA: 1.94, action: [-1.04835248], td-error: 0.1109: 83%|████████▎ | 6630/8000 [10:02<02:09, 10.59it/s]
Total episode reward: 94.5327636079. Finished in 75 steps.
[6789] reward: 99.90, reward 100-step MA: 1.94, action: [ 1.00359285], td-error: 100.1130: 85%|████████▍ | 6790/8000 [10:17<01:56, 10.38it/s]6703] reward: -0.06, reward 100-step MA: 1.93, action: [-0.80564976], td-error: 0.0000: 84%|████████▍ | 6704/8000 [10:09<02:04, 10.41it/s]
Total episode reward: 94.0956922018. Finished in 90 steps.
[6869] reward: -0.09, reward 100-step MA: 1.94, action: [-0.96265513], td-error: 0.7995: 86%|████████▌ | 6870/8000 [10:25<01:47, 10.48it/s] [6793] reward: -0.09, reward 100-step MA: 1.93, action: [-0.95715183], td-error: 0.0000: 85%|████████▍ | 6794/8000 [10:17<01:55, 10.40it/s]
Total episode reward: 95.2610286331. Finished in 79 steps.
[6947] reward: -0.10, reward 100-step MA: 1.93, action: [-0.97843295], td-error: 0.0000: 87%|████████▋ | 6948/8000 [10:32<01:42, 10.21it/s][6873] reward: -0.10, reward 100-step MA: 1.94, action: [-0.9856953], td-error: 0.4948: 86%|████████▌ | 6874/8000 [10:25<01:48, 10.38it/s]
Total episode reward: 94.1285489207. Finished in 78 steps.
[7025] reward: 99.98, reward 100-step MA: 1.93, action: [ 0.42938101], td-error: 99.9245: 88%|████████▊ | 7026/8000 [10:40<01:35, 10.24it/s]6951] reward: -0.09, reward 100-step MA: 1.93, action: [-0.95984733], td-error: 0.0733: 87%|████████▋ | 6952/8000 [10:33<01:43, 10.14it/s]
Total episode reward: 94.5143749322. Finished in 79 steps.
[7099] reward: -0.04, reward 100-step MA: 1.93, action: [-0.63026351], td-error: 0.0000: 89%|████████▉ | 7100/8000 [10:47<01:25, 10.48it/s] [7029] reward: -0.07, reward 100-step MA: 1.93, action: [-0.82460988], td-error: 0.1434: 88%|████████▊ | 7030/8000 [10:40<01:33, 10.40it/s]
Total episode reward: 94.9412080657. Finished in 73 steps.
[7173] reward: 99.89, reward 100-step MA: 1.93, action: [-1.02583122], td-error: 99.8727: 90%|████████▉ | 7174/8000 [10:54<01:20, 10.32it/s]7103] reward: -0.10, reward 100-step MA: 1.93, action: [-1.00551629], td-error: 0.1975: 89%|████████▉ | 7104/8000 [10:47<01:27, 10.27it/s]
Total episode reward: 94.6832196613. Finished in 75 steps.
[7249] reward: 99.91, reward 100-step MA: 1.93, action: [-0.94779313], td-error: 99.8496: 91%|█████████ | 7250/8000 [11:02<01:12, 10.34it/s][7177] reward: -0.11, reward 100-step MA: 1.93, action: [-1.05738091], td-error: 0.1826: 90%|████████▉ | 7178/8000 [10:55<01:18, 10.51it/s]
Total episode reward: 94.5361680119. Finished in 76 steps.
[7327] reward: -0.08, reward 100-step MA: 1.93, action: [-0.91985613], td-error: 0.0000: 92%|█████████▏| 7328/8000 [11:09<01:04, 10.45it/s] [7253] reward: -0.11, reward 100-step MA: 1.93, action: [-1.03249884], td-error: 0.5564: 91%|█████████ | 7254/8000 [11:02<01:12, 10.31it/s]
Total episode reward: 94.2408981571. Finished in 77 steps.
[7409] reward: 99.96, reward 100-step MA: 1.93, action: [ 0.61620492], td-error: 99.8825: 93%|█████████▎| 7410/8000 [11:17<00:56, 10.37it/s]7331] reward: -0.09, reward 100-step MA: 1.93, action: [-0.97202724], td-error: 0.0000: 92%|█████████▏| 7332/8000 [11:09<01:04, 10.29it/s]
Total episode reward: 94.5204475236. Finished in 83 steps.
[7501] reward: -0.09, reward 100-step MA: 1.95, action: [-0.94281501], td-error: 0.0000: 94%|█████████▍| 7502/8000 [11:26<00:49, 10.01it/s] [7413] reward: -0.08, reward 100-step MA: 1.94, action: [-0.87150162], td-error: 0.0000: 93%|█████████▎| 7414/8000 [11:17<00:57, 10.23it/s]
Total episode reward: 94.8214472012. Finished in 91 steps.
[7576] reward: -0.14, reward 100-step MA: 1.93, action: [-1.17773247], td-error: 0.0000: 95%|█████████▍| 7577/8000 [11:33<00:41, 10.25it/s][7505] reward: -0.08, reward 100-step MA: 1.94, action: [-0.90554327], td-error: 0.2872: 94%|█████████▍| 7506/8000 [11:26<00:48, 10.12it/s]
Total episode reward: 94.5517668356. Finished in 75 steps.
[7656] reward: 99.96, reward 100-step MA: 1.93, action: [ 0.62307554], td-error: 99.9433: 96%|█████████▌| 7657/8000 [11:41<00:33, 10.17it/s]7580] reward: -0.13, reward 100-step MA: 1.93, action: [-1.13226557], td-error: 0.0392: 95%|█████████▍| 7581/8000 [11:34<00:40, 10.26it/s]
Total episode reward: 94.608751134. Finished in 81 steps.
[7736] reward: 99.98, reward 100-step MA: 1.94, action: [ 0.49290159], td-error: 99.9680: 97%|█████████▋| 7737/8000 [11:49<00:25, 10.44it/s][7660] reward: -0.11, reward 100-step MA: 1.93, action: [-1.03042662], td-error: 0.0613: 96%|█████████▌| 7661/8000 [11:42<00:33, 10.21it/s]
Total episode reward: 94.1926933433. Finished in 80 steps.
[7814] reward: -0.05, reward 100-step MA: 1.94, action: [-0.73354656], td-error: 0.0000: 98%|█████████▊| 7815/8000 [11:57<00:18, 10.16it/s] [7740] reward: -0.10, reward 100-step MA: 1.93, action: [-0.9869296], td-error: 0.0000: 97%|█████████▋| 7741/8000 [11:49<00:24, 10.49it/s]
Total episode reward: 95.0124570819. Finished in 77 steps.
[7888] reward: -0.11, reward 100-step MA: 1.94, action: [-1.03100061], td-error: 0.1384: 99%|█████████▊| 7889/8000 [12:04<00:10, 10.14it/s][7818] reward: -0.07, reward 100-step MA: 1.94, action: [-0.83864552], td-error: 0.0000: 98%|█████████▊| 7819/8000 [11:57<00:17, 10.46it/s]
Total episode reward: 95.0429873847. Finished in 74 steps.
[7964] reward: -0.14, reward 100-step MA: 1.93, action: [-1.19227219], td-error: 0.7090: 100%|█████████▉| 7965/8000 [12:11<00:03, 10.25it/s][7892] reward: -0.10, reward 100-step MA: 1.94, action: [-1.01194441], td-error: 0.2942: 99%|█████████▊| 7893/8000 [12:04<00:10, 10.28it/s]
Total episode reward: 94.4267093404. Finished in 76 steps.
[7999] reward: -0.02, reward 100-step MA: 0.92, action: [-0.4674266], td-error: 0.0000: 100%|██████████| 8000/8000 [12:15<00:00, 10.88it/s] [7968] reward: -0.11, reward 100-step MA: 1.93, action: [-1.04532933], td-error: 0.3538: 100%|█████████▉| 7969/8000 [12:12<00:03, 10.10it/s]