('Episode: 1', 'Total reward: 14.0', 'Training loss: 1.3354', 'Explore P: 0.9986')
('Episode: 2', 'Total reward: 11.0', 'Training loss: 1.2615', 'Explore P: 0.9975')
('Episode: 3', 'Total reward: 10.0', 'Training loss: 1.2881', 'Explore P: 0.9965')
('Episode: 4', 'Total reward: 22.0', 'Training loss: 1.1207', 'Explore P: 0.9944')
('Episode: 5', 'Total reward: 22.0', 'Training loss: 1.1573', 'Explore P: 0.9922')
('Episode: 6', 'Total reward: 11.0', 'Training loss: 1.3152', 'Explore P: 0.9911')
('Episode: 7', 'Total reward: 13.0', 'Training loss: 1.1766', 'Explore P: 0.9899')
('Episode: 8', 'Total reward: 36.0', 'Training loss: 1.1244', 'Explore P: 0.9863')
('Episode: 9', 'Total reward: 14.0', 'Training loss: 1.3296', 'Explore P: 0.9850')
('Episode: 10', 'Total reward: 34.0', 'Training loss: 1.1735', 'Explore P: 0.9817')
('Episode: 11', 'Total reward: 74.0', 'Training loss: 1.2641', 'Explore P: 0.9745')
('Episode: 12', 'Total reward: 21.0', 'Training loss: 1.0279', 'Explore P: 0.9725')
('Episode: 13', 'Total reward: 24.0', 'Training loss: 1.1746', 'Explore P: 0.9702')
('Episode: 14', 'Total reward: 17.0', 'Training loss: 1.1140', 'Explore P: 0.9685')
('Episode: 15', 'Total reward: 17.0', 'Training loss: 1.4663', 'Explore P: 0.9669')
('Episode: 16', 'Total reward: 11.0', 'Training loss: 1.2109', 'Explore P: 0.9659')
('Episode: 17', 'Total reward: 55.0', 'Training loss: 2.5304', 'Explore P: 0.9606')
('Episode: 18', 'Total reward: 27.0', 'Training loss: 1.5479', 'Explore P: 0.9580')
('Episode: 19', 'Total reward: 15.0', 'Training loss: 1.0221', 'Explore P: 0.9566')
('Episode: 20', 'Total reward: 9.0', 'Training loss: 1.0150', 'Explore P: 0.9558')
('Episode: 21', 'Total reward: 30.0', 'Training loss: 1.2156', 'Explore P: 0.9529')
('Episode: 22', 'Total reward: 49.0', 'Training loss: 1.1872', 'Explore P: 0.9483')
('Episode: 23', 'Total reward: 41.0', 'Training loss: 1.3635', 'Explore P: 0.9445')
('Episode: 24', 'Total reward: 13.0', 'Training loss: 1.0574', 'Explore P: 0.9433')
('Episode: 25', 'Total reward: 35.0', 'Training loss: 1.0422', 'Explore P: 0.9400')
('Episode: 26', 'Total reward: 20.0', 'Training loss: 0.9315', 'Explore P: 0.9382')
('Episode: 27', 'Total reward: 11.0', 'Training loss: 1.5957', 'Explore P: 0.9371')
('Episode: 28', 'Total reward: 13.0', 'Training loss: 1.2928', 'Explore P: 0.9359')
('Episode: 29', 'Total reward: 38.0', 'Training loss: 1.1060', 'Explore P: 0.9324')
('Episode: 30', 'Total reward: 16.0', 'Training loss: 1.6828', 'Explore P: 0.9309')
('Episode: 31', 'Total reward: 27.0', 'Training loss: 1.1002', 'Explore P: 0.9285')
('Episode: 32', 'Total reward: 23.0', 'Training loss: 1.6576', 'Explore P: 0.9264')
('Episode: 33', 'Total reward: 24.0', 'Training loss: 1.1307', 'Explore P: 0.9242')
('Episode: 34', 'Total reward: 27.0', 'Training loss: 1.6552', 'Explore P: 0.9217')
('Episode: 35', 'Total reward: 62.0', 'Training loss: 1.5002', 'Explore P: 0.9161')
('Episode: 36', 'Total reward: 12.0', 'Training loss: 2.1145', 'Explore P: 0.9150')
('Episode: 37', 'Total reward: 17.0', 'Training loss: 2.0756', 'Explore P: 0.9134')
('Episode: 38', 'Total reward: 11.0', 'Training loss: 1.5009', 'Explore P: 0.9124')
('Episode: 39', 'Total reward: 29.0', 'Training loss: 2.0597', 'Explore P: 0.9098')
('Episode: 40', 'Total reward: 8.0', 'Training loss: 2.8411', 'Explore P: 0.9091')
('Episode: 41', 'Total reward: 19.0', 'Training loss: 2.1776', 'Explore P: 0.9074')
('Episode: 42', 'Total reward: 14.0', 'Training loss: 2.2898', 'Explore P: 0.9061')
('Episode: 43', 'Total reward: 15.0', 'Training loss: 2.5015', 'Explore P: 0.9048')
('Episode: 44', 'Total reward: 17.0', 'Training loss: 2.7332', 'Explore P: 0.9033')
('Episode: 45', 'Total reward: 10.0', 'Training loss: 3.4489', 'Explore P: 0.9024')
('Episode: 46', 'Total reward: 18.0', 'Training loss: 11.1051', 'Explore P: 0.9008')
('Episode: 47', 'Total reward: 12.0', 'Training loss: 3.5470', 'Explore P: 0.8997')
('Episode: 48', 'Total reward: 17.0', 'Training loss: 3.3059', 'Explore P: 0.8982')
('Episode: 49', 'Total reward: 31.0', 'Training loss: 3.0474', 'Explore P: 0.8955')
('Episode: 50', 'Total reward: 29.0', 'Training loss: 38.4936', 'Explore P: 0.8929')
('Episode: 51', 'Total reward: 44.0', 'Training loss: 2.6283', 'Explore P: 0.8890')
('Episode: 52', 'Total reward: 14.0', 'Training loss: 2.5245', 'Explore P: 0.8878')
('Episode: 53', 'Total reward: 17.0', 'Training loss: 2.7175', 'Explore P: 0.8863')
('Episode: 54', 'Total reward: 45.0', 'Training loss: 3.1457', 'Explore P: 0.8824')
('Episode: 55', 'Total reward: 11.0', 'Training loss: 2.7895', 'Explore P: 0.8814')
('Episode: 56', 'Total reward: 14.0', 'Training loss: 3.1660', 'Explore P: 0.8802')
('Episode: 57', 'Total reward: 14.0', 'Training loss: 3.5401', 'Explore P: 0.8790')
('Episode: 58', 'Total reward: 10.0', 'Training loss: 3.3772', 'Explore P: 0.8781')
('Episode: 59', 'Total reward: 22.0', 'Training loss: 24.8031', 'Explore P: 0.8762')
('Episode: 60', 'Total reward: 22.0', 'Training loss: 4.1782', 'Explore P: 0.8743')
('Episode: 61', 'Total reward: 8.0', 'Training loss: 3.3215', 'Explore P: 0.8736')
('Episode: 62', 'Total reward: 9.0', 'Training loss: 2.9034', 'Explore P: 0.8728')
('Episode: 63', 'Total reward: 17.0', 'Training loss: 3.2243', 'Explore P: 0.8714')
('Episode: 64', 'Total reward: 12.0', 'Training loss: 4.1688', 'Explore P: 0.8703')
('Episode: 65', 'Total reward: 9.0', 'Training loss: 4.2127', 'Explore P: 0.8695')
('Episode: 66', 'Total reward: 17.0', 'Training loss: 3.8544', 'Explore P: 0.8681')
('Episode: 67', 'Total reward: 18.0', 'Training loss: 2.5564', 'Explore P: 0.8665')
('Episode: 68', 'Total reward: 23.0', 'Training loss: 2.6813', 'Explore P: 0.8646')
('Episode: 69', 'Total reward: 11.0', 'Training loss: 3.5403', 'Explore P: 0.8636')
('Episode: 70', 'Total reward: 12.0', 'Training loss: 2.7131', 'Explore P: 0.8626')
('Episode: 71', 'Total reward: 16.0', 'Training loss: 46.8760', 'Explore P: 0.8612')
('Episode: 72', 'Total reward: 13.0', 'Training loss: 2.1305', 'Explore P: 0.8601')
('Episode: 73', 'Total reward: 18.0', 'Training loss: 46.8836', 'Explore P: 0.8586')
('Episode: 74', 'Total reward: 14.0', 'Training loss: 2.4349', 'Explore P: 0.8574')
('Episode: 75', 'Total reward: 15.0', 'Training loss: 366.7116', 'Explore P: 0.8562')
('Episode: 76', 'Total reward: 46.0', 'Training loss: 2.8052', 'Explore P: 0.8523')
('Episode: 77', 'Total reward: 24.0', 'Training loss: 4.2389', 'Explore P: 0.8503')
('Episode: 78', 'Total reward: 16.0', 'Training loss: 3.9073', 'Explore P: 0.8489')
('Episode: 79', 'Total reward: 28.0', 'Training loss: 78.5599', 'Explore P: 0.8466')
('Episode: 80', 'Total reward: 9.0', 'Training loss: 2.8071', 'Explore P: 0.8458')
('Episode: 81', 'Total reward: 29.0', 'Training loss: 3.7634', 'Explore P: 0.8434')
('Episode: 82', 'Total reward: 21.0', 'Training loss: 72.5087', 'Explore P: 0.8416')
('Episode: 83', 'Total reward: 17.0', 'Training loss: 3.7944', 'Explore P: 0.8402')
('Episode: 84', 'Total reward: 18.0', 'Training loss: 4.8664', 'Explore P: 0.8387')
('Episode: 85', 'Total reward: 12.0', 'Training loss: 3.3175', 'Explore P: 0.8377')
('Episode: 86', 'Total reward: 15.0', 'Training loss: 5.7755', 'Explore P: 0.8365')
('Episode: 87', 'Total reward: 21.0', 'Training loss: 4.4009', 'Explore P: 0.8348')
('Episode: 88', 'Total reward: 10.0', 'Training loss: 6.3343', 'Explore P: 0.8339')
('Episode: 89', 'Total reward: 18.0', 'Training loss: 4.7864', 'Explore P: 0.8325')
('Episode: 90', 'Total reward: 23.0', 'Training loss: 6.1287', 'Explore P: 0.8306')
('Episode: 91', 'Total reward: 23.0', 'Training loss: 6.1139', 'Explore P: 0.8287')
('Episode: 92', 'Total reward: 14.0', 'Training loss: 133.1346', 'Explore P: 0.8275')
('Episode: 93', 'Total reward: 12.0', 'Training loss: 3.2039', 'Explore P: 0.8266')
('Episode: 94', 'Total reward: 23.0', 'Training loss: 3.3615', 'Explore P: 0.8247')
('Episode: 95', 'Total reward: 19.0', 'Training loss: 4.3340', 'Explore P: 0.8231')
('Episode: 96', 'Total reward: 19.0', 'Training loss: 4.9146', 'Explore P: 0.8216')
('Episode: 97', 'Total reward: 9.0', 'Training loss: 4.0262', 'Explore P: 0.8209')
('Episode: 98', 'Total reward: 14.0', 'Training loss: 3.1591', 'Explore P: 0.8197')
('Episode: 99', 'Total reward: 19.0', 'Training loss: 4.7543', 'Explore P: 0.8182')
('Episode: 100', 'Total reward: 11.0', 'Training loss: 4.7512', 'Explore P: 0.8173')
('Episode: 101', 'Total reward: 12.0', 'Training loss: 5.3513', 'Explore P: 0.8163')
('Episode: 102', 'Total reward: 27.0', 'Training loss: 4.8479', 'Explore P: 0.8142')
('Episode: 103', 'Total reward: 25.0', 'Training loss: 4.6370', 'Explore P: 0.8122')
('Episode: 104', 'Total reward: 10.0', 'Training loss: 6.8069', 'Explore P: 0.8114')
('Episode: 105', 'Total reward: 35.0', 'Training loss: 5.3532', 'Explore P: 0.8086')
('Episode: 106', 'Total reward: 11.0', 'Training loss: 7.4405', 'Explore P: 0.8077')
('Episode: 107', 'Total reward: 21.0', 'Training loss: 6.2195', 'Explore P: 0.8060')
('Episode: 108', 'Total reward: 14.0', 'Training loss: 4.7760', 'Explore P: 0.8049')
('Episode: 109', 'Total reward: 16.0', 'Training loss: 106.5786', 'Explore P: 0.8036')
('Episode: 110', 'Total reward: 11.0', 'Training loss: 4.5310', 'Explore P: 0.8027')
('Episode: 111', 'Total reward: 9.0', 'Training loss: 80.9838', 'Explore P: 0.8020')
('Episode: 112', 'Total reward: 16.0', 'Training loss: 5.9511', 'Explore P: 0.8008')
('Episode: 113', 'Total reward: 19.0', 'Training loss: 4.4148', 'Explore P: 0.7993')
('Episode: 114', 'Total reward: 12.0', 'Training loss: 84.7038', 'Explore P: 0.7983')
('Episode: 115', 'Total reward: 11.0', 'Training loss: 112.8099', 'Explore P: 0.7975')
('Episode: 116', 'Total reward: 15.0', 'Training loss: 5.3757', 'Explore P: 0.7963')
('Episode: 117', 'Total reward: 21.0', 'Training loss: 5.7174', 'Explore P: 0.7946')
('Episode: 118', 'Total reward: 13.0', 'Training loss: 4.8574', 'Explore P: 0.7936')
('Episode: 119', 'Total reward: 20.0', 'Training loss: 3.8381', 'Explore P: 0.7920')
('Episode: 120', 'Total reward: 17.0', 'Training loss: 5.1802', 'Explore P: 0.7907')
('Episode: 121', 'Total reward: 8.0', 'Training loss: 88.1074', 'Explore P: 0.7901')
('Episode: 122', 'Total reward: 12.0', 'Training loss: 3.9001', 'Explore P: 0.7892')
('Episode: 123', 'Total reward: 67.0', 'Training loss: 4.1587', 'Explore P: 0.7839')
('Episode: 124', 'Total reward: 12.0', 'Training loss: 123.5244', 'Explore P: 0.7830')
('Episode: 125', 'Total reward: 29.0', 'Training loss: 6.6677', 'Explore P: 0.7808')
('Episode: 126', 'Total reward: 30.0', 'Training loss: 5.6174', 'Explore P: 0.7785')
('Episode: 127', 'Total reward: 19.0', 'Training loss: 5.0408', 'Explore P: 0.7770')
('Episode: 128', 'Total reward: 15.0', 'Training loss: 5.7497', 'Explore P: 0.7759')
('Episode: 129', 'Total reward: 11.0', 'Training loss: 199.0690', 'Explore P: 0.7750')
('Episode: 130', 'Total reward: 10.0', 'Training loss: 5.3443', 'Explore P: 0.7743')
('Episode: 131', 'Total reward: 37.0', 'Training loss: 5.9478', 'Explore P: 0.7714')
('Episode: 132', 'Total reward: 10.0', 'Training loss: 3.8117', 'Explore P: 0.7707')
('Episode: 133', 'Total reward: 11.0', 'Training loss: 3.7679', 'Explore P: 0.7698')
('Episode: 134', 'Total reward: 24.0', 'Training loss: 2.9758', 'Explore P: 0.7680')
('Episode: 135', 'Total reward: 15.0', 'Training loss: 3.0024', 'Explore P: 0.7669')
('Episode: 136', 'Total reward: 16.0', 'Training loss: 97.3766', 'Explore P: 0.7657')
('Episode: 137', 'Total reward: 17.0', 'Training loss: 3.5794', 'Explore P: 0.7644')
('Episode: 138', 'Total reward: 14.0', 'Training loss: 3.7807', 'Explore P: 0.7633')
('Episode: 139', 'Total reward: 16.0', 'Training loss: 4.0166', 'Explore P: 0.7621')
('Episode: 140', 'Total reward: 21.0', 'Training loss: 3.0394', 'Explore P: 0.7605')
('Episode: 141', 'Total reward: 12.0', 'Training loss: 3.0634', 'Explore P: 0.7596')
('Episode: 142', 'Total reward: 15.0', 'Training loss: 3.1577', 'Explore P: 0.7585')
('Episode: 143', 'Total reward: 11.0', 'Training loss: 2.7585', 'Explore P: 0.7577')
('Episode: 144', 'Total reward: 11.0', 'Training loss: 85.5526', 'Explore P: 0.7569')
('Episode: 145', 'Total reward: 67.0', 'Training loss: 2.5226', 'Explore P: 0.7519')
('Episode: 146', 'Total reward: 10.0', 'Training loss: 122.8458', 'Explore P: 0.7512')
('Episode: 147', 'Total reward: 9.0', 'Training loss: 1.4399', 'Explore P: 0.7505')
('Episode: 148', 'Total reward: 11.0', 'Training loss: 944.2547', 'Explore P: 0.7497')
('Episode: 149', 'Total reward: 20.0', 'Training loss: 1.3949', 'Explore P: 0.7482')
('Episode: 150', 'Total reward: 19.0', 'Training loss: 2.1447', 'Explore P: 0.7468')
('Episode: 151', 'Total reward: 42.0', 'Training loss: 1.4989', 'Explore P: 0.7437')
('Episode: 152', 'Total reward: 22.0', 'Training loss: 185.4185', 'Explore P: 0.7421')
('Episode: 153', 'Total reward: 26.0', 'Training loss: 248.6520', 'Explore P: 0.7402')
('Episode: 154', 'Total reward: 17.0', 'Training loss: 136.9839', 'Explore P: 0.7389')
('Episode: 155', 'Total reward: 38.0', 'Training loss: 242.8854', 'Explore P: 0.7362')
('Episode: 156', 'Total reward: 40.0', 'Training loss: 158.5295', 'Explore P: 0.7333')
('Episode: 157', 'Total reward: 10.0', 'Training loss: 2.3096', 'Explore P: 0.7326')
('Episode: 158', 'Total reward: 41.0', 'Training loss: 0.8155', 'Explore P: 0.7296')
('Episode: 159', 'Total reward: 17.0', 'Training loss: 148.0941', 'Explore P: 0.7284')
('Episode: 160', 'Total reward: 13.0', 'Training loss: 0.9545', 'Explore P: 0.7275')
('Episode: 161', 'Total reward: 36.0', 'Training loss: 145.7867', 'Explore P: 0.7249')
('Episode: 162', 'Total reward: 18.0', 'Training loss: 814.4630', 'Explore P: 0.7236')
('Episode: 163', 'Total reward: 22.0', 'Training loss: 1.4253', 'Explore P: 0.7220')
('Episode: 164', 'Total reward: 10.0', 'Training loss: 0.8967', 'Explore P: 0.7213')
('Episode: 165', 'Total reward: 7.0', 'Training loss: 112.2260', 'Explore P: 0.7208')
('Episode: 166', 'Total reward: 8.0', 'Training loss: 831.9282', 'Explore P: 0.7202')
('Episode: 167', 'Total reward: 17.0', 'Training loss: 0.8129', 'Explore P: 0.7190')
('Episode: 168', 'Total reward: 10.0', 'Training loss: 1.3742', 'Explore P: 0.7183')
('Episode: 169', 'Total reward: 18.0', 'Training loss: 151.2824', 'Explore P: 0.7171')
('Episode: 170', 'Total reward: 21.0', 'Training loss: 1.1473', 'Explore P: 0.7156')
('Episode: 171', 'Total reward: 23.0', 'Training loss: 1.3085', 'Explore P: 0.7139')
('Episode: 172', 'Total reward: 9.0', 'Training loss: 0.7314', 'Explore P: 0.7133')
('Episode: 173', 'Total reward: 14.0', 'Training loss: 0.7887', 'Explore P: 0.7123')
('Episode: 174', 'Total reward: 14.0', 'Training loss: 92.8841', 'Explore P: 0.7113')
('Episode: 175', 'Total reward: 11.0', 'Training loss: 162.8810', 'Explore P: 0.7106')
('Episode: 176', 'Total reward: 11.0', 'Training loss: 140.7751', 'Explore P: 0.7098')
('Episode: 177', 'Total reward: 35.0', 'Training loss: 1.1000', 'Explore P: 0.7074')
('Episode: 178', 'Total reward: 11.0', 'Training loss: 0.8916', 'Explore P: 0.7066')
('Episode: 179', 'Total reward: 22.0', 'Training loss: 63.5687', 'Explore P: 0.7051')
('Episode: 180', 'Total reward: 15.0', 'Training loss: 1.1569', 'Explore P: 0.7040')
('Episode: 181', 'Total reward: 23.0', 'Training loss: 1.5643', 'Explore P: 0.7024')
('Episode: 182', 'Total reward: 7.0', 'Training loss: 0.8610', 'Explore P: 0.7019')
('Episode: 183', 'Total reward: 23.0', 'Training loss: 784.5954', 'Explore P: 0.7004')
('Episode: 184', 'Total reward: 19.0', 'Training loss: 106.6684', 'Explore P: 0.6990')
('Episode: 185', 'Total reward: 14.0', 'Training loss: 0.6465', 'Explore P: 0.6981')
('Episode: 186', 'Total reward: 16.0', 'Training loss: 1.6593', 'Explore P: 0.6970')
('Episode: 187', 'Total reward: 20.0', 'Training loss: 137.3450', 'Explore P: 0.6956')
('Episode: 188', 'Total reward: 18.0', 'Training loss: 707.9849', 'Explore P: 0.6944')
('Episode: 189', 'Total reward: 12.0', 'Training loss: 283.1118', 'Explore P: 0.6936')
('Episode: 190', 'Total reward: 14.0', 'Training loss: 0.5509', 'Explore P: 0.6926')
('Episode: 191', 'Total reward: 10.0', 'Training loss: 676.3582', 'Explore P: 0.6919')
('Episode: 192', 'Total reward: 20.0', 'Training loss: 162.3590', 'Explore P: 0.6906')
('Episode: 193', 'Total reward: 46.0', 'Training loss: 1.1702', 'Explore P: 0.6874')
('Episode: 194', 'Total reward: 42.0', 'Training loss: 1.0797', 'Explore P: 0.6846')
('Episode: 195', 'Total reward: 51.0', 'Training loss: 1.3231', 'Explore P: 0.6812')
('Episode: 196', 'Total reward: 23.0', 'Training loss: 1.7784', 'Explore P: 0.6796')
('Episode: 197', 'Total reward: 16.0', 'Training loss: 655.3892', 'Explore P: 0.6785')
('Episode: 198', 'Total reward: 46.0', 'Training loss: 1.6682', 'Explore P: 0.6755')
('Episode: 199', 'Total reward: 21.0', 'Training loss: 1.1467', 'Explore P: 0.6741')
('Episode: 200', 'Total reward: 10.0', 'Training loss: 60.7564', 'Explore P: 0.6734')
('Episode: 201', 'Total reward: 9.0', 'Training loss: 1.5006', 'Explore P: 0.6728')
('Episode: 202', 'Total reward: 14.0', 'Training loss: 1.5805', 'Explore P: 0.6719')
('Episode: 203', 'Total reward: 9.0', 'Training loss: 1.0099', 'Explore P: 0.6713')
('Episode: 204', 'Total reward: 21.0', 'Training loss: 1.7271', 'Explore P: 0.6699')
('Episode: 205', 'Total reward: 23.0', 'Training loss: 1.2704', 'Explore P: 0.6684')
('Episode: 206', 'Total reward: 34.0', 'Training loss: 1.2930', 'Explore P: 0.6662')
('Episode: 207', 'Total reward: 54.0', 'Training loss: 0.5619', 'Explore P: 0.6626')
('Episode: 208', 'Total reward: 20.0', 'Training loss: 0.8378', 'Explore P: 0.6613')
('Episode: 209', 'Total reward: 51.0', 'Training loss: 1.0915', 'Explore P: 0.6580')
('Episode: 210', 'Total reward: 33.0', 'Training loss: 60.9021', 'Explore P: 0.6559')
('Episode: 211', 'Total reward: 13.0', 'Training loss: 1.4572', 'Explore P: 0.6550')
('Episode: 212', 'Total reward: 23.0', 'Training loss: 35.8581', 'Explore P: 0.6536')
('Episode: 213', 'Total reward: 10.0', 'Training loss: 2.5685', 'Explore P: 0.6529')
('Episode: 214', 'Total reward: 11.0', 'Training loss: 123.6346', 'Explore P: 0.6522')
('Episode: 215', 'Total reward: 15.0', 'Training loss: 2.1982', 'Explore P: 0.6512')
('Episode: 216', 'Total reward: 10.0', 'Training loss: 146.2321', 'Explore P: 0.6506')
('Episode: 217', 'Total reward: 23.0', 'Training loss: 2.3704', 'Explore P: 0.6491')
('Episode: 218', 'Total reward: 10.0', 'Training loss: 42.9966', 'Explore P: 0.6485')
('Episode: 219', 'Total reward: 38.0', 'Training loss: 1.8932', 'Explore P: 0.6461')
('Episode: 220', 'Total reward: 15.0', 'Training loss: 2.5832', 'Explore P: 0.6451')
('Episode: 221', 'Total reward: 9.0', 'Training loss: 1.7463', 'Explore P: 0.6445')
('Episode: 222', 'Total reward: 11.0', 'Training loss: 175.8271', 'Explore P: 0.6438')
('Episode: 223', 'Total reward: 24.0', 'Training loss: 2.0981', 'Explore P: 0.6423')
('Episode: 224', 'Total reward: 41.0', 'Training loss: 48.6865', 'Explore P: 0.6397')
('Episode: 225', 'Total reward: 20.0', 'Training loss: 1.8532', 'Explore P: 0.6385')
('Episode: 226', 'Total reward: 10.0', 'Training loss: 2.0073', 'Explore P: 0.6379')
('Episode: 227', 'Total reward: 9.0', 'Training loss: 39.4053', 'Explore P: 0.6373')
('Episode: 228', 'Total reward: 13.0', 'Training loss: 1.5703', 'Explore P: 0.6365')
('Episode: 229', 'Total reward: 13.0', 'Training loss: 1.4870', 'Explore P: 0.6357')
('Episode: 230', 'Total reward: 10.0', 'Training loss: 2.3620', 'Explore P: 0.6350')
('Episode: 231', 'Total reward: 9.0', 'Training loss: 145.5522', 'Explore P: 0.6345')
('Episode: 232', 'Total reward: 11.0', 'Training loss: 1.9543', 'Explore P: 0.6338')
('Episode: 233', 'Total reward: 12.0', 'Training loss: 32.0905', 'Explore P: 0.6330')
('Episode: 234', 'Total reward: 17.0', 'Training loss: 74.7153', 'Explore P: 0.6320')
('Episode: 235', 'Total reward: 16.0', 'Training loss: 1.8221', 'Explore P: 0.6310')
('Episode: 236', 'Total reward: 29.0', 'Training loss: 2.3786', 'Explore P: 0.6292')
('Episode: 237', 'Total reward: 23.0', 'Training loss: 1.7726', 'Explore P: 0.6278')
('Episode: 238', 'Total reward: 11.0', 'Training loss: 1.2302', 'Explore P: 0.6271')
('Episode: 239', 'Total reward: 9.0', 'Training loss: 2.2844', 'Explore P: 0.6265')
('Episode: 240', 'Total reward: 12.0', 'Training loss: 2.1337', 'Explore P: 0.6258')
('Episode: 241', 'Total reward: 9.0', 'Training loss: 2.0293', 'Explore P: 0.6252')
('Episode: 242', 'Total reward: 26.0', 'Training loss: 199.8405', 'Explore P: 0.6236')
('Episode: 243', 'Total reward: 52.0', 'Training loss: 176.0740', 'Explore P: 0.6205')
('Episode: 244', 'Total reward: 16.0', 'Training loss: 614.3672', 'Explore P: 0.6195')
('Episode: 245', 'Total reward: 28.0', 'Training loss: 1.9156', 'Explore P: 0.6178')
('Episode: 246', 'Total reward: 18.0', 'Training loss: 2.9568', 'Explore P: 0.6167')
('Episode: 247', 'Total reward: 13.0', 'Training loss: 3.2045', 'Explore P: 0.6159')
('Episode: 248', 'Total reward: 18.0', 'Training loss: 2.2705', 'Explore P: 0.6148')
('Episode: 249', 'Total reward: 11.0', 'Training loss: 1.8708', 'Explore P: 0.6141')
('Episode: 250', 'Total reward: 19.0', 'Training loss: 1.8822', 'Explore P: 0.6130')
('Episode: 251', 'Total reward: 25.0', 'Training loss: 3.5328', 'Explore P: 0.6115')
('Episode: 252', 'Total reward: 10.0', 'Training loss: 2.6698', 'Explore P: 0.6109')
('Episode: 253', 'Total reward: 13.0', 'Training loss: 1.5339', 'Explore P: 0.6101')
('Episode: 254', 'Total reward: 16.0', 'Training loss: 600.4520', 'Explore P: 0.6091')
('Episode: 255', 'Total reward: 16.0', 'Training loss: 1.1714', 'Explore P: 0.6082')
('Episode: 256', 'Total reward: 23.0', 'Training loss: 33.2517', 'Explore P: 0.6068')
('Episode: 257', 'Total reward: 11.0', 'Training loss: 2.5996', 'Explore P: 0.6062')
('Episode: 258', 'Total reward: 17.0', 'Training loss: 40.8801', 'Explore P: 0.6051')
('Episode: 259', 'Total reward: 14.0', 'Training loss: 121.7327', 'Explore P: 0.6043')
('Episode: 260', 'Total reward: 11.0', 'Training loss: 1.5197', 'Explore P: 0.6037')
('Episode: 261', 'Total reward: 26.0', 'Training loss: 128.0704', 'Explore P: 0.6021')
('Episode: 262', 'Total reward: 12.0', 'Training loss: 2.1123', 'Explore P: 0.6014')
('Episode: 263', 'Total reward: 30.0', 'Training loss: 170.8071', 'Explore P: 0.5996')
('Episode: 264', 'Total reward: 13.0', 'Training loss: 1.2525', 'Explore P: 0.5989')
('Episode: 265', 'Total reward: 11.0', 'Training loss: 1.8243', 'Explore P: 0.5982')
('Episode: 266', 'Total reward: 13.0', 'Training loss: 47.0908', 'Explore P: 0.5975')
('Episode: 267', 'Total reward: 14.0', 'Training loss: 31.5861', 'Explore P: 0.5966')
('Episode: 268', 'Total reward: 14.0', 'Training loss: 2.1602', 'Explore P: 0.5958')
('Episode: 269', 'Total reward: 13.0', 'Training loss: 1.0960', 'Explore P: 0.5951')
('Episode: 270', 'Total reward: 22.0', 'Training loss: 1.5637', 'Explore P: 0.5938')
('Episode: 271', 'Total reward: 14.0', 'Training loss: 1.5053', 'Explore P: 0.5930')
('Episode: 272', 'Total reward: 7.0', 'Training loss: 1.1773', 'Explore P: 0.5925')
('Episode: 273', 'Total reward: 13.0', 'Training loss: 1.2849', 'Explore P: 0.5918')
('Episode: 274', 'Total reward: 38.0', 'Training loss: 33.2253', 'Explore P: 0.5896')
('Episode: 275', 'Total reward: 30.0', 'Training loss: 61.9920', 'Explore P: 0.5878')
('Episode: 276', 'Total reward: 22.0', 'Training loss: 1.3305', 'Explore P: 0.5866')
('Episode: 277', 'Total reward: 22.0', 'Training loss: 43.9686', 'Explore P: 0.5853')
('Episode: 278', 'Total reward: 11.0', 'Training loss: 95.0974', 'Explore P: 0.5847')
('Episode: 279', 'Total reward: 10.0', 'Training loss: 47.7959', 'Explore P: 0.5841')
('Episode: 280', 'Total reward: 12.0', 'Training loss: 44.1316', 'Explore P: 0.5834')
('Episode: 281', 'Total reward: 18.0', 'Training loss: 1.7190', 'Explore P: 0.5824')
('Episode: 282', 'Total reward: 19.0', 'Training loss: 1.4724', 'Explore P: 0.5813')
('Episode: 283', 'Total reward: 19.0', 'Training loss: 1.3168', 'Explore P: 0.5802')
('Episode: 284', 'Total reward: 21.0', 'Training loss: 109.3901', 'Explore P: 0.5790')
('Episode: 285', 'Total reward: 17.0', 'Training loss: 1.9562', 'Explore P: 0.5780')
('Episode: 286', 'Total reward: 13.0', 'Training loss: 2.4552', 'Explore P: 0.5773')
('Episode: 287', 'Total reward: 15.0', 'Training loss: 1.2744', 'Explore P: 0.5765')
('Episode: 288', 'Total reward: 14.0', 'Training loss: 38.6516', 'Explore P: 0.5757')
('Episode: 289', 'Total reward: 10.0', 'Training loss: 0.9850', 'Explore P: 0.5751')
('Episode: 290', 'Total reward: 17.0', 'Training loss: 1.5636', 'Explore P: 0.5741')
('Episode: 291', 'Total reward: 14.0', 'Training loss: 31.3107', 'Explore P: 0.5734')
('Episode: 292', 'Total reward: 15.0', 'Training loss: 108.1185', 'Explore P: 0.5725')
('Episode: 293', 'Total reward: 24.0', 'Training loss: 93.1982', 'Explore P: 0.5712')
('Episode: 294', 'Total reward: 13.0', 'Training loss: 2.5192', 'Explore P: 0.5704')
('Episode: 295', 'Total reward: 18.0', 'Training loss: 1070.6215', 'Explore P: 0.5694')
('Episode: 296', 'Total reward: 12.0', 'Training loss: 1.3838', 'Explore P: 0.5688')
('Episode: 297', 'Total reward: 13.0', 'Training loss: 97.0579', 'Explore P: 0.5680')
('Episode: 298', 'Total reward: 19.0', 'Training loss: 26.4416', 'Explore P: 0.5670')
('Episode: 299', 'Total reward: 14.0', 'Training loss: 128.8008', 'Explore P: 0.5662')
('Episode: 300', 'Total reward: 12.0', 'Training loss: 96.5542', 'Explore P: 0.5655')
('Episode: 301', 'Total reward: 8.0', 'Training loss: 1.3873', 'Explore P: 0.5651')
('Episode: 302', 'Total reward: 11.0', 'Training loss: 503.2018', 'Explore P: 0.5645')
('Episode: 303', 'Total reward: 26.0', 'Training loss: 107.3717', 'Explore P: 0.5630')
('Episode: 304', 'Total reward: 11.0', 'Training loss: 2.0902', 'Explore P: 0.5624')
('Episode: 305', 'Total reward: 15.0', 'Training loss: 86.8954', 'Explore P: 0.5616')
('Episode: 306', 'Total reward: 21.0', 'Training loss: 79.2237', 'Explore P: 0.5604')
('Episode: 307', 'Total reward: 11.0', 'Training loss: 30.3690', 'Explore P: 0.5598')
('Episode: 308', 'Total reward: 8.0', 'Training loss: 26.4966', 'Explore P: 0.5594')
('Episode: 309', 'Total reward: 19.0', 'Training loss: 0.8817', 'Explore P: 0.5583')
('Episode: 310', 'Total reward: 42.0', 'Training loss: 66.6350', 'Explore P: 0.5560')
('Episode: 311', 'Total reward: 16.0', 'Training loss: 96.3863', 'Explore P: 0.5552')
('Episode: 312', 'Total reward: 12.0', 'Training loss: 1.6679', 'Explore P: 0.5545')
('Episode: 313', 'Total reward: 22.0', 'Training loss: 111.6868', 'Explore P: 0.5533')
('Episode: 314', 'Total reward: 10.0', 'Training loss: 2.0218', 'Explore P: 0.5528')
('Episode: 315', 'Total reward: 29.0', 'Training loss: 2.1940', 'Explore P: 0.5512')
('Episode: 316', 'Total reward: 13.0', 'Training loss: 1.4130', 'Explore P: 0.5505')
('Episode: 317', 'Total reward: 16.0', 'Training loss: 28.3571', 'Explore P: 0.5496')
('Episode: 318', 'Total reward: 8.0', 'Training loss: 2.0156', 'Explore P: 0.5492')
('Episode: 319', 'Total reward: 23.0', 'Training loss: 2.6229', 'Explore P: 0.5480')
('Episode: 320', 'Total reward: 14.0', 'Training loss: 1.7880', 'Explore P: 0.5472')
('Episode: 321', 'Total reward: 10.0', 'Training loss: 89.7186', 'Explore P: 0.5467')
('Episode: 322', 'Total reward: 27.0', 'Training loss: 1.0509', 'Explore P: 0.5452')
('Episode: 323', 'Total reward: 20.0', 'Training loss: 1.1363', 'Explore P: 0.5442')
('Episode: 324', 'Total reward: 12.0', 'Training loss: 25.5868', 'Explore P: 0.5435')
('Episode: 325', 'Total reward: 11.0', 'Training loss: 0.9542', 'Explore P: 0.5429')
('Episode: 326', 'Total reward: 17.0', 'Training loss: 1.1911', 'Explore P: 0.5420')
('Episode: 327', 'Total reward: 22.0', 'Training loss: 1.7246', 'Explore P: 0.5409')
('Episode: 328', 'Total reward: 85.0', 'Training loss: 1.3600', 'Explore P: 0.5364')
('Episode: 329', 'Total reward: 29.0', 'Training loss: 1.4741', 'Explore P: 0.5348')
('Episode: 330', 'Total reward: 67.0', 'Training loss: 59.8564', 'Explore P: 0.5313')
('Episode: 331', 'Total reward: 37.0', 'Training loss: 24.8771', 'Explore P: 0.5294')
('Episode: 332', 'Total reward: 18.0', 'Training loss: 92.3425', 'Explore P: 0.5285')
('Episode: 333', 'Total reward: 29.0', 'Training loss: 23.4815', 'Explore P: 0.5270')
('Episode: 334', 'Total reward: 33.0', 'Training loss: 104.2494', 'Explore P: 0.5253')
('Episode: 335', 'Total reward: 84.0', 'Training loss: 19.3987', 'Explore P: 0.5210')
('Episode: 336', 'Total reward: 23.0', 'Training loss: 59.2825', 'Explore P: 0.5198')
('Episode: 337', 'Total reward: 57.0', 'Training loss: 61.6551', 'Explore P: 0.5169')
('Episode: 338', 'Total reward: 52.0', 'Training loss: 11.4161', 'Explore P: 0.5143')
('Episode: 339', 'Total reward: 47.0', 'Training loss: 59.4008', 'Explore P: 0.5119')
('Episode: 340', 'Total reward: 59.0', 'Training loss: 21.4763', 'Explore P: 0.5089')
('Episode: 341', 'Total reward: 39.0', 'Training loss: 1.3342', 'Explore P: 0.5070')
('Episode: 342', 'Total reward: 78.0', 'Training loss: 1.8339', 'Explore P: 0.5031')
('Episode: 343', 'Total reward: 104.0', 'Training loss: 2.0050', 'Explore P: 0.4980')
('Episode: 344', 'Total reward: 41.0', 'Training loss: 1.3145', 'Explore P: 0.4960')
('Episode: 345', 'Total reward: 53.0', 'Training loss: 19.0929', 'Explore P: 0.4935')
('Episode: 346', 'Total reward: 27.0', 'Training loss: 45.4048', 'Explore P: 0.4922')
('Episode: 347', 'Total reward: 80.0', 'Training loss: 44.2982', 'Explore P: 0.4883')
('Episode: 348', 'Total reward: 49.0', 'Training loss: 2.1410', 'Explore P: 0.4860')
('Episode: 349', 'Total reward: 108.0', 'Training loss: 58.5255', 'Explore P: 0.4809')
('Episode: 350', 'Total reward: 45.0', 'Training loss: 1.6960', 'Explore P: 0.4788')
('Episode: 351', 'Total reward: 166.0', 'Training loss: 16.4372', 'Explore P: 0.4710')
('Episode: 352', 'Total reward: 49.0', 'Training loss: 40.7293', 'Explore P: 0.4688')
('Episode: 353', 'Total reward: 20.0', 'Training loss: 2.0647', 'Explore P: 0.4679')
('Episode: 354', 'Total reward: 125.0', 'Training loss: 51.0365', 'Explore P: 0.4622')
('Episode: 355', 'Total reward: 78.0', 'Training loss: 1.5964', 'Explore P: 0.4587')
('Episode: 356', 'Total reward: 30.0', 'Training loss: 370.3826', 'Explore P: 0.4573')
('Episode: 357', 'Total reward: 32.0', 'Training loss: 1.3290', 'Explore P: 0.4559')
('Episode: 358', 'Total reward: 12.0', 'Training loss: 8.0639', 'Explore P: 0.4554')
('Episode: 359', 'Total reward: 90.0', 'Training loss: 1.9868', 'Explore P: 0.4514')
('Episode: 360', 'Total reward: 47.0', 'Training loss: 1.8446', 'Explore P: 0.4493')
('Episode: 361', 'Total reward: 49.0', 'Training loss: 2.4528', 'Explore P: 0.4472')
('Episode: 362', 'Total reward: 39.0', 'Training loss: 0.8638', 'Explore P: 0.4455')
('Episode: 363', 'Total reward: 43.0', 'Training loss: 0.8637', 'Explore P: 0.4436')
('Episode: 364', 'Total reward: 60.0', 'Training loss: 1.6988', 'Explore P: 0.4410')
('Episode: 365', 'Total reward: 61.0', 'Training loss: 65.6957', 'Explore P: 0.4384')
('Episode: 366', 'Total reward: 101.0', 'Training loss: 36.6862', 'Explore P: 0.4341')
('Episode: 367', 'Total reward: 70.0', 'Training loss: 19.6712', 'Explore P: 0.4311')
('Episode: 368', 'Total reward: 70.0', 'Training loss: 2.1248', 'Explore P: 0.4282')
('Episode: 369', 'Total reward: 18.0', 'Training loss: 73.2337', 'Explore P: 0.4274')
('Episode: 370', 'Total reward: 30.0', 'Training loss: 31.2927', 'Explore P: 0.4262')
('Episode: 371', 'Total reward: 59.0', 'Training loss: 55.0269', 'Explore P: 0.4237')
('Episode: 372', 'Total reward: 24.0', 'Training loss: 2.2433', 'Explore P: 0.4227')
('Episode: 373', 'Total reward: 68.0', 'Training loss: 176.3735', 'Explore P: 0.4199')
('Episode: 374', 'Total reward: 79.0', 'Training loss: 1.4363', 'Explore P: 0.4167')
('Episode: 375', 'Total reward: 34.0', 'Training loss: 13.1078', 'Explore P: 0.4153')
('Episode: 376', 'Total reward: 53.0', 'Training loss: 22.0935', 'Explore P: 0.4132')
('Episode: 377', 'Total reward: 32.0', 'Training loss: 1.8319', 'Explore P: 0.4119')
('Episode: 378', 'Total reward: 34.0', 'Training loss: 1.8440', 'Explore P: 0.4105')
('Episode: 379', 'Total reward: 66.0', 'Training loss: 31.2358', 'Explore P: 0.4079')
('Episode: 380', 'Total reward: 29.0', 'Training loss: 1.7573', 'Explore P: 0.4067')
('Episode: 381', 'Total reward: 82.0', 'Training loss: 57.4010', 'Explore P: 0.4035')
('Episode: 382', 'Total reward: 46.0', 'Training loss: 42.5308', 'Explore P: 0.4017')
('Episode: 383', 'Total reward: 54.0', 'Training loss: 1.4211', 'Explore P: 0.3996')
('Episode: 384', 'Total reward: 69.0', 'Training loss: 1.8155', 'Explore P: 0.3969')
('Episode: 385', 'Total reward: 76.0', 'Training loss: 2.0400', 'Explore P: 0.3940')
('Episode: 386', 'Total reward: 50.0', 'Training loss: 74.7295', 'Explore P: 0.3921')
('Episode: 387', 'Total reward: 73.0', 'Training loss: 1.9034', 'Explore P: 0.3893')
('Episode: 388', 'Total reward: 26.0', 'Training loss: 103.2858', 'Explore P: 0.3883')
('Episode: 389', 'Total reward: 66.0', 'Training loss: 20.0982', 'Explore P: 0.3858')
('Episode: 390', 'Total reward: 47.0', 'Training loss: 1.8841', 'Explore P: 0.3841')
('Episode: 391', 'Total reward: 33.0', 'Training loss: 37.1798', 'Explore P: 0.3828')
('Episode: 392', 'Total reward: 51.0', 'Training loss: 78.6548', 'Explore P: 0.3809')
('Episode: 393', 'Total reward: 31.0', 'Training loss: 52.3942', 'Explore P: 0.3798')
('Episode: 394', 'Total reward: 74.0', 'Training loss: 13.5067', 'Explore P: 0.3771')
('Episode: 395', 'Total reward: 61.0', 'Training loss: 182.4726', 'Explore P: 0.3748')
('Episode: 396', 'Total reward: 43.0', 'Training loss: 41.4492', 'Explore P: 0.3733')
('Episode: 397', 'Total reward: 62.0', 'Training loss: 30.5537', 'Explore P: 0.3710')
('Episode: 398', 'Total reward: 88.0', 'Training loss: 43.2076', 'Explore P: 0.3678')
('Episode: 399', 'Total reward: 59.0', 'Training loss: 124.3097', 'Explore P: 0.3657')
('Episode: 400', 'Total reward: 63.0', 'Training loss: 3.5307', 'Explore P: 0.3635')
('Episode: 401', 'Total reward: 59.0', 'Training loss: 37.7500', 'Explore P: 0.3614')
('Episode: 402', 'Total reward: 45.0', 'Training loss: 58.2618', 'Explore P: 0.3599')
('Episode: 403', 'Total reward: 54.0', 'Training loss: 20.1496', 'Explore P: 0.3580')
('Episode: 404', 'Total reward: 70.0', 'Training loss: 2.3054', 'Explore P: 0.3555')
('Episode: 405', 'Total reward: 34.0', 'Training loss: 2.9857', 'Explore P: 0.3544')
('Episode: 406', 'Total reward: 59.0', 'Training loss: 60.9344', 'Explore P: 0.3523')
('Episode: 407', 'Total reward: 50.0', 'Training loss: 1.1797', 'Explore P: 0.3506')
('Episode: 408', 'Total reward: 95.0', 'Training loss: 3.5253', 'Explore P: 0.3474')
('Episode: 409', 'Total reward: 41.0', 'Training loss: 43.0695', 'Explore P: 0.3460')
('Episode: 410', 'Total reward: 112.0', 'Training loss: 8.4257', 'Explore P: 0.3423')
('Episode: 411', 'Total reward: 15.0', 'Training loss: 185.3367', 'Explore P: 0.3418')
('Episode: 412', 'Total reward: 43.0', 'Training loss: 3.0925', 'Explore P: 0.3404')
('Episode: 413', 'Total reward: 30.0', 'Training loss: 21.6198', 'Explore P: 0.3394')
('Episode: 414', 'Total reward: 44.0', 'Training loss: 9.0338', 'Explore P: 0.3379')
('Episode: 415', 'Total reward: 36.0', 'Training loss: 13.2735', 'Explore P: 0.3368')
('Episode: 416', 'Total reward: 66.0', 'Training loss: 1.6974', 'Explore P: 0.3346')
('Episode: 417', 'Total reward: 42.0', 'Training loss: 3.4244', 'Explore P: 0.3332')
('Episode: 418', 'Total reward: 46.0', 'Training loss: 1.6624', 'Explore P: 0.3318')
('Episode: 419', 'Total reward: 48.0', 'Training loss: 3.7789', 'Explore P: 0.3302')
('Episode: 420', 'Total reward: 66.0', 'Training loss: 165.3972', 'Explore P: 0.3281')
('Episode: 421', 'Total reward: 78.0', 'Training loss: 120.0487', 'Explore P: 0.3256')
('Episode: 422', 'Total reward: 39.0', 'Training loss: 73.0541', 'Explore P: 0.3244')
('Episode: 423', 'Total reward: 58.0', 'Training loss: 19.2774', 'Explore P: 0.3226')
('Episode: 424', 'Total reward: 64.0', 'Training loss: 0.7082', 'Explore P: 0.3206')
('Episode: 425', 'Total reward: 112.0', 'Training loss: 2.7562', 'Explore P: 0.3171')
('Episode: 426', 'Total reward: 135.0', 'Training loss: 1.7622', 'Explore P: 0.3130')
('Episode: 427', 'Total reward: 66.0', 'Training loss: 131.6906', 'Explore P: 0.3110')
('Episode: 428', 'Total reward: 53.0', 'Training loss: 27.2541', 'Explore P: 0.3094')
('Episode: 429', 'Total reward: 42.0', 'Training loss: 1.5742', 'Explore P: 0.3082')
('Episode: 430', 'Total reward: 44.0', 'Training loss: 4.0009', 'Explore P: 0.3069')
('Episode: 431', 'Total reward: 64.0', 'Training loss: 2.5791', 'Explore P: 0.3050')
('Episode: 432', 'Total reward: 114.0', 'Training loss: 2.8070', 'Explore P: 0.3016')
('Episode: 433', 'Total reward: 96.0', 'Training loss: 58.5695', 'Explore P: 0.2988')
('Episode: 434', 'Total reward: 62.0', 'Training loss: 3.7337', 'Explore P: 0.2971')
('Episode: 435', 'Total reward: 84.0', 'Training loss: 3.4556', 'Explore P: 0.2947')
('Episode: 436', 'Total reward: 61.0', 'Training loss: 1.8334', 'Explore P: 0.2929')
('Episode: 437', 'Total reward: 65.0', 'Training loss: 47.7905', 'Explore P: 0.2911')
('Episode: 438', 'Total reward: 58.0', 'Training loss: 2.3992', 'Explore P: 0.2895')
('Episode: 439', 'Total reward: 27.0', 'Training loss: 59.1510', 'Explore P: 0.2887')
('Episode: 440', 'Total reward: 94.0', 'Training loss: 1.8577', 'Explore P: 0.2861')
('Episode: 441', 'Total reward: 69.0', 'Training loss: 65.3774', 'Explore P: 0.2842')
('Episode: 442', 'Total reward: 62.0', 'Training loss: 47.2362', 'Explore P: 0.2825')
('Episode: 443', 'Total reward: 96.0', 'Training loss: 2.8464', 'Explore P: 0.2799')
('Episode: 444', 'Total reward: 47.0', 'Training loss: 29.9691', 'Explore P: 0.2786')
('Episode: 445', 'Total reward: 70.0', 'Training loss: 1.6416', 'Explore P: 0.2768')
('Episode: 446', 'Total reward: 46.0', 'Training loss: 2.4804', 'Explore P: 0.2756')
('Episode: 447', 'Total reward: 60.0', 'Training loss: 54.8693', 'Explore P: 0.2740')
('Episode: 448', 'Total reward: 71.0', 'Training loss: 3.5973', 'Explore P: 0.2721')
('Episode: 449', 'Total reward: 60.0', 'Training loss: 2.9581', 'Explore P: 0.2705')
('Episode: 450', 'Total reward: 52.0', 'Training loss: 1.8881', 'Explore P: 0.2692')
('Episode: 451', 'Total reward: 47.0', 'Training loss: 51.6402', 'Explore P: 0.2680')
('Episode: 452', 'Total reward: 57.0', 'Training loss: 15.3503', 'Explore P: 0.2665')
('Episode: 453', 'Total reward: 64.0', 'Training loss: 2.6170', 'Explore P: 0.2649')
('Episode: 454', 'Total reward: 57.0', 'Training loss: 2.9107', 'Explore P: 0.2634')
('Episode: 455', 'Total reward: 55.0', 'Training loss: 3.2154', 'Explore P: 0.2620')
('Episode: 456', 'Total reward: 53.0', 'Training loss: 10.1481', 'Explore P: 0.2607')
('Episode: 457', 'Total reward: 61.0', 'Training loss: 10.6657', 'Explore P: 0.2592')
('Episode: 458', 'Total reward: 51.0', 'Training loss: 2.4073', 'Explore P: 0.2579')
('Episode: 459', 'Total reward: 73.0', 'Training loss: 13.6499', 'Explore P: 0.2561')
('Episode: 460', 'Total reward: 40.0', 'Training loss: 78.9345', 'Explore P: 0.2551')
('Episode: 461', 'Total reward: 73.0', 'Training loss: 27.1916', 'Explore P: 0.2533')
('Episode: 462', 'Total reward: 66.0', 'Training loss: 18.5371', 'Explore P: 0.2517')
('Episode: 463', 'Total reward: 41.0', 'Training loss: 21.0087', 'Explore P: 0.2507')
('Episode: 464', 'Total reward: 27.0', 'Training loss: 5.9864', 'Explore P: 0.2501')
('Episode: 465', 'Total reward: 69.0', 'Training loss: 17.9483', 'Explore P: 0.2484')
('Episode: 466', 'Total reward: 39.0', 'Training loss: 0.7911', 'Explore P: 0.2475')
('Episode: 467', 'Total reward: 34.0', 'Training loss: 13.5040', 'Explore P: 0.2467')
('Episode: 468', 'Total reward: 14.0', 'Training loss: 16.5725', 'Explore P: 0.2464')
('Episode: 469', 'Total reward: 61.0', 'Training loss: 2.6459', 'Explore P: 0.2449')
('Episode: 470', 'Total reward: 32.0', 'Training loss: 2.0874', 'Explore P: 0.2442')
('Episode: 471', 'Total reward: 50.0', 'Training loss: 14.8788', 'Explore P: 0.2430')
('Episode: 472', 'Total reward: 61.0', 'Training loss: 71.4135', 'Explore P: 0.2416')
('Episode: 473', 'Total reward: 51.0', 'Training loss: 40.7287', 'Explore P: 0.2404')
('Episode: 474', 'Total reward: 49.0', 'Training loss: 1.9067', 'Explore P: 0.2393')
('Episode: 475', 'Total reward: 31.0', 'Training loss: 98.0407', 'Explore P: 0.2386')
('Episode: 476', 'Total reward: 71.0', 'Training loss: 2.5893', 'Explore P: 0.2370')
('Episode: 477', 'Total reward: 60.0', 'Training loss: 232.5318', 'Explore P: 0.2356')
('Episode: 478', 'Total reward: 38.0', 'Training loss: 1.7715', 'Explore P: 0.2348')
('Episode: 479', 'Total reward: 41.0', 'Training loss: 18.1259', 'Explore P: 0.2338')
('Episode: 480', 'Total reward: 65.0', 'Training loss: 1.2297', 'Explore P: 0.2324')
('Episode: 481', 'Total reward: 78.0', 'Training loss: 111.9050', 'Explore P: 0.2307')
('Episode: 482', 'Total reward: 45.0', 'Training loss: 1.9868', 'Explore P: 0.2297')
('Episode: 483', 'Total reward: 47.0', 'Training loss: 73.0530', 'Explore P: 0.2286')
('Episode: 484', 'Total reward: 108.0', 'Training loss: 0.5661', 'Explore P: 0.2263')
('Episode: 485', 'Total reward: 36.0', 'Training loss: 2.5150', 'Explore P: 0.2255')
('Episode: 486', 'Total reward: 62.0', 'Training loss: 1.2964', 'Explore P: 0.2242')
('Episode: 487', 'Total reward: 104.0', 'Training loss: 3.0185', 'Explore P: 0.2220')
('Episode: 488', 'Total reward: 119.0', 'Training loss: 28.2995', 'Explore P: 0.2195')
('Episode: 489', 'Total reward: 111.0', 'Training loss: 2.3287', 'Explore P: 0.2171')
('Episode: 490', 'Total reward: 125.0', 'Training loss: 2.7987', 'Explore P: 0.2146')
('Episode: 491', 'Total reward: 165.0', 'Training loss: 1.2480', 'Explore P: 0.2112')
('Episode: 492', 'Total reward: 152.0', 'Training loss: 25.4154', 'Explore P: 0.2082')
('Episode: 493', 'Total reward: 162.0', 'Training loss: 212.6568', 'Explore P: 0.2050')
('Episode: 494', 'Total reward: 185.0', 'Training loss: 1.8355', 'Explore P: 0.2014')
('Episode: 495', 'Total reward: 183.0', 'Training loss: 29.4973', 'Explore P: 0.1980')
('Episode: 496', 'Total reward: 176.0', 'Training loss: 0.9942', 'Explore P: 0.1947')
('Episode: 498', 'Total reward: 34.0', 'Training loss: 0.9085', 'Explore P: 0.1904')
('Episode: 501', 'Total reward: 198.0', 'Training loss: 0.5859', 'Explore P: 0.1799')
('Episode: 503', 'Total reward: 46.0', 'Training loss: 0.5220', 'Explore P: 0.1758')
('Episode: 506', 'Total reward: 198.0', 'Training loss: 1.7559', 'Explore P: 0.1662')
('Episode: 508', 'Total reward: 6.0', 'Training loss: 0.4824', 'Explore P: 0.1630')
('Episode: 511', 'Total reward: 26.0', 'Training loss: 1.3560', 'Explore P: 0.1566')
('Episode: 513', 'Total reward: 60.0', 'Training loss: 0.7747', 'Explore P: 0.1529')
('Episode: 514', 'Total reward: 188.0', 'Training loss: 0.7885', 'Explore P: 0.1502')
('Episode: 517', 'Total reward: 93.0', 'Training loss: 2.0137', 'Explore P: 0.1434')
('Episode: 519', 'Total reward: 61.0', 'Training loss: 0.7831', 'Explore P: 0.1400')
('Episode: 521', 'Total reward: 115.0', 'Training loss: 0.5284', 'Explore P: 0.1360')
('Episode: 523', 'Total reward: 69.0', 'Training loss: 0.6879', 'Explore P: 0.1326')
('Episode: 525', 'Total reward: 96.0', 'Training loss: 0.3543', 'Explore P: 0.1291')
('Episode: 526', 'Total reward: 198.0', 'Training loss: 1.0986', 'Explore P: 0.1267')
('Episode: 527', 'Total reward: 177.0', 'Training loss: 0.7019', 'Explore P: 0.1247')
('Episode: 528', 'Total reward: 168.0', 'Training loss: 1.2272', 'Explore P: 0.1228')
('Episode: 529', 'Total reward: 135.0', 'Training loss: 0.7767', 'Explore P: 0.1213')
('Episode: 530', 'Total reward: 130.0', 'Training loss: 1.2608', 'Explore P: 0.1198')
('Episode: 531', 'Total reward: 124.0', 'Training loss: 248.9004', 'Explore P: 0.1185')
('Episode: 532', 'Total reward: 160.0', 'Training loss: 1.5553', 'Explore P: 0.1167')
('Episode: 533', 'Total reward: 123.0', 'Training loss: 0.8362', 'Explore P: 0.1154')
('Episode: 534', 'Total reward: 108.0', 'Training loss: 2.1143', 'Explore P: 0.1143')
('Episode: 535', 'Total reward: 117.0', 'Training loss: 1.5347', 'Explore P: 0.1131')
('Episode: 536', 'Total reward: 112.0', 'Training loss: 154.1465', 'Explore P: 0.1119')
('Episode: 537', 'Total reward: 95.0', 'Training loss: 2.1717', 'Explore P: 0.1110')
('Episode: 538', 'Total reward: 108.0', 'Training loss: 1.1803', 'Explore P: 0.1099')
('Episode: 539', 'Total reward: 27.0', 'Training loss: 1.4408', 'Explore P: 0.1096')
('Episode: 540', 'Total reward: 31.0', 'Training loss: 1.8835', 'Explore P: 0.1093')
('Episode: 541', 'Total reward: 25.0', 'Training loss: 1.5436', 'Explore P: 0.1091')
('Episode: 542', 'Total reward: 71.0', 'Training loss: 1.0837', 'Explore P: 0.1084')
('Episode: 543', 'Total reward: 33.0', 'Training loss: 1.0288', 'Explore P: 0.1080')
('Episode: 544', 'Total reward: 24.0', 'Training loss: 1.7819', 'Explore P: 0.1078')
('Episode: 545', 'Total reward: 20.0', 'Training loss: 1.6577', 'Explore P: 0.1076')
('Episode: 546', 'Total reward: 25.0', 'Training loss: 3.5732', 'Explore P: 0.1074')
('Episode: 547', 'Total reward: 33.0', 'Training loss: 2.3142', 'Explore P: 0.1070')
('Episode: 548', 'Total reward: 41.0', 'Training loss: 1.7982', 'Explore P: 0.1067')
('Episode: 549', 'Total reward: 103.0', 'Training loss: 1.4368', 'Explore P: 0.1057')
('Episode: 550', 'Total reward: 120.0', 'Training loss: 2.5397', 'Explore P: 0.1045')
('Episode: 551', 'Total reward: 91.0', 'Training loss: 1.3019', 'Explore P: 0.1037')
('Episode: 552', 'Total reward: 116.0', 'Training loss: 728.8776', 'Explore P: 0.1026')
('Episode: 553', 'Total reward: 112.0', 'Training loss: 1.6713', 'Explore P: 0.1016')
('Episode: 554', 'Total reward: 26.0', 'Training loss: 1124.3153', 'Explore P: 0.1013')
('Episode: 555', 'Total reward: 33.0', 'Training loss: 1.9666', 'Explore P: 0.1010')
('Episode: 556', 'Total reward: 26.0', 'Training loss: 1.8586', 'Explore P: 0.1008')
('Episode: 557', 'Total reward: 25.0', 'Training loss: 2.0883', 'Explore P: 0.1006')
('Episode: 558', 'Total reward: 24.0', 'Training loss: 1.3518', 'Explore P: 0.1003')
('Episode: 559', 'Total reward: 30.0', 'Training loss: 140.0042', 'Explore P: 0.1001')
('Episode: 560', 'Total reward: 39.0', 'Training loss: 1.3568', 'Explore P: 0.0997')
('Episode: 561', 'Total reward: 50.0', 'Training loss: 2.5028', 'Explore P: 0.0993')
('Episode: 562', 'Total reward: 45.0', 'Training loss: 0.8105', 'Explore P: 0.0989')
('Episode: 563', 'Total reward: 121.0', 'Training loss: 1.4326', 'Explore P: 0.0978')
('Episode: 564', 'Total reward: 35.0', 'Training loss: 1.8010', 'Explore P: 0.0975')
('Episode: 565', 'Total reward: 22.0', 'Training loss: 2.6747', 'Explore P: 0.0973')
('Episode: 566', 'Total reward: 24.0', 'Training loss: 1.6915', 'Explore P: 0.0971')
('Episode: 567', 'Total reward: 29.0', 'Training loss: 2.1017', 'Explore P: 0.0968')
('Episode: 568', 'Total reward: 24.0', 'Training loss: 2.5097', 'Explore P: 0.0966')
('Episode: 569', 'Total reward: 31.0', 'Training loss: 0.8870', 'Explore P: 0.0964')
('Episode: 570', 'Total reward: 39.0', 'Training loss: 199.2448', 'Explore P: 0.0960')
('Episode: 571', 'Total reward: 36.0', 'Training loss: 1.5540', 'Explore P: 0.0957')
('Episode: 572', 'Total reward: 35.0', 'Training loss: 1.5280', 'Explore P: 0.0954')
('Episode: 573', 'Total reward: 35.0', 'Training loss: 3.1849', 'Explore P: 0.0951')
('Episode: 574', 'Total reward: 26.0', 'Training loss: 1.8312', 'Explore P: 0.0949')
('Episode: 575', 'Total reward: 22.0', 'Training loss: 570.2103', 'Explore P: 0.0947')
('Episode: 576', 'Total reward: 45.0', 'Training loss: 1.3452', 'Explore P: 0.0943')
('Episode: 577', 'Total reward: 109.0', 'Training loss: 0.9985', 'Explore P: 0.0934')
('Episode: 578', 'Total reward: 110.0', 'Training loss: 1.1849', 'Explore P: 0.0925')
('Episode: 579', 'Total reward: 52.0', 'Training loss: 2.5769', 'Explore P: 0.0921')
('Episode: 580', 'Total reward: 29.0', 'Training loss: 1.7514', 'Explore P: 0.0918')
('Episode: 581', 'Total reward: 30.0', 'Training loss: 907.0981', 'Explore P: 0.0916')
('Episode: 582', 'Total reward: 25.0', 'Training loss: 2.4207', 'Explore P: 0.0914')
('Episode: 583', 'Total reward: 41.0', 'Training loss: 394.4892', 'Explore P: 0.0911')
('Episode: 584', 'Total reward: 65.0', 'Training loss: 1.1335', 'Explore P: 0.0905')
('Episode: 585', 'Total reward: 50.0', 'Training loss: 1.6426', 'Explore P: 0.0901')
('Episode: 586', 'Total reward: 131.0', 'Training loss: 1.3362', 'Explore P: 0.0891')
('Episode: 587', 'Total reward: 88.0', 'Training loss: 2.2653', 'Explore P: 0.0884')
('Episode: 588', 'Total reward: 35.0', 'Training loss: 1.5294', 'Explore P: 0.0881')
('Episode: 589', 'Total reward: 82.0', 'Training loss: 1.6778', 'Explore P: 0.0875')
('Episode: 590', 'Total reward: 105.0', 'Training loss: 584.9018', 'Explore P: 0.0867')
('Episode: 591', 'Total reward: 137.0', 'Training loss: 1.1455', 'Explore P: 0.0856')
('Episode: 592', 'Total reward: 144.0', 'Training loss: 466.2288', 'Explore P: 0.0845')
('Episode: 593', 'Total reward: 132.0', 'Training loss: 0.7948', 'Explore P: 0.0836')
('Episode: 594', 'Total reward: 128.0', 'Training loss: 1.1540', 'Explore P: 0.0826')
('Episode: 595', 'Total reward: 126.0', 'Training loss: 1.3042', 'Explore P: 0.0817')
('Episode: 596', 'Total reward: 128.0', 'Training loss: 0.9304', 'Explore P: 0.0808')
('Episode: 597', 'Total reward: 121.0', 'Training loss: 1.0401', 'Explore P: 0.0800')
('Episode: 599', 'Total reward: 43.0', 'Training loss: 417.6943', 'Explore P: 0.0783')
('Episode: 600', 'Total reward: 131.0', 'Training loss: 0.5511', 'Explore P: 0.0774')
('Episode: 601', 'Total reward: 137.0', 'Training loss: 1.4016', 'Explore P: 0.0765')
('Episode: 602', 'Total reward: 118.0', 'Training loss: 0.7105', 'Explore P: 0.0757')
('Episode: 603', 'Total reward: 147.0', 'Training loss: 0.9455', 'Explore P: 0.0747')
('Episode: 605', 'Total reward: 58.0', 'Training loss: 1.3652', 'Explore P: 0.0731')
('Episode: 606', 'Total reward: 83.0', 'Training loss: 0.7413', 'Explore P: 0.0726')
('Episode: 607', 'Total reward: 117.0', 'Training loss: 339.7480', 'Explore P: 0.0718')
('Episode: 608', 'Total reward: 137.0', 'Training loss: 0.3403', 'Explore P: 0.0710')
('Episode: 611', 'Total reward: 129.0', 'Training loss: 0.5616', 'Explore P: 0.0679')
('Episode: 612', 'Total reward: 133.0', 'Training loss: 0.4743', 'Explore P: 0.0671')
('Episode: 613', 'Total reward: 175.0', 'Training loss: 0.5251', 'Explore P: 0.0661')
('Episode: 614', 'Total reward: 98.0', 'Training loss: 168.2797', 'Explore P: 0.0656')
('Episode: 615', 'Total reward: 130.0', 'Training loss: 0.1696', 'Explore P: 0.0648')
('Episode: 616', 'Total reward: 127.0', 'Training loss: 0.3654', 'Explore P: 0.0641')
('Episode: 617', 'Total reward: 111.0', 'Training loss: 0.8384', 'Explore P: 0.0635')
('Episode: 619', 'Total reward: 62.0', 'Training loss: 0.4047', 'Explore P: 0.0622')
('Episode: 620', 'Total reward: 183.0', 'Training loss: 98.4526', 'Explore P: 0.0612')
('Episode: 621', 'Total reward: 111.0', 'Training loss: 0.7241', 'Explore P: 0.0606')
('Episode: 622', 'Total reward: 128.0', 'Training loss: 0.4145', 'Explore P: 0.0600')
('Episode: 623', 'Total reward: 138.0', 'Training loss: 0.3983', 'Explore P: 0.0593')
('Episode: 624', 'Total reward: 181.0', 'Training loss: 60.5166', 'Explore P: 0.0584')
('Episode: 625', 'Total reward: 124.0', 'Training loss: 0.6887', 'Explore P: 0.0578')
('Episode: 627', 'Total reward: 117.0', 'Training loss: 0.5221', 'Explore P: 0.0563')
('Episode: 628', 'Total reward: 104.0', 'Training loss: 0.5083', 'Explore P: 0.0559')
('Episode: 629', 'Total reward: 190.0', 'Training loss: 0.6249', 'Explore P: 0.0550')
('Episode: 631', 'Total reward: 136.0', 'Training loss: 0.3329', 'Explore P: 0.0535')
('Episode: 639', 'Total reward: 59.0', 'Training loss: 0.4003', 'Explore P: 0.0476')
('Episode: 640', 'Total reward: 179.0', 'Training loss: 0.4963', 'Explore P: 0.0469')
('Episode: 641', 'Total reward: 177.0', 'Training loss: 0.6678', 'Explore P: 0.0463')
('Episode: 644', 'Total reward: 79.0', 'Training loss: 0.3278', 'Explore P: 0.0446')
('Episode: 645', 'Total reward: 173.0', 'Training loss: 0.4492', 'Explore P: 0.0440')
('Episode: 649', 'Total reward: 49.0', 'Training loss: 0.1229', 'Explore P: 0.0419')
('Episode: 650', 'Total reward: 152.0', 'Training loss: 0.2010', 'Explore P: 0.0414')
('Episode: 651', 'Total reward: 182.0', 'Training loss: 0.1729', 'Explore P: 0.0408')
('Episode: 654', 'Total reward: 143.0', 'Training loss: 0.1426', 'Explore P: 0.0392')
('Episode: 656', 'Total reward: 131.0', 'Training loss: 0.2659', 'Explore P: 0.0382')
('Episode: 658', 'Total reward: 123.0', 'Training loss: 0.4135', 'Explore P: 0.0373')
('Episode: 660', 'Total reward: 25.0', 'Training loss: 0.1983', 'Explore P: 0.0367')
('Episode: 662', 'Total reward: 83.0', 'Training loss: 0.3753', 'Explore P: 0.0360')
('Episode: 663', 'Total reward: 180.0', 'Training loss: 0.3161', 'Explore P: 0.0355')
('Episode: 664', 'Total reward: 125.0', 'Training loss: 0.3341', 'Explore P: 0.0352')
('Episode: 666', 'Total reward: 34.0', 'Training loss: 0.4683', 'Explore P: 0.0346')
('Episode: 667', 'Total reward: 159.0', 'Training loss: 0.4679', 'Explore P: 0.0342')
('Episode: 668', 'Total reward: 132.0', 'Training loss: 410.0340', 'Explore P: 0.0339')
('Episode: 670', 'Total reward: 143.0', 'Training loss: 0.6705', 'Explore P: 0.0331')
('Episode: 671', 'Total reward: 141.0', 'Training loss: 0.5838', 'Explore P: 0.0328')
('Episode: 672', 'Total reward: 124.0', 'Training loss: 0.7207', 'Explore P: 0.0325')
('Episode: 673', 'Total reward: 116.0', 'Training loss: 0.5406', 'Explore P: 0.0322')
('Episode: 674', 'Total reward: 125.0', 'Training loss: 0.7760', 'Explore P: 0.0320')
('Episode: 675', 'Total reward: 107.0', 'Training loss: 0.3803', 'Explore P: 0.0317')
('Episode: 676', 'Total reward: 132.0', 'Training loss: 0.5059', 'Explore P: 0.0315')
('Episode: 677', 'Total reward: 52.0', 'Training loss: 0.7145', 'Explore P: 0.0313')
('Episode: 678', 'Total reward: 53.0', 'Training loss: 0.7508', 'Explore P: 0.0312')
('Episode: 679', 'Total reward: 105.0', 'Training loss: 0.8373', 'Explore P: 0.0310')
('Episode: 680', 'Total reward: 109.0', 'Training loss: 0.5825', 'Explore P: 0.0308')
('Episode: 681', 'Total reward: 38.0', 'Training loss: 0.6950', 'Explore P: 0.0307')
('Episode: 682', 'Total reward: 78.0', 'Training loss: 0.7735', 'Explore P: 0.0305')
('Episode: 683', 'Total reward: 45.0', 'Training loss: 529.0507', 'Explore P: 0.0304')
('Episode: 684', 'Total reward: 43.0', 'Training loss: 0.6798', 'Explore P: 0.0304')
('Episode: 685', 'Total reward: 46.0', 'Training loss: 1.0038', 'Explore P: 0.0303')
('Episode: 686', 'Total reward: 61.0', 'Training loss: 0.6615', 'Explore P: 0.0301')
('Episode: 687', 'Total reward: 58.0', 'Training loss: 0.5814', 'Explore P: 0.0300')
('Episode: 688', 'Total reward: 41.0', 'Training loss: 0.7533', 'Explore P: 0.0299')
('Episode: 689', 'Total reward: 41.0', 'Training loss: 1.0502', 'Explore P: 0.0299')
('Episode: 690', 'Total reward: 48.0', 'Training loss: 148.0786', 'Explore P: 0.0298')
('Episode: 691', 'Total reward: 42.0', 'Training loss: 0.9133', 'Explore P: 0.0297')
('Episode: 692', 'Total reward: 28.0', 'Training loss: 0.9504', 'Explore P: 0.0296')
('Episode: 693', 'Total reward: 39.0', 'Training loss: 1.0191', 'Explore P: 0.0296')
('Episode: 694', 'Total reward: 36.0', 'Training loss: 1.1383', 'Explore P: 0.0295')
('Episode: 695', 'Total reward: 36.0', 'Training loss: 1.2371', 'Explore P: 0.0294')
('Episode: 696', 'Total reward: 38.0', 'Training loss: 1.2391', 'Explore P: 0.0293')
('Episode: 697', 'Total reward: 28.0', 'Training loss: 1.0012', 'Explore P: 0.0293')
('Episode: 698', 'Total reward: 29.0', 'Training loss: 0.8075', 'Explore P: 0.0292')
('Episode: 699', 'Total reward: 40.0', 'Training loss: 1.0288', 'Explore P: 0.0292')
('Episode: 700', 'Total reward: 34.0', 'Training loss: 1.4521', 'Explore P: 0.0291')
('Episode: 701', 'Total reward: 37.0', 'Training loss: 1.2089', 'Explore P: 0.0290')
('Episode: 702', 'Total reward: 27.0', 'Training loss: 2.2300', 'Explore P: 0.0290')
('Episode: 703', 'Total reward: 22.0', 'Training loss: 281.8775', 'Explore P: 0.0289')
('Episode: 704', 'Total reward: 35.0', 'Training loss: 1.3613', 'Explore P: 0.0289')
('Episode: 705', 'Total reward: 41.0', 'Training loss: 0.9388', 'Explore P: 0.0288')
('Episode: 706', 'Total reward: 35.0', 'Training loss: 1.4376', 'Explore P: 0.0287')
('Episode: 707', 'Total reward: 47.0', 'Training loss: 0.9755', 'Explore P: 0.0286')
('Episode: 708', 'Total reward: 30.0', 'Training loss: 1.1249', 'Explore P: 0.0286')
('Episode: 709', 'Total reward: 31.0', 'Training loss: 1.1601', 'Explore P: 0.0285')
('Episode: 710', 'Total reward: 34.0', 'Training loss: 0.8267', 'Explore P: 0.0285')
('Episode: 711', 'Total reward: 37.0', 'Training loss: 1.9893', 'Explore P: 0.0284')
('Episode: 712', 'Total reward: 27.0', 'Training loss: 2.0631', 'Explore P: 0.0283')
('Episode: 713', 'Total reward: 25.0', 'Training loss: 2.0964', 'Explore P: 0.0283')
('Episode: 714', 'Total reward: 33.0', 'Training loss: 1.4847', 'Explore P: 0.0282')
('Episode: 715', 'Total reward: 24.0', 'Training loss: 1.3476', 'Explore P: 0.0282')
('Episode: 716', 'Total reward: 23.0', 'Training loss: 2.0094', 'Explore P: 0.0281')
('Episode: 717', 'Total reward: 28.0', 'Training loss: 1.7652', 'Explore P: 0.0281')
('Episode: 718', 'Total reward: 37.0', 'Training loss: 0.8661', 'Explore P: 0.0280')
('Episode: 719', 'Total reward: 38.0', 'Training loss: 0.9912', 'Explore P: 0.0280')
('Episode: 720', 'Total reward: 44.0', 'Training loss: 0.9353', 'Explore P: 0.0279')
('Episode: 721', 'Total reward: 33.0', 'Training loss: 1841.9574', 'Explore P: 0.0278')
('Episode: 722', 'Total reward: 44.0', 'Training loss: 175.1404', 'Explore P: 0.0277')
('Episode: 723', 'Total reward: 107.0', 'Training loss: 0.9275', 'Explore P: 0.0276')
('Episode: 724', 'Total reward: 103.0', 'Training loss: 0.6488', 'Explore P: 0.0274')
('Episode: 725', 'Total reward: 118.0', 'Training loss: 0.5442', 'Explore P: 0.0272')
('Episode: 726', 'Total reward: 129.0', 'Training loss: 357.0566', 'Explore P: 0.0269')
('Episode: 727', 'Total reward: 117.0', 'Training loss: 0.5868', 'Explore P: 0.0268')
('Episode: 728', 'Total reward: 124.0', 'Training loss: 1.5604', 'Explore P: 0.0265')
('Episode: 730', 'Total reward: 48.0', 'Training loss: 279.4468', 'Explore P: 0.0261')
('Episode: 731', 'Total reward: 165.0', 'Training loss: 68.5160', 'Explore P: 0.0259')
('Episode: 732', 'Total reward: 195.0', 'Training loss: 0.9041', 'Explore P: 0.0256')
('Episode: 736', 'Total reward: 130.0', 'Training loss: 0.4251', 'Explore P: 0.0245')
('Episode: 739', 'Total reward: 73.0', 'Training loss: 0.5885', 'Explore P: 0.0238')
('Episode: 741', 'Total reward: 8.0', 'Training loss: 183.7896', 'Explore P: 0.0235')
('Episode: 743', 'Total reward: 181.0', 'Training loss: 0.5301', 'Explore P: 0.0230')
('Episode: 744', 'Total reward: 198.0', 'Training loss: 44.0226', 'Explore P: 0.0228')
('Episode: 748', 'Total reward: 186.0', 'Training loss: 0.5509', 'Explore P: 0.0218')
('Episode: 753', 'Total reward: 139.0', 'Training loss: 219.5010', 'Explore P: 0.0207')
('Episode: 755', 'Total reward: 146.0', 'Training loss: 0.9692', 'Explore P: 0.0204')
('Episode: 760', 'Total reward: 65.0', 'Training loss: 1.0978', 'Explore P: 0.0195')
('Episode: 763', 'Total reward: 173.0', 'Training loss: 169.2515', 'Explore P: 0.0190')
('Episode: 765', 'Total reward: 30.0', 'Training loss: 0.7446', 'Explore P: 0.0188')
('Episode: 768', 'Total reward: 152.0', 'Training loss: 0.8921', 'Explore P: 0.0183')
('Episode: 770', 'Total reward: 58.0', 'Training loss: 0.8910', 'Explore P: 0.0181')
('Episode: 771', 'Total reward: 129.0', 'Training loss: 0.7178', 'Explore P: 0.0180')
('Episode: 772', 'Total reward: 170.0', 'Training loss: 0.7428', 'Explore P: 0.0179')
('Episode: 773', 'Total reward: 128.0', 'Training loss: 1.0130', 'Explore P: 0.0178')
('Episode: 775', 'Total reward: 8.0', 'Training loss: 0.7021', 'Explore P: 0.0176')
('Episode: 776', 'Total reward: 136.0', 'Training loss: 0.9393', 'Explore P: 0.0175')
('Episode: 777', 'Total reward: 141.0', 'Training loss: 0.5124', 'Explore P: 0.0174')
('Episode: 778', 'Total reward: 176.0', 'Training loss: 134.4326', 'Explore P: 0.0173')
('Episode: 779', 'Total reward: 106.0', 'Training loss: 1.3043', 'Explore P: 0.0172')
('Episode: 780', 'Total reward: 99.0', 'Training loss: 0.6617', 'Explore P: 0.0171')
('Episode: 781', 'Total reward: 100.0', 'Training loss: 0.8844', 'Explore P: 0.0170')
('Episode: 782', 'Total reward: 99.0', 'Training loss: 0.9240', 'Explore P: 0.0170')
('Episode: 783', 'Total reward: 106.0', 'Training loss: 171.4286', 'Explore P: 0.0169')
('Episode: 784', 'Total reward: 91.0', 'Training loss: 1.2148', 'Explore P: 0.0168')
('Episode: 785', 'Total reward: 113.0', 'Training loss: 1.4495', 'Explore P: 0.0168')
('Episode: 786', 'Total reward: 84.0', 'Training loss: 0.2551', 'Explore P: 0.0167')
('Episode: 787', 'Total reward: 98.0', 'Training loss: 0.1729', 'Explore P: 0.0166')
('Episode: 788', 'Total reward: 111.0', 'Training loss: 0.4512', 'Explore P: 0.0166')
('Episode: 789', 'Total reward: 86.0', 'Training loss: 0.5685', 'Explore P: 0.0165')
('Episode: 790', 'Total reward: 105.0', 'Training loss: 0.2163', 'Explore P: 0.0164')
('Episode: 791', 'Total reward: 99.0', 'Training loss: 0.7048', 'Explore P: 0.0164')
('Episode: 792', 'Total reward: 94.0', 'Training loss: 0.5403', 'Explore P: 0.0163')
('Episode: 793', 'Total reward: 99.0', 'Training loss: 0.6430', 'Explore P: 0.0163')
('Episode: 794', 'Total reward: 94.0', 'Training loss: 0.3755', 'Explore P: 0.0162')
('Episode: 795', 'Total reward: 98.0', 'Training loss: 0.2549', 'Explore P: 0.0161')
('Episode: 796', 'Total reward: 112.0', 'Training loss: 0.3453', 'Explore P: 0.0161')
('Episode: 797', 'Total reward: 113.0', 'Training loss: 0.4073', 'Explore P: 0.0160')
('Episode: 798', 'Total reward: 122.0', 'Training loss: 0.1378', 'Explore P: 0.0159')
('Episode: 800', 'Total reward: 85.0', 'Training loss: 0.1234', 'Explore P: 0.0158')
('Episode: 801', 'Total reward: 178.0', 'Training loss: 0.3336', 'Explore P: 0.0157')
('Episode: 802', 'Total reward: 121.0', 'Training loss: 0.2850', 'Explore P: 0.0156')
('Episode: 804', 'Total reward: 14.0', 'Training loss: 0.1361', 'Explore P: 0.0155')
('Episode: 805', 'Total reward: 106.0', 'Training loss: 0.2345', 'Explore P: 0.0154')
('Episode: 807', 'Total reward: 36.0', 'Training loss: 0.3448', 'Explore P: 0.0153')
('Episode: 812', 'Total reward: 14.0', 'Training loss: 0.1817', 'Explore P: 0.0149')
('Episode: 813', 'Total reward: 145.0', 'Training loss: 0.2932', 'Explore P: 0.0148')
('Episode: 820', 'Total reward: 94.0', 'Training loss: 0.1988', 'Explore P: 0.0142')
('Episode: 825', 'Total reward: 29.0', 'Training loss: 0.1141', 'Explore P: 0.0139')
('Episode: 830', 'Total reward: 44.0', 'Training loss: 0.1292', 'Explore P: 0.0136')
('Episode: 835', 'Total reward: 183.0', 'Training loss: 0.6262', 'Explore P: 0.0132')
('Episode: 839', 'Total reward: 161.0', 'Training loss: 0.3034', 'Explore P: 0.0130')
('Episode: 840', 'Total reward: 138.0', 'Training loss: 1.3792', 'Explore P: 0.0130')
('Episode: 842', 'Total reward: 38.0', 'Training loss: 0.2481', 'Explore P: 0.0129')
('Episode: 843', 'Total reward: 180.0', 'Training loss: 0.1293', 'Explore P: 0.0128')
('Episode: 846', 'Total reward: 57.0', 'Training loss: 0.1865', 'Explore P: 0.0127')
('Episode: 849', 'Total reward: 33.0', 'Training loss: 0.0685', 'Explore P: 0.0126')
('Episode: 851', 'Total reward: 130.0', 'Training loss: 0.2696', 'Explore P: 0.0125')
('Episode: 853', 'Total reward: 76.0', 'Training loss: 0.2397', 'Explore P: 0.0124')
('Episode: 855', 'Total reward: 58.0', 'Training loss: 0.0723', 'Explore P: 0.0124')
('Episode: 856', 'Total reward: 192.0', 'Training loss: 0.1939', 'Explore P: 0.0123')
('Episode: 857', 'Total reward: 152.0', 'Training loss: 0.6102', 'Explore P: 0.0123')
('Episode: 858', 'Total reward: 21.0', 'Training loss: 0.4827', 'Explore P: 0.0123')
('Episode: 859', 'Total reward: 19.0', 'Training loss: 0.7239', 'Explore P: 0.0123')
('Episode: 860', 'Total reward: 20.0', 'Training loss: 0.4178', 'Explore P: 0.0123')
('Episode: 861', 'Total reward: 15.0', 'Training loss: 0.6646', 'Explore P: 0.0123')
('Episode: 862', 'Total reward: 15.0', 'Training loss: 1.0301', 'Explore P: 0.0123')
('Episode: 863', 'Total reward: 18.0', 'Training loss: 0.9256', 'Explore P: 0.0123')
('Episode: 864', 'Total reward: 12.0', 'Training loss: 0.8893', 'Explore P: 0.0123')
('Episode: 865', 'Total reward: 16.0', 'Training loss: 1.2400', 'Explore P: 0.0123')
('Episode: 866', 'Total reward: 14.0', 'Training loss: 0.8475', 'Explore P: 0.0123')
('Episode: 867', 'Total reward: 11.0', 'Training loss: 0.9204', 'Explore P: 0.0123')
('Episode: 868', 'Total reward: 12.0', 'Training loss: 1.1716', 'Explore P: 0.0123')
('Episode: 869', 'Total reward: 13.0', 'Training loss: 1.2224', 'Explore P: 0.0123')
('Episode: 870', 'Total reward: 16.0', 'Training loss: 0.8077', 'Explore P: 0.0123')
('Episode: 871', 'Total reward: 12.0', 'Training loss: 1.1141', 'Explore P: 0.0123')
('Episode: 872', 'Total reward: 11.0', 'Training loss: 1.2642', 'Explore P: 0.0122')
('Episode: 873', 'Total reward: 10.0', 'Training loss: 1.9699', 'Explore P: 0.0122')
('Episode: 874', 'Total reward: 13.0', 'Training loss: 2.6408', 'Explore P: 0.0122')
('Episode: 875', 'Total reward: 13.0', 'Training loss: 1383.2507', 'Explore P: 0.0122')
('Episode: 876', 'Total reward: 13.0', 'Training loss: 2.0283', 'Explore P: 0.0122')
('Episode: 877', 'Total reward: 16.0', 'Training loss: 0.7609', 'Explore P: 0.0122')
('Episode: 878', 'Total reward: 11.0', 'Training loss: 1.7408', 'Explore P: 0.0122')
('Episode: 879', 'Total reward: 15.0', 'Training loss: 0.7078', 'Explore P: 0.0122')
('Episode: 880', 'Total reward: 14.0', 'Training loss: 0.9305', 'Explore P: 0.0122')
('Episode: 881', 'Total reward: 14.0', 'Training loss: 1.2251', 'Explore P: 0.0122')
('Episode: 882', 'Total reward: 11.0', 'Training loss: 0.6531', 'Explore P: 0.0122')
('Episode: 883', 'Total reward: 13.0', 'Training loss: 0.5078', 'Explore P: 0.0122')
('Episode: 884', 'Total reward: 15.0', 'Training loss: 0.3386', 'Explore P: 0.0122')
('Episode: 885', 'Total reward: 13.0', 'Training loss: 0.5196', 'Explore P: 0.0122')
('Episode: 886', 'Total reward: 17.0', 'Training loss: 0.5373', 'Explore P: 0.0122')
('Episode: 887', 'Total reward: 13.0', 'Training loss: 1342.9104', 'Explore P: 0.0122')
('Episode: 888', 'Total reward: 14.0', 'Training loss: 62.6485', 'Explore P: 0.0122')
('Episode: 889', 'Total reward: 16.0', 'Training loss: 1.4386', 'Explore P: 0.0122')
('Episode: 893', 'Total reward: 14.0', 'Training loss: 2.0764', 'Explore P: 0.0121')
('Episode: 894', 'Total reward: 29.0', 'Training loss: 2.0915', 'Explore P: 0.0121')
('Episode: 895', 'Total reward: 14.0', 'Training loss: 3.0212', 'Explore P: 0.0121')
('Episode: 896', 'Total reward: 15.0', 'Training loss: 1.9122', 'Explore P: 0.0121')
('Episode: 897', 'Total reward: 12.0', 'Training loss: 2.0551', 'Explore P: 0.0121')
('Episode: 898', 'Total reward: 15.0', 'Training loss: 35.7796', 'Explore P: 0.0120')
('Episode: 899', 'Total reward: 13.0', 'Training loss: 1.5189', 'Explore P: 0.0120')
('Episode: 900', 'Total reward: 12.0', 'Training loss: 2.1885', 'Explore P: 0.0120')
('Episode: 901', 'Total reward: 18.0', 'Training loss: 1.7466', 'Explore P: 0.0120')
('Episode: 902', 'Total reward: 11.0', 'Training loss: 2.4788', 'Explore P: 0.0120')
('Episode: 903', 'Total reward: 15.0', 'Training loss: 3.0681', 'Explore P: 0.0120')
('Episode: 904', 'Total reward: 11.0', 'Training loss: 2.8713', 'Explore P: 0.0120')
('Episode: 905', 'Total reward: 11.0', 'Training loss: 560.2599', 'Explore P: 0.0120')
('Episode: 906', 'Total reward: 13.0', 'Training loss: 679.8275', 'Explore P: 0.0120')
('Episode: 907', 'Total reward: 13.0', 'Training loss: 1.8717', 'Explore P: 0.0120')
('Episode: 908', 'Total reward: 12.0', 'Training loss: 5.3081', 'Explore P: 0.0120')
('Episode: 909', 'Total reward: 11.0', 'Training loss: 2.7723', 'Explore P: 0.0120')
('Episode: 910', 'Total reward: 12.0', 'Training loss: 2.4288', 'Explore P: 0.0120')
('Episode: 911', 'Total reward: 12.0', 'Training loss: 4.5259', 'Explore P: 0.0120')
('Episode: 912', 'Total reward: 8.0', 'Training loss: 7.2582', 'Explore P: 0.0120')
('Episode: 913', 'Total reward: 8.0', 'Training loss: 5.7660', 'Explore P: 0.0120')
('Episode: 914', 'Total reward: 9.0', 'Training loss: 2.6767', 'Explore P: 0.0120')
('Episode: 915', 'Total reward: 10.0', 'Training loss: 3.7783', 'Explore P: 0.0120')
('Episode: 916', 'Total reward: 9.0', 'Training loss: 2.3294', 'Explore P: 0.0120')
('Episode: 917', 'Total reward: 12.0', 'Training loss: 3.4652', 'Explore P: 0.0120')
('Episode: 918', 'Total reward: 11.0', 'Training loss: 3.6515', 'Explore P: 0.0120')
('Episode: 919', 'Total reward: 9.0', 'Training loss: 2.7800', 'Explore P: 0.0120')
('Episode: 920', 'Total reward: 9.0', 'Training loss: 3.6526', 'Explore P: 0.0120')
('Episode: 921', 'Total reward: 11.0', 'Training loss: 4.9908', 'Explore P: 0.0120')
('Episode: 922', 'Total reward: 10.0', 'Training loss: 3.2693', 'Explore P: 0.0120')
('Episode: 923', 'Total reward: 8.0', 'Training loss: 6.1815', 'Explore P: 0.0120')
('Episode: 924', 'Total reward: 10.0', 'Training loss: 5.5296', 'Explore P: 0.0120')
('Episode: 925', 'Total reward: 12.0', 'Training loss: 4.7362', 'Explore P: 0.0120')
('Episode: 926', 'Total reward: 7.0', 'Training loss: 4.9608', 'Explore P: 0.0120')
('Episode: 927', 'Total reward: 8.0', 'Training loss: 5.3386', 'Explore P: 0.0120')
('Episode: 928', 'Total reward: 8.0', 'Training loss: 9.3868', 'Explore P: 0.0120')
('Episode: 929', 'Total reward: 9.0', 'Training loss: 703.4950', 'Explore P: 0.0120')
('Episode: 930', 'Total reward: 12.0', 'Training loss: 7.0237', 'Explore P: 0.0120')
('Episode: 931', 'Total reward: 10.0', 'Training loss: 9.2183', 'Explore P: 0.0120')
('Episode: 932', 'Total reward: 8.0', 'Training loss: 4.4216', 'Explore P: 0.0120')
('Episode: 933', 'Total reward: 11.0', 'Training loss: 107.9203', 'Explore P: 0.0120')
('Episode: 934', 'Total reward: 12.0', 'Training loss: 3.8671', 'Explore P: 0.0120')
('Episode: 935', 'Total reward: 13.0', 'Training loss: 3.8570', 'Explore P: 0.0120')
('Episode: 936', 'Total reward: 13.0', 'Training loss: 977.7872', 'Explore P: 0.0120')
('Episode: 937', 'Total reward: 14.0', 'Training loss: 3.8177', 'Explore P: 0.0120')
('Episode: 938', 'Total reward: 14.0', 'Training loss: 1.2815', 'Explore P: 0.0120')
('Episode: 939', 'Total reward: 11.0', 'Training loss: 2.1504', 'Explore P: 0.0120')
('Episode: 940', 'Total reward: 9.0', 'Training loss: 1.1685', 'Explore P: 0.0120')
('Episode: 941', 'Total reward: 9.0', 'Training loss: 4.3363', 'Explore P: 0.0120')
('Episode: 942', 'Total reward: 10.0', 'Training loss: 3.3314', 'Explore P: 0.0120')
('Episode: 943', 'Total reward: 10.0', 'Training loss: 4.0235', 'Explore P: 0.0120')
('Episode: 944', 'Total reward: 9.0', 'Training loss: 5.3680', 'Explore P: 0.0120')
('Episode: 945', 'Total reward: 10.0', 'Training loss: 2.3471', 'Explore P: 0.0119')
('Episode: 946', 'Total reward: 12.0', 'Training loss: 15.2016', 'Explore P: 0.0119')
('Episode: 947', 'Total reward: 9.0', 'Training loss: 4.2687', 'Explore P: 0.0119')
('Episode: 948', 'Total reward: 14.0', 'Training loss: 1.4161', 'Explore P: 0.0119')
('Episode: 949', 'Total reward: 10.0', 'Training loss: 1.2985', 'Explore P: 0.0119')
('Episode: 950', 'Total reward: 13.0', 'Training loss: 1.5685', 'Explore P: 0.0119')
('Episode: 951', 'Total reward: 14.0', 'Training loss: 5.5542', 'Explore P: 0.0119')
('Episode: 952', 'Total reward: 15.0', 'Training loss: 2.0390', 'Explore P: 0.0119')
('Episode: 953', 'Total reward: 12.0', 'Training loss: 1065.4077', 'Explore P: 0.0119')
('Episode: 954', 'Total reward: 13.0', 'Training loss: 2.5117', 'Explore P: 0.0119')
('Episode: 955', 'Total reward: 16.0', 'Training loss: 4.4293', 'Explore P: 0.0119')
('Episode: 956', 'Total reward: 17.0', 'Training loss: 1.8029', 'Explore P: 0.0119')
('Episode: 957', 'Total reward: 19.0', 'Training loss: 8.0507', 'Explore P: 0.0119')
('Episode: 958', 'Total reward: 23.0', 'Training loss: 6.8067', 'Explore P: 0.0119')
('Episode: 959', 'Total reward: 26.0', 'Training loss: 4.0980', 'Explore P: 0.0119')
('Episode: 960', 'Total reward: 22.0', 'Training loss: 2.7337', 'Explore P: 0.0119')
('Episode: 961', 'Total reward: 22.0', 'Training loss: 8.5029', 'Explore P: 0.0119')
('Episode: 962', 'Total reward: 34.0', 'Training loss: 3.4243', 'Explore P: 0.0119')
('Episode: 963', 'Total reward: 63.0', 'Training loss: 3.2597', 'Explore P: 0.0119')
('Episode: 964', 'Total reward: 64.0', 'Training loss: 3.5446', 'Explore P: 0.0119')
('Episode: 965', 'Total reward: 182.0', 'Training loss: 240.3313', 'Explore P: 0.0118')
('Episode: 967', 'Total reward: 200.0', 'Training loss: 1.8179', 'Explore P: 0.0118')
('Episode: 969', 'Total reward: 73.0', 'Training loss: 4.2799', 'Explore P: 0.0117')
('Episode: 970', 'Total reward: 165.0', 'Training loss: 3.7584', 'Explore P: 0.0117')
('Episode: 971', 'Total reward: 163.0', 'Training loss: 5.6516', 'Explore P: 0.0117')
('Episode: 973', 'Total reward: 9.0', 'Training loss: 23.1647', 'Explore P: 0.0116')
('Episode: 974', 'Total reward: 193.0', 'Training loss: 5.7831', 'Explore P: 0.0116')
('Episode: 975', 'Total reward: 174.0', 'Training loss: 6.3897', 'Explore P: 0.0116')
('Episode: 976', 'Total reward: 197.0', 'Training loss: 6.9634', 'Explore P: 0.0115')
('Episode: 978', 'Total reward: 57.0', 'Training loss: 7.4369', 'Explore P: 0.0115')
('Episode: 980', 'Total reward: 32.0', 'Training loss: 6.2683', 'Explore P: 0.0115')
('Episode: 982', 'Total reward: 38.0', 'Training loss: 9.1342', 'Explore P: 0.0114')
('Episode: 984', 'Total reward: 18.0', 'Training loss: 6.0932', 'Explore P: 0.0114')
('Episode: 985', 'Total reward: 186.0', 'Training loss: 60.7453', 'Explore P: 0.0114')
('Episode: 987', 'Total reward: 37.0', 'Training loss: 6.8720', 'Explore P: 0.0113')
('Episode: 989', 'Total reward: 108.0', 'Training loss: 5.4848', 'Explore P: 0.0113')
('Episode: 991', 'Total reward: 138.0', 'Training loss: 20.9855', 'Explore P: 0.0113')
('Episode: 993', 'Total reward: 107.0', 'Training loss: 6.3520', 'Explore P: 0.0112')
('Episode: 995', 'Total reward: 128.0', 'Training loss: 4.3546', 'Explore P: 0.0112')
('Episode: 997', 'Total reward: 128.0', 'Training loss: 2.1486', 'Explore P: 0.0111')
('Episode: 999', 'Total reward: 156.0', 'Training loss: 314.9513', 'Explore P: 0.0111')