In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [8]:
bad = [0, 10, 20, 30, 40, 50, 59, 69, 78, 88, 98, 108, 118, 127, 137, 146, 154, 163, 171, 180, 189, 198, 208, 218, 228, 237, 247, 256, 265, 274, 282, 292, 300, 310, 319, 329, 338, 347, 357, 367, 376, 386, 396, 406, 415, 425, 434, 444, 453, 463, 473, 483, 493, 503, 512, 520, 529, 538, 548, 558, 567, 577, 586, 595, 604, 614, 622, 632, 641, 651, 660, 669, 679, 689, 699, 708, 718, 727, 737, 746, 756, 766, 775, 784, 794, 804, 814, 824, 834, 844, 853, 863, 873, 883, 893, 903, 913, 923, 932, 942]
good = [0, 9, 15, 26, 40, 55, 69, 86, 104, 121, 141, 159, 178, 195, 213,
233, 251, 271, 291, 311, 330, 347, 365, 385, 405, 425, 445, 465,
485, 503, 523, 543, 563, 580, 599, 619, 639, 658, 678, 698, 718,
738, 758, 778, 797, 817, 837, 857, 876, 896, 916, 936, 956, 976,
996, 1016, 1036, 1056, 1076, 1093, 1113, 1133, 1153, 1173, 1193,
1213, 1231, 1251, 1271, 1289, 1307, 1327, 1347, 1366, 1386, 1406,
1426, 1446, 1466, 1483, 1503, 1523, 1543, 1563, 1583, 1603, 1623,
1643, 1663, 1683, 1703, 1723, 1743, 1762, 1781, 1800, 1820, 1840, 1860, 1880]
mode = [0, 7, 16, 27, 38, 54, 72, 91, 111, 131, 151, 171, 190, 210, 230, 250, 270, 290, 310, 330, 350, 370, 390, 410, 430, 450, 470, 490, 510, 530, 550, 570, 590, 610, 630, 650, 670, 690, 710, 730, 750, 770, 790, 810, 830, 850, 870, 890, 910, 930, 950, 970, 990, 1010, 1030, 1050, 1070, 1090, 1110, 1130, 1150, 1170, 1190, 1210, 1230, 1250, 1270, 1290, 1310, 1330, 1350, 1370, 1390, 1410, 1430, 1450, 1470, 1490, 1510, 1530, 1550, 1570, 1590, 1610, 1630, 1650, 1670, 1690, 1710, 1729, 1749, 1769, 1789, 1809, 1829, 1849, 1869, 1889, 1909, 1929]
xs = range(0, 5000, 50)
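Each reward series has to match the length of its x-axis range, or plt.plot raises a ValueError. A quick sanity check (a suggested guard, not part of the original notebook):
for name, series in [("bad", bad), ("good", good), ("mode", mode)]:
    assert len(series) == len(xs), "%s has %d points but xs has %d" % (name, len(series), len(xs))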
In [9]:
plt.plot(xs,good, label="Bayesian sampling")
plt.plot(xs,mode, label="Bayesian ML")
plt.plot(xs,bad, label="Q-learner")
plt.xlabel("steps")
plt.ylabel("reward")
plt.title("Performance on Loop environment")
plt.legend(loc=2)
Out[9]:
[figure: reward vs. steps on the Loop environment]
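The same four-line plot setup recurs for every environment below. A small helper (hypothetical, not in the original notebook) would factor it out:
def plot_learning_curves(curves, title, legend_loc=2):
    """Plot reward vs. steps for several learners.

    curves: iterable of (xs, ys, label) triples.
    """
    for x, y, label in curves:
        plt.plot(x, y, label=label)
    plt.xlabel("steps")
    plt.ylabel("reward")
    plt.title(title)
    plt.legend(loc=legend_loc)
For example, the cell above becomes plot_learning_curves([(xs, good, "Bayesian sampling"), (xs, mode, "Bayesian ML"), (xs, bad, "Q-learner")], "Performance on Loop environment").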
In [6]:
chain_good = [2, 84, 168, 264, 360, 474, 674, 1028, 1212, 1514, 1734, 1982, 2218, 2530, 2748, 2850, 3100, 3374, 3582, 3918, 4130, 4328, 4648, 4912, 5218, 5456, 5816, 6076, 6246, 6430, 6648, 6888, 7186, 7334, 7470, 7780, 7994, 8262, 8478, 8754, 8958, 9170, 9364, 9544, 9724, 9996, 10312, 10544, 10796, 10962, 11242, 11450, 11704, 11934, 12148, 12332, 12640, 12882, 13012, 13188, 13422, 13626, 13748, 14004, 14226, 14372, 14556, 14660, 14884, 15010, 15122, 15336, 15694, 15900, 16192, 16386, 16654, 16900, 17060, 17266, 17430, 17670, 17852, 18096, 18212, 18434, 18632, 18778, 18972, 19242, 19334, 19526, 19822, 19972, 20294, 20522, 20712, 20964, 21230, 21458]
chain_mode = [2, 46, 96, 150, 222, 298, 376, 464, 546, 620, 692, 784, 862, 944, 1052, 1132, 1210, 1298, 1378, 1454, 1540, 1628, 1708, 1780, 1862, 1936, 2014, 2154, 2332, 2392, 2510, 2734, 2984, 3250, 3448, 3642, 3916, 4192, 4356, 4612, 4786, 4916, 5030, 5204, 5500, 5614, 5770, 6018, 6268, 6596, 6900, 7186, 7432, 7622, 7970, 8168, 8314, 8554, 8810, 9064, 9198, 9372, 9648, 9908, 10248, 10476, 10692, 10930, 11102, 11350, 11698, 11956, 12120, 12354, 12546, 12756, 12998, 13320, 13506, 13742, 14090, 14256, 14478, 14744, 14878, 15156, 15312, 15542, 15788, 16090, 16394, 16678, 16876, 17116, 17344, 17542, 17760, 18018, 18274, 18550]
chains_bad = [0, 148, 418, 706, 910, 1230, 1538, 1808, 2096, 2476, 2716, 3050, 3286, 3518, 3860, 4188, 4482, 4856, 5198, 5476, 5712, 6018, 6338, 6704, 7018, 7374, 7698, 8154, 8536, 8822, 9006, 9230, 9582, 9808, 9996, 10206, 10588, 10992, 11348, 11552, 11932, 12264, 12570, 12872, 13174, 13430, 13622, 13942, 14246, 14548]
xs = range(0, 5000, 50)
xs2 = range(0, 5000, 100)  # Q-learner was logged every 100 steps, so it has 50 points instead of 100
In [7]:
plt.plot(xs,chain_good, label="Bayesian sampling")
plt.plot(xs,chain_mode, label="Bayesian ML")
plt.plot(xs2,chains_bad, label="Q-learner")
plt.xlabel("steps")
plt.ylabel("reward")
plt.title("Performance on Chain environment")
plt.legend(loc=2)
Out[7]:
[figure: reward vs. steps on the Chain environment]
In [58]:
small_bayesianML = [0, 1, 4, 8, 14, 20, 32, 44, 56, 69, 81, 94, 106, 119, 131, 144, 156, 169, 181, 194, 206, 219, 231, 244, 256, 269, 281, 294, 306, 319, 331, 344, 356, 369, 381, 394, 406, 419, 431, 444, 456, 469, 481, 494, 506, 519, 531, 544, 556, 569, 581, 594, 606, 619, 631, 644, 656, 669, 681, 694, 706, 719, 731, 744, 756, 769, 781, 794, 806, 819, 831, 844, 856, 869, 881, 894, 906, 919, 931, 944, 956, 969, 981, 994, 1006, 1019, 1031, 1044, 1056, 1069, 1081, 1094, 1106, 1119, 1131, 1144, 1156, 1169, 1181, 1194]
small_bML = [0, 2, 2, 5, 7, 8, 9, 9, 11, 14, 16, 18, 18, 22, 24, 26, 28, 32, 34, 36, 38, 40, 44, 46, 50, 54, 56, 60, 63, 64, 66, 69, 71, 74, 75, 77, 81, 84, 86, 87]  # defined but not plotted below
sbML = small_bayesianML[:40]  # truncate to the 40 points in xs = range(0, 2000, 50)
MLML = [0, 1, 3, 10, 19, 36, 60, 85, 109, 134, 159, 184, 209, 234, 259, 284, 309, 334, 359, 384]
qlearner = [0, 9, 23, 36, 43, 53, 54, 57, 62, 65, 68, 70, 73, 75, 76, 79, 81, 84, 86, 91]
xs = range(0, 2000, 50)
xs2 = range(0, 2000, 100)  # MLML and qlearner were logged every 100 steps (20 points)
In [59]:
plt.plot(xs,sbML, label="Bayesian sampling")
plt.plot(xs2,MLML, label="Bayesian ML")
plt.plot(xs2,qlearner, label="Q-learner")
plt.xlabel("steps")
plt.ylabel("reward")
plt.title("Performance on Easy Maze environment")
plt.legend(loc=2)
Out[59]:
[figure: reward vs. steps on the Easy Maze environment]
In [6]:
qln = [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 5, 5, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 11, 11, 11, 11, 11, 13, 13, 14, 14, 16, 16, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 20, 20, 21, 23, 23, 23, 23, 23, 23, 23, 24, 26, 26, 27, 27, 28, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]
mode = [0, 1, 1, 1, 1, 1, 3, 3, 3, 5, 7, 7, 7, 7, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 13, 13, 15, 15, 15, 15, 15, 15, 15, 19, 19, 19, 21, 21, 21, 21, 23, 23, 25, 25, 25, 26, 26, 26, 26, 26, 26, 27, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 33, 33, 33, 33, 33, 33, 33, 33, 35, 35, 35, 37, 38, 38, 38, 38, 38, 40, 40, 42, 42, 44, 44, 44, 44, 44]
sampling = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 4, 4, 4, 6, 6, 6, 6, 6, 8, 8, 9, 11, 13, 15, 15, 17, 17, 17, 19, 21, 21, 22, 22, 24, 24, 25, 25, 26, 28, 30, 30, 32, 32, 34, 34, 35, 35, 35, 35, 37, 38, 38, 38, 39, 39, 39, 41, 42, 44, 45, 45, 45, 45, 47, 47, 47, 47, 47, 47, 49, 49, 50, 50, 52, 53, 53, 53, 54, 55, 55, 55, 57, 58, 60]
xs = range(0, 5000, 50)
gptd = [0, 0, 3, 6, 6, 9, 12, 12, 15, 15, 15, 21, 24, 26, 26, 26, 26, 26, 26, 29, 32, 32, 35, 35, 38, 38, 38, 38, 41, 44, 44, 47, 50, 53, 53, 53, 53, 56, 56, 56, 56, 56, 59, 62, 65, 65, 65, 68, 71, 73, 73, 73, 73, 76, 79, 79, 79, 84, 87, 87, 90, 93, 93, 93, 93, 96, 99, 99, 99, 101, 101, 107, 107, 107, 110, 113, 116, 116, 119, 119, 119, 125, 125, 125, 125, 128, 128, 128, 128, 131, 136, 136, 136, 136, 139, 144, 144, 144, 147, 147]
for i in range(len(qln)):  # halve the Q-learner totals (integer division, as in the original Python 2 run)
    qln[i] = qln[i] // 2
# mode = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 4, 5, 5, 5, 7, 7, 7, 7, 8, 8, 8, 10, 10, 10, 12, 12, 12, 12, 14, 14, 16, 16, 16, 16, 16, 18, 18, 20, 22, 22, 22, 22, 22, 23, 23, 25, 25, 25, 27, 27, 29, 29, 29, 31, 31, 33, 33, 33, 35, 36, 36, 36, 36, 38, 40, 40, 42, 42, 43, 45, 45, 45, 47, 49, 50, 51, 52, 53, 54, 54, 54, 54, 54, 54, 56, 57, 57, 58]
In [7]:
plt.plot(xs,sampling, label="Bayesian sampling")
plt.plot(xs,mode, label="Bayesian ML")
plt.plot(xs,qln, label="Q-learner")
plt.plot(xs, gptd, label="GPTD learner")
plt.xlabel("steps")
plt.ylabel("reward")
plt.title("Performance on Hard Maze environment")
plt.legend(loc=2)
Out[7]:
[figure: reward vs. steps on the Hard Maze environment]
In [21]:
gptd = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 112, 112, 112, 112, 128, 128, 128, 128, 128, 128, 128, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 184, 244, 244, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 296, 296, 296, 296, 296, 296, 296, 296, 296, 316, 316, 316, 316, 316, 316, 316, 316, 316, 316, 352, 352, 352, 352, 368, 368, 368, 396, 396, 396, 396, 396, 396, 396, 396, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 436, 496, 496, 496, 496, 496, 524, 524, 524, 524, 524, 524, 524, 524, 524, 552, 552, 552, 552, 552, 552, 552, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 628, 628, 628, 628, 628, 628, 628, 628, 628, 628, 628, 628, 628]
sampling = [0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 40, 40, 40, 40, 40, 40, 40, 40, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 96, 96, 96, 96, 96, 96, 96, 101, 101, 101, 101, 101, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148]
ml = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 92, 92, 92, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133, 133]
xs = range(0, 20000, 50)
In [23]:
plt.plot(xs,sampling, label="Bayesian sampling", color='b')
plt.plot(xs, ml, label="Bayesian ML", color='g')
plt.plot(xs, gptd, label="GPTD learner", color='c')
plt.xlabel("steps")
plt.ylabel("reward")
plt.title("Performance on Very Hard Maze environment")
plt.legend(loc=2)
Out[23]:
[figure: reward vs. steps on the Very Hard Maze environment]
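The curves above appear to be cumulative reward totals (they are nondecreasing), so per-step reward rates can be recovered by differencing. A sketch of that view (an added illustration, not part of the original analysis), using the Very Hard Maze data:
# Average reward per step within each 50-step logging window
steps = np.asarray(list(xs))
rate = np.diff(sampling) / 50.0
plt.plot(steps[1:], rate, label="Bayesian sampling rate")
plt.xlabel("steps")
plt.ylabel("reward per step")
plt.legend(loc=1)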