rand_rates [0.297358606175418, 0.05603661127506938, 0.6535666667862454, 0.7976295943783823, 0.40277829020538214, 0.5026309217184822, 0.13330716707173468, 0.7912418987726315, 0.05130823808661064, 0.7988684286182635, 0.7233578793010739, 0.13634823264616455, 0.2489012610330408, 0.6557618490434519, 0.005809536754381204, 0.6308226260013383, 0.44080938576122314, 0.3495717055632467, 0.7409902795102667, 0.6830623924209595, 0.636013220359687, 0.2640373207941863, 0.21449798032700132, 0.7196567097740026]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 24
0
samples_from_beta_distr {0: 0.5506979372068089, 1: 0.12734818511988877, 2: 0.3412299350587927, 3: 0.8054136799715076, 4: 0.11273495998826488, 5: 0.2203243263062467, 6: 0.8511804800696697, 7: 0.8272297145067081, 8: 0.7347109102716323, 9: 0.16416247817217394, 10: 0.9116400979973631, 11: 0.552922863768152, 12: 0.409619694092394, 13: 0.053711362851815186, 14: 0.4102858637271981, 15: 0.1755709112810346, 16: 0.28668256839336653, 17: 0.7524991798902002, 18: 0.15638440379315077, 19: 0.6385798660745896, 20: 0.28964771927254906, 21: 0.4114826385191182, 22: 0.2293564552043075, 23: 0.5641615291770792}
selected action: 10 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 0 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1.]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1.])
1
samples_from_beta_distr {0: 0.05526973925225801, 1: 0.9363731604539063, 2: 0.465605153011164, 3: 0.12581031566530032, 4: 0.6687073970492825, 5: 0.0688246970269409, 6: 0.06940331839998404, 7: 0.9944975517768292, 8: 0.5281367997412384, 9: 0.27433989397079594, 10: 0.962324819881041, 11: 0.84290418547375, 12: 0.9244810310422594, 13: 0.5363969521889174, 14: 0.26385278124488276, 15: 0.10954437908648133, 16: 0.32328221248938677, 17: 0.9945727959397604, 18: 0.426383213935463, 19: 0.020936664058784523, 20: 0.4267006237946333, 21: 0.5151407316325075, 22: 0.6693690965319055, 23: 0.7915986862793059}
selected action: 17 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 1 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1.]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
2
samples_from_beta_distr {0: 0.7149540620152979, 1: 0.6274469116797207, 2: 0.2689362121261097, 3: 0.40422501062140864, 4: 0.8028341777614662, 5: 0.019586249997208913, 6: 0.7984343976109964, 7: 0.11227888847772868, 8: 0.11025359222209415, 9: 0.5010775561089528, 10: 0.26781037838055227, 11: 0.3931497465966921, 12: 0.6840780651105326, 13: 0.9633830187751966, 14: 0.11545520670841547, 15: 0.46031173297852973, 16: 0.23529591471963948, 17: 0.8789725805675713, 18: 0.7654694316983518, 19: 0.7707328151974192, 20: 0.13500166038775147, 21: 0.30859018881347205, 22: 0.22780531084144628, 23: 0.30704381125170516}
selected action: 13 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 2 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1.]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
3
samples_from_beta_distr {0: 0.5008524795574897, 1: 0.7056962141022319, 2: 0.367156939182533, 3: 0.727779444686781, 4: 0.5244261681444119, 5: 0.18500644521757653, 6: 0.8666370096647015, 7: 0.5535315128480677, 8: 0.4921355918632565, 9: 0.21494070121617023, 10: 0.3921774398313786, 11: 0.1269807225861865, 12: 0.9238381007513979, 13: 0.9728075801055874, 14: 0.8745181453032552, 15: 0.3932236383790028, 16: 0.6315723870229063, 17: 0.06665402990532296, 18: 0.1286950326858251, 19: 0.7885621117345633, 20: 0.8984930148214297, 21: 0.9015410022676688, 22: 0.301042247559214, 23: 0.3861131899341755}
selected action: 13 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 3 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1.]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
4
samples_from_beta_distr {0: 0.13848779091904423, 1: 0.5197625067781384, 2: 0.14262615127646366, 3: 0.2797798101320302, 4: 0.3298988593741842, 5: 0.23620519517908165, 6: 0.3333982790875833, 7: 0.041444508498744426, 8: 0.6867486548154904, 9: 0.3327732536677623, 10: 0.8468891275215445, 11: 0.11963267574245129, 12: 0.8023926672962596, 13: 0.39619467020285454, 14: 0.5875112104068217, 15: 0.6832789198157829, 16: 0.9300713918896626, 17: 0.5004319145006589, 18: 0.4879605692481827, 19: 0.906440366274931, 20: 0.9424863804079436, 21: 0.44619580204215087, 22: 0.871041277591744, 23: 0.7736776928920787}
selected action: 20 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
time_step: 4 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 2., 1., 1., 1.]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
5
samples_from_beta_distr {0: 0.47314638863837677, 1: 0.07978892984938589, 2: 0.222324699368296, 3: 0.16034178529127938, 4: 0.2980515350590774, 5: 0.13869773473203273, 6: 0.7330845778452255, 7: 0.7993029673258709, 8: 0.5420167940883778, 9: 0.6411556318567913, 10: 0.2134087941262705, 11: 0.08093793094311227, 12: 0.24148391524278792, 13: 0.8979076395623248, 14: 0.7291708166829659, 15: 0.26523576745837457, 16: 0.3647285273983646, 17: 0.10161606127125439, 18: 0.8858706886258826, 19: 0.23656131503068595, 20: 0.8982079860377098, 21: 0.8634390929524525, 22: 0.8229970633818392, 23: 0.19140454648680622}
selected action: 20 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
time_step: 5 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 3., 1., 1., 1.]) tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
6
samples_from_beta_distr {0: 0.1401428801485443, 1: 0.974147463188781, 2: 0.30376828455318594, 3: 0.2339808208046095, 4: 0.28862818964283304, 5: 0.019717153621291665, 6: 0.037667397377785, 7: 0.5805433607444153, 8: 0.7896349439444492, 9: 0.3452625939645493, 10: 0.2723375008915747, 11: 0.9680199747969336, 12: 0.5261443242933898, 13: 0.7532022076115457, 14: 0.6971128282870356, 15: 0.858778497982601, 16: 0.43286767173627794, 17: 0.1683095911489608, 18: 0.1225194324674314, 19: 0.567170548353685, 20: 0.6473972014142961, 21: 0.4620200595430399, 22: 0.09413591565081764, 23: 0.9192375290713243}
selected action: 1 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 6 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 3., 1., 1., 1.]) tensor([1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
7
samples_from_beta_distr {0: 0.9230257456030304, 1: 0.3645578100627148, 2: 0.035751801143866904, 3: 0.5163825333016683, 4: 0.7295101142677434, 5: 0.3323377137166891, 6: 0.22768698061508033, 7: 0.48412559077573286, 8: 0.906553717357562, 9: 0.4535226821169463, 10: 0.6892427607513638, 11: 0.028546127172355832, 12: 0.888217361234233, 13: 0.6172741733879068, 14: 0.4738412264028258, 15: 0.40325916949241236, 16: 0.5898762939432933, 17: 0.3662555138448933, 18: 0.7067267481997584, 19: 0.3364167243371776, 20: 0.640031181286888, 21: 0.8474463483291111, 22: 0.7016873869752583, 23: 0.07036476962761358}
selected action: 0 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 7 new params: tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 3., 1., 1., 1.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
8
samples_from_beta_distr {0: 0.05200012205782353, 1: 0.2904588505686596, 2: 0.14680201010968635, 3: 0.8441923260930786, 4: 0.30877806506706446, 5: 0.42528936085289665, 6: 0.9358085204635976, 7: 0.9617639845921127, 8: 0.8611814057280913, 9: 0.6715686031289314, 10: 0.7641966204317939, 11: 0.33903901440983464, 12: 0.455141863329618, 13: 0.5042246211659381, 14: 0.26735158605238235, 15: 0.9556703034251757, 16: 0.5887054896198906, 17: 0.0016204648865946931, 18: 0.7264641298394964, 19: 0.886252488481099, 20: 0.4125354337797974, 21: 0.2974871470344012, 22: 0.9578466944377555, 23: 0.35280515758000336}
selected action: 7 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 8 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 3., 1., 1., 1.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
9
samples_from_beta_distr {0: 0.7419689093889408, 1: 0.15529480720359373, 2: 0.8221530452311481, 3: 0.7563261261857206, 4: 0.04681377398404287, 5: 0.7941902701191741, 6: 0.1015800840214335, 7: 0.7844727565099932, 8: 0.8953709476303778, 9: 0.4985647507773594, 10: 0.7024491768582631, 11: 0.18423583009803784, 12: 0.762980410909308, 13: 0.5821922200666525, 14: 0.7501712554799144, 15: 0.759222862910083, 16: 0.6515800146895993, 17: 0.7942967034596281, 18: 0.26329567474526666, 19: 0.400140575641863, 20: 0.9783677030030322, 21: 0.7742750879447619, 22: 0.5709717136309612, 23: 0.05749662450975622}
selected action: 20 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
time_step: 9 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 1.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
10
samples_from_beta_distr {0: 0.4139190172336117, 1: 0.22274936135828985, 2: 0.1663903134460372, 3: 0.5413620483595575, 4: 0.642383996219157, 5: 0.24312790497569967, 6: 0.14264179308101835, 7: 0.16688451143264402, 8: 0.058448372369845375, 9: 0.2484477375177364, 10: 0.8793901803686325, 11: 0.13838535388041365, 12: 0.5502816234576827, 13: 0.5495623686596361, 14: 0.440036717435472, 15: 0.6041343530773836, 16: 0.03778897886365288, 17: 0.525824678374354, 18: 0.4489100473324218, 19: 0.1869245667132453, 20: 0.471640822572125, 21: 0.5956641592073141, 22: 0.01699599557776346, 23: 0.8625867356778912}
selected action: 10 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 10 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 1.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
11
samples_from_beta_distr {0: 0.023941136989173013, 1: 0.5633849781493063, 2: 0.035682132704092664, 3: 0.5266266067739089, 4: 0.43959997747319784, 5: 0.5769594742336499, 6: 0.6054956295410395, 7: 0.4537087763146745, 8: 0.2846378502312384, 9: 0.8032369940698173, 10: 0.31815064638565715, 11: 0.12367429677200369, 12: 0.7725832994780015, 13: 0.9547402557719097, 14: 0.20564510113499096, 15: 0.9655792271787055, 16: 0.4919510387616295, 17: 0.09756725237193034, 18: 0.19091708361884224, 19: 0.36388415747570235, 20: 0.8923353913026014, 21: 0.5736379297690785, 22: 0.3684560066072354, 23: 0.9905344317526267}
selected action: 23 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
time_step: 11 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 1., 1., 1.])
12
samples_from_beta_distr {0: 0.21949834489386502, 1: 0.05628738265183388, 2: 0.84326397025065, 3: 0.9491711447110887, 4: 0.0006033228265391446, 5: 0.5162563427268354, 6: 0.1565325710456959, 7: 0.21007858631087178, 8: 0.6567595100939616, 9: 0.3384202759966859, 10: 0.3320112769724615, 11: 0.2666163433107313, 12: 0.41521519813437546, 13: 0.7881101022242507, 14: 0.4307960319657628, 15: 0.9989568162107866, 16: 0.3207213556347316, 17: 0.08999196461546352, 18: 0.9234737000316668, 19: 0.7846278916029781, 20: 0.9247840784674691, 21: 0.4954199373872133, 22: 0.8585124507503339, 23: 0.9619166634773394}
selected action: 15 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 12 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 1.])
13
samples_from_beta_distr {0: 0.31489357195970086, 1: 0.09087247015210756, 2: 0.42286662961752935, 3: 0.7856546617818616, 4: 0.5708594557846264, 5: 0.04672789735504838, 6: 0.7005600759912332, 7: 0.6188625727719199, 8: 0.7685817651132113, 9: 0.6836479338095973, 10: 0.412547943176988, 11: 0.3673065988011884, 12: 0.1836447559962553, 13: 0.7385375744937361, 14: 0.25019083694667693, 15: 0.2044921946156866, 16: 0.5512867672148549, 17: 0.5650508998333585, 18: 0.9878987411301382, 19: 0.8607390807292926, 20: 0.908389130503321, 21: 0.3714578122894905, 22: 0.536392936493946, 23: 0.9986687386717062}
selected action: 23 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 13 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])
14
samples_from_beta_distr {0: 0.8069684115229181, 1: 0.8640798960523162, 2: 0.4826988881769434, 3: 0.7076996308162969, 4: 0.790638458727509, 5: 0.9838776972082116, 6: 0.5311155032768353, 7: 0.6856083532728171, 8: 0.48820624408933383, 9: 0.9916808304036145, 10: 0.50885454173307, 11: 0.27187030305305687, 12: 0.06926106106727559, 13: 0.8385561938099774, 14: 0.704268265170965, 15: 0.5130152862053922, 16: 0.38935566724581694, 17: 0.3970148520559469, 18: 0.1683855969211513, 19: 0.8484727240268563, 20: 0.6103418264007191, 21: 0.5517137783256966, 22: 0.49659040009620314, 23: 0.17496169627188582}
selected action: 9 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 14 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])
15
samples_from_beta_distr {0: 0.08441941473105481, 1: 0.45860929914294407, 2: 0.559234956433128, 3: 0.019415003376341767, 4: 0.8890203047951274, 5: 0.004462665258097771, 6: 0.24255476282249028, 7: 0.4657380065463783, 8: 0.5586388001828917, 9: 0.426874357091701, 10: 0.33474190226130485, 11: 0.6908202822207221, 12: 0.7551423074184013, 13: 0.9018793791696056, 14: 0.8070329139484025, 15: 0.7729865787203681, 16: 0.5816247956778041, 17: 0.31500938132212447, 18: 0.8696541912427456, 19: 0.2572668018630663, 20: 0.8362729671741547, 21: 0.7037300411106899, 22: 0.575568255380829, 23: 0.5924351219002928}
selected action: 13 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 15 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 1., 3., 1., 1., 1., 1.,
1., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])
16
samples_from_beta_distr {0: 0.5168527888599397, 1: 0.7066230734825681, 2: 0.2013856887954505, 3: 0.1316701986290085, 4: 0.2700154973568112, 5: 0.7897202705213702, 6: 0.10965646739899967, 7: 0.7620403957740736, 8: 0.6613910742845466, 9: 0.25128792076220796, 10: 0.8723314979987878, 11: 0.1842910040522458, 12: 0.8494624572542318, 13: 0.5119488622102668, 14: 0.6101457772110034, 15: 0.0891821078549171, 16: 0.9103133945203339, 17: 0.17611432876625704, 18: 0.9194794921359841, 19: 0.09240626489424283, 20: 0.6824103891525054, 21: 0.1492838632243608, 22: 0.7847791282254496, 23: 0.3458127025035864}
selected action: 18 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 16 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 1., 3., 1., 1., 1., 1.,
2., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])
17
samples_from_beta_distr {0: 0.2840401203187039, 1: 0.842367090470767, 2: 0.4445953595271404, 3: 0.7988370624505824, 4: 0.7785231744563916, 5: 0.1974018619823125, 6: 0.2507665181906877, 7: 0.5819362459682696, 8: 0.8764602192108677, 9: 0.914346117421789, 10: 0.47145652172593117, 11: 0.14140837193196007, 12: 0.9951174402488363, 13: 0.7226898835714252, 14: 0.6832564897785053, 15: 0.11925981120562829, 16: 0.36687962462537943, 17: 0.2393232468100464, 18: 0.05975261700632668, 19: 0.6721089734064287, 20: 0.3152655681928277, 21: 0.7682512644142142, 22: 0.8933987320124293, 23: 0.27846036148655706}
selected action: 12 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 17 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 1., 3., 1., 1., 1., 1.,
2., 1., 4., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])
18
samples_from_beta_distr {0: 0.3713856504549808, 1: 0.4355562535428058, 2: 0.4679950082756568, 3: 0.4156312639939726, 4: 0.20979387936834784, 5: 0.8385668199274243, 6: 0.022742235578157747, 7: 0.8461174371631349, 8: 0.3009444473957157, 9: 0.9128802789705209, 10: 0.5039236100745127, 11: 0.42238274488101946, 12: 0.2749461829802582, 13: 0.7885836745906996, 14: 0.14268527408861129, 15: 0.2951486574966772, 16: 0.035709661967266576, 17: 0.6344845140005503, 18: 0.13910494125285813, 19: 0.3655064818595665, 20: 0.995889269568735, 21: 0.05041477002298604, 22: 0.7376208472563899, 23: 0.46890558377229724}
selected action: 20 rwd: 1
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
time_step: 18 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 1., 3., 1., 1., 1., 1.,
2., 1., 5., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])
19
samples_from_beta_distr {0: 0.07036372611869618, 1: 0.44173404974969976, 2: 0.5607874165271313, 3: 0.6481677441701276, 4: 0.2156904412309767, 5: 0.876352086339783, 6: 0.3006043167386701, 7: 0.7370367997928543, 8: 0.5018840142997841, 9: 0.8529051793800777, 10: 0.651589866378827, 11: 0.352943137754137, 12: 0.27810011950183416, 13: 0.6771166004721451, 14: 0.915840094966665, 15: 0.3656478959024862, 16: 0.5039979041604895, 17: 0.06890094022333258, 18: 0.5590439664455854, 19: 0.3531175878448488, 20: 0.812039534097957, 21: 0.8615785837220523, 22: 0.6843142962177696, 23: 0.3026153926605436}
selected action: 14 rwd: 0
updated rewd vec: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
time_step: 19 new params: tensor([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 1., 1., 3., 1., 1., 1., 1.,
2., 1., 5., 1., 1., 2.]) tensor([2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 2., 2., 2., 1., 2.,
1., 1., 1., 1., 1., 2.])