In [1]:
import pandas as pd
import seaborn as sns
from Agent import Agent
sns.set_style('whitegrid')
%matplotlib inline

# Environment name handed to NewAgent(...) as `env_name`
# (presumably a Gym environment id -- confirm against Agent).
GAME = 'CartPole-v0'
# Episode cap passed to NewAgent.run_plot(); the loop stops once the agent's
# episode counter reaches this value.
MAX_EPISODES = 100

class NewAgent(Agent):
    """``Agent`` subclass that adds an episode loop recording one reward value per episode."""

    def __init__(self, env_name):
        """Build the agent by passing ``env_name`` straight through to ``Agent.__init__``."""
        super().__init__(env_name)

    def run_plot(self, MAX_EPISODES):
        """Run episodes until ``self.episode`` is no longer below ``MAX_EPISODES``.

        Each iteration calls ``self.run_episode()``, increments ``self.episode``,
        appends the current ``self.accumulate_reward`` to
        ``self.accumulate_reward_list`` and prints the episode number together
        with that reward. ``'done'`` is printed once the loop exits.

        No plotting happens here; the method only fills the reward list.

        Args:
            MAX_EPISODES: upper bound (exclusive) for ``self.episode``. The name
                shadows the module-level constant inside this method.

        NOTE(review): ``episode``, ``accumulate_reward``, ``accumulate_reward_list``
        and ``run_episode`` are expected to come from ``Agent`` -- confirm there.
        """
        while True:
            # Same check the loop header would make, evaluated before every episode.
            if not self.episode < MAX_EPISODES:
                break
            self.run_episode()
            self.episode += 1
            latest_reward = self.accumulate_reward
            self.accumulate_reward_list.append(latest_reward)
            print(self.episode, latest_reward)
        print('done')


def main():
    """Run a ``NewAgent`` on ``GAME`` and plot its recorded reward history.

    Creates the agent, runs ``run_plot`` with ``MAX_EPISODES`` as the episode
    cap, then draws ``accumulate_reward_list`` as a line chart. The chart is
    given a title and axis labels so it can be read on its own.
    """
    agent = NewAgent(GAME)
    agent.run_plot(MAX_EPISODES)

    # Series.plot returns the matplotlib Axes it drew on; use it to label the
    # figure. The x axis is the list position, i.e. the order in which
    # run_plot appended the values.
    ax = pd.Series(agent.accumulate_reward_list).plot(figsize=(16, 6))
    ax.set_title(GAME)
    ax.set_xlabel('episode')
    ax.set_ylabel('accumulate_reward')

# Entry-point guard: call main() only when this code runs as the top-level
# module (not when it is imported).
if __name__ == '__main__':
    main()


2 8.2
3 34.480000000000004
4 38.33200000000001
5 42.69880000000001
6 45.728920000000016
7 50.256028000000015
8 54.330425200000015
9 57.99738268000001
10 60.39764441200002
11 63.45787997080002
12 66.21209197372002
13 67.79088277634801
14 70.11179449871321
15 72.20061504884188
16 74.08055354395769
17 75.77249818956192
18 77.29524837060572
19 78.66572353354515
20 79.89915118019063
21 80.10923606217156
22 79.3983124559544
23 79.65848121035896
24 80.79263308932306
25 80.91336978039075
26 81.02203280235167
27 82.01982952211651
28 82.01784656990486
29 82.91606191291437
30 82.82445572162293
31 82.74201014946063
32 83.56780913451456
33 84.3110282210631
34 84.07992539895679
35 83.8719328590611
36 85.484739573155
37 85.13626561583949
38 83.92263905425554
39 82.83037514882999
40 83.64733763394699
41 84.38260387055229
42 83.24434348349706
43 84.01990913514736
44 84.71791822163262
45 85.34612639946936
46 85.01151375952242
47 85.61036238357018
48 86.14932614521315
49 86.63439353069184
50 86.17095417762265
51 85.75385875986038
52 86.27847288387434
53 86.75062559548691
54 88.07556303593822
55 89.26800673234439
56 87.64120605910995
57 87.07708545319895
58 86.56937690787905
59 86.11243921709114
60 87.50119529538202
61 88.75107576584381
62 89.87596818925942
63 89.98837137033348
64 90.08953423330013
65 90.18058080997011
66 89.36252272897309
67 89.52627045607578
68 89.6736434104682
69 88.90627906942137
70 89.11565116247924
71 88.40408604623131
72 88.66367744160817
73 87.99730969744735
74 88.29757872770261
75 86.76782085493235
76 85.39103876943912
77 85.9519348924952
78 85.55674140324568
79 86.1010672629211
80 85.69096053662899
81 85.32186448296609
82 85.88967803466947
83 84.60071023120253
84 84.34063920808227
85 85.00657528727403
86 85.60591775854662
87 85.24532598269195
88 84.92079338442275
89 83.72871404598047
90 84.45584264138242
91 85.11025837724418
92 85.69923253951976
93 85.32930928556777
94 84.996378357011
95 84.69674052130989
96 83.5270664691789
97 83.37435982226101
98 82.33692384003491
99 82.30323145603141
100 82.27290831042826
done

In [ ]: