In [1]:
import numpy as np
import pandas as pd
from emulator.main import Account
In [2]:
# Create the emulator account object with its default constructor arguments;
# later cells call its step() method once per sampled action.
A = Account()
In [3]:
# Seeded generator so that Restart & Run All reproduces the same action sequence.
SEED = 42
N_ACTIONS = 3   # actions are drawn from {0, 1, 2}; presumably the ids Account.step accepts — confirm
N_STEPS = 32    # number of actions to sample (one step() call per action)

rng = np.random.default_rng(SEED)
# Upper bound is exclusive, matching the original randint(0, 3) range.
actions = rng.integers(0, N_ACTIONS, size=N_STEPS)
In [4]:
# Feed every sampled action to the account, in order, and keep only the first
# element of each 3-tuple that step() returns (used downstream as the reward).
# The other two elements are presumably next-state and a done flag — confirm
# against emulator.main.Account.
rewrads_list = [
    reward
    for reward, _second, _third in map(A.step, actions)
]
In [5]:
GAMMA = 0.9  # discount factor applied to the running return at each step

# Discounted return for every step, computed from the last reward backwards:
#   R_list[t] = rewrads_list[t] + GAMMA * R_list[t + 1]   (with 0 past the end)
# Results are written straight into their final positions, so no reversal
# of the list is needed afterwards.
R_list = [None] * len(rewrads_list)
R = 0
for t in range(len(rewrads_list) - 1, -1, -1):
    R = rewrads_list[t] + GAMMA * R
    R_list[t] = R
In [6]:
# Side-by-side table: per-step reward and the discounted return from that step.
tmp = pd.DataFrame(
    {
        'reward': rewrads_list,
        'discount': R_list,
    }
)
In [7]:
# Display the reward / discounted-return table (one row per step).
tmp
Out[7]:
In [ ]: