Chapter 2 of Algorithms for Sequential Decision Making (Michael Littman's PhD thesis)
In [3]:
%matplotlib inline
In [4]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
In [5]:
def series(discount_factor, n=1e6):
    """Approximate the geometric series 1 + b + b**2 + ... by a partial sum.

    Parameters
    ----------
    discount_factor : float
        Ratio ``b`` of the series; must satisfy ``0 < b < 1``.
    n : int or float, optional
        Number of leading terms to add (exponents ``0 .. int(n) - 1``).
        The value is truncated with ``int`` so the float default ``1e6``
        is accepted.

    Returns
    -------
    The sum of ``discount_factor ** i`` for ``i`` in ``range(int(n))``.

    Raises
    ------
    AssertionError
        If ``discount_factor`` is not strictly between 0 and 1.
    """
    assert 0 < discount_factor < 1, "Discount factor must be >0 and <1"
    # Generator expression: terms are consumed one at a time instead of first
    # materialising a list with int(n) floats.
    return sum(discount_factor ** i for i in range(int(n)))
In [ ]:
# Discount factors at which the series is evaluated and plotted.
betas = [
    0.1, 0.25, 0.5,
    0.75, 0.9, 0.99,
]
In [7]:
# By calculation: evaluate the truncated series numerically for every beta
plt.plot(betas, list(map(series, betas)));
In [8]:
# By formula: the sum of beta**i over i >= 0 is 1 / (1 - beta).
# series() starts at exponent 0, so this closed form is the curve that the
# numerical plot should reproduce.
plt.plot(betas, [1 / (1 - beta) for beta in betas]);
In [62]:
def action1(beta, t):
    """Return the sum of ``-(beta ** k)`` for ``k = 0 .. t - 1``.

    With ``t == 0`` there are no terms and the result is ``0``.
    """
    exponents = range(t)
    # Unary minus binds looser than **, so each term is -(beta ** k).
    return sum(-(beta ** k) for k in exponents)
def action2(beta, t):
    """Return ``-(beta ** 2) / (1 - beta)``.

    The step ``t`` is accepted so the signature parallels ``action1``'s,
    but it does not influence the result.
    """
    numerator = -(beta ** 2)
    denominator = 1 - beta
    return numerator / denominator
In [92]:
# Discount factors to compare and the number of steps tabulated for each.
betas = [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]
steps = 20

# One record per (beta, step) pair, in beta-major order, holding the value
# of both actions at that step.
rows = [
    {
        'step': step,
        'beta': beta,
        'a1': action1(beta, step),
        'a2': action2(beta, step),
    }
    for beta in betas
    for step in range(steps)
]

# Wide table first, then long form: the 'a1'/'a2' columns are unpivoted into
# melt's default 'variable' / 'value' columns, keyed by step and beta.
df = pd.DataFrame(rows)
df_ = pd.melt(df, id_vars=['step', 'beta'])
At some step, depending on $\beta$, we'll realize that $a_2$ is superior.
In [113]:
# One panel per beta, one colour per action ('variable' is 'a1' or 'a2').
g = sns.FacetGrid(df_, col='beta', hue='variable', sharey=False)
# FacetGrid.map calls pointplot separately on each facet/hue subset, so the
# category order is fixed explicitly to keep the x positions consistent.
g.map(sns.pointplot, 'step', 'value', order=sorted(df_['step'].unique()))
# Without a legend the two hue colours cannot be matched to a1 / a2.
g.add_legend();