In [1]:
%matplotlib inline
import matplotlib
import tanuki
from tanuki.bandit import GaussianBandit
from tanuki.agent import Agent, GradientAgent
from tanuki.policy import EpsilonGreedyPolicy
from tanuki.environment import Environment
In [2]:
# Run-length settings; both values are handed to env.run() in a later cell.
n_trials, n_experiments = 1000, 500

# The bandit shared by every agent in this notebook, built from n_arms
# (presumably the number of arms -- confirm against tanuki.bandit).
n_arms = 10
bandit = GaussianBandit(n_arms)
In [6]:
# Compare two GradientAgents that differ only in `alpha`. Both act on the same
# bandit and share a single EpsilonGreedyPolicy(0.01) instance.
# (An earlier variant used one plain agent: Agent(bandit, EpsilonGreedyPolicy(0.01)).)
policy = EpsilonGreedyPolicy(0.01)
agents = [GradientAgent(bandit, policy, alpha=alpha_value)
          for alpha_value in (0.1, 0.8)]

# The third argument is presumably a display label for the run -- confirm
# against tanuki.environment.Environment.
env = Environment(bandit, agents, 'Epsilon-Greedy')

# Run the experiment; the two returned values are consumed by
# env.plot_results() in a later cell.
scores, optimal = env.run(n_trials, n_experiments)
In [4]:
# Make subsequent figures 10 x 10 (matplotlib measures figsize in inches),
# then plot the two arrays returned by env.run().
matplotlib.rc('figure', figsize=[10.0, 10.0])
env.plot_results(scores, optimal)
In [7]:
# NOTE(review): presumably visualizes what each agent has learned about the
# bandit's arms after the run -- confirm against Environment.plot_beliefs.
env.plot_beliefs()
In [ ]: