In [1]:
%matplotlib inline
import matplotlib

import tanuki
from tanuki.bandit import GaussianBandit
from tanuki.agent import Agent, GradientAgent
from tanuki.policy import EpsilonGreedyPolicy
from tanuki.environment import Environment

In [2]:
# Experiment configuration: tune these values to change the size of the run.
n_arms = 10           # size handed to the GaussianBandit constructor
n_trials = 1000       # first argument later given to env.run
n_experiments = 500   # second argument later given to env.run

# The bandit instance shared by every agent and by the environment.
bandit = GaussianBandit(n_arms)

In [6]:
# Earlier, now-disabled setup: a single plain Agent(bandit, EpsilonGreedyPolicy(0.01)).
# Current setup: two gradient agents that share one policy object and differ
# only in the `alpha` value they are constructed with.
policy = EpsilonGreedyPolicy(0.01)
agents = [GradientAgent(bandit, policy, alpha=alpha_value)
          for alpha_value in (0.1, 0.8)]

# Run all agents on the bandit; the two returned results are consumed by the
# plotting cell (env.plot_results).
env = Environment(bandit, agents, 'Epsilon-Greedy')
scores, optimal = env.run(n_trials, n_experiments)

In [4]:
# Set the default figure size to 10 x 10 for every figure drawn from here on.
matplotlib.rc('figure', figsize=[10.0, 10.0])

# Plot the outcome of the run using the two results returned by env.run.
env.plot_results(scores, optimal)



In [7]:
# Draw the environment's "beliefs" plot for the agents it was run with.
# NOTE(review): presumably the agents' learned estimates per arm — confirm
# against tanuki.environment.Environment.plot_beliefs.
env.plot_beliefs()



In [ ]: