2015 August 28
In [ ]:
from IPython.display import Image
Image(filename='img/treat_aud_reward.jpg')
In [ ]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from numpy.random import binomial
from ggplot import *
import random
import sys
plt.figure(figsize=(6,6),dpi=80);
%matplotlib inline
In [ ]:
Image(filename='img/ab.jpg')
Each test is like flipping a fair coin N times
In [ ]:
Image(filename='img/a.jpg')
In [ ]:
df = pd.DataFrame({"coin_toss":binomial(1,0.5,100)})
df.hist()
plt.show()
In [ ]:
df.head()
In [ ]:
df = pd.DataFrame({"coin_toss":binomial(1,0.6,100)})
df.hist()
plt.show()
In [ ]:
df = pd.DataFrame({"coin_%i"%i:binomial(1,0.5,100) for i in range(20)})
df.hist()
plt.show()
In [ ]:
df = pd.DataFrame({"coin_%i"%i:binomial(1,0.52,100) for i in range(20)})
df.hist()
plt.show()
In [ ]:
# 1 arm
payoff = [-0.1,0.5]
a = np.bincount(binomial(1,0.5,100))
print np.dot(a, payoff)
In [ ]:
# 2-arm, equal reward
payoff = [0,1,2]
a = np.bincount(binomial(2,0.5,100))
print np.dot(a, payoff)
In [ ]:
payoff=[1,1.05]
reward1 = np.bincount(binomial(1,0.5,100))[1] * payoff[0]
print reward1
reward2 = np.bincount(binomial(1,0.5,100))[1] * payoff[1]
print reward2
total_reward = reward1 + reward2
print total_reward
In [ ]:
def trial(payoff=[1, 1.01]):
reward1 = np.bincount(binomial(1,0.5,100))[1] * payoff[0]
reward2 = np.bincount(binomial(1,0.5,100))[1] * payoff[1]
return reward1, reward2, reward1 + reward2, reward1-reward2
trials = 100
sim = np.array([trial() for i in range(trials)])
df = pd.DataFrame(sim, columns=["t1", "t2", "tot", "diff"])
print len(df[df["diff"] <= 0.0])
df.hist()
plt.show()
In [ ]:
def trial(ps=[0.5, 0.51], payoff=[1, 1]):
reward1 = np.bincount(binomial(1,ps[0],100))[1] * payoff[0]
reward2 = np.bincount(binomial(1,ps[1],100))[1] * payoff[1]
return reward1, reward2, reward1 + reward2, reward1-reward2
trials = 100
sim = np.array([trial() for i in range(trials)])
df = pd.DataFrame(sim, columns=["t1", "t2", "tot", "diff"])
print len(df[df["diff"] <= 0.0])
df.hist()
plt.show()
In [ ]:
Image(filename='img/abcd.jpg')
In [ ]:
df = pd.DataFrame({"tot_reward":binomial(2,0.5,100)})
df.hist()
plt.show()
In [ ]:
df = pd.DataFrame({"tot_reward":binomial(3,0.5,100)})
df.hist()
plt.show()
In [ ]:
trials = 100
means = [0.1, 0.1, 0.9]
reward = np.zeros(trials)
for m in means:
# equal rewards of 1 or 0
reward += binomial(1,m,trials)
df = pd.DataFrame({"reward":reward, "fair_reward":binomial(3,0.5,trials)})
df.hist()
plt.show()
In [ ]:
sys.path.append('../../BanditsBook/python')
from core import *
Notes:
In [ ]:
random.seed(1)
# Mean arm payoff (Bernoulli)
means = [0.1, 0.1, 0.1, 0.1, 0.9]
# Mulitple arms!
n_arms = len(means)
random.shuffle(means)
arms = map(lambda (mu): BernoulliArm(mu), means)
print("Best arm is " + str(ind_max(means)))
t_horizon = 250
n_sims = 1000
data = []
for epsilon in [0.1, 0.2, 0.3, 0.4, 0.5]:
algo = EpsilonGreedy(epsilon, [], [])
algo.initialize(n_arms)
# results are column oriented
# simulation_num, time, chosen arm, reward, cumulative reward
results = test_algorithm(algo, arms, n_sims, t_horizon)
results.append([epsilon]*len(results[0]))
data.extend(np.array(results).T)
df = pd.DataFrame(data, columns = ["Sim", "T", "ChosenArm", "Reward", "CumulativeReward", "Epsilon"])
df.head()
In [ ]:
a=df.groupby(["Epsilon", "T"]).mean().reset_index()
a.head()
In [ ]:
ggplot(aes(x="T",y="Reward", color="Epsilon"), data=a) + geom_line()
In [ ]:
ggplot(aes(x="T",y="CumulativeReward", color="Epsilon"), data=a) + geom_line()
Upgrades to $\epsilon$-Greedy:
Tempted choose each are in proportion to its current value, i.e.:
$p(A) \propto \frac{rA}{rA + RB}$
$p(B) \propto \frac{rB}{rA + RB}$
Remember Boltzmann, and about adding a temperature, $\tau$:
$p(A) \propto \frac{-\exp(rA/\tau)}{(\exp(rA/\tau) + \exp(rB/\tau))}$
$p(B) \propto \frac{-\exp(rB/\tau)}{(\exp(rA/\tau) + \exp(rB/\tau))}$
And what is annealing?
In [ ]:
t_horizon = 250
n_sims = 1000
algo = AnnealingSoftmax([], [])
algo.initialize(n_arms)
data = np.array(test_algorithm(algo, arms, n_sims, t_horizon)).T
df = pd.DataFrame(data)
#df.head()
df.columns = ["Sim", "T", "ChosenArm", "Reward", "CumulativeReward"]
df.head()
a=df.groupby(["T"]).mean().reset_index()
a.head()
In [ ]:
ggplot(aes(x="T",y="Reward", color="Sim"), data=a) + geom_line()
In [ ]:
ggplot(aes(x="T",y="CumulativeReward", color="Sim"), data=a) + geom_line()
In [ ]:
t_horizon = 250
n_sims = 1000
data = []
for alpha in [0.1, 0.3, 0.5, 0.7, 0.9]:
algo = UCB2(alpha, [], [])
algo.initialize(n_arms)
results = test_algorithm(algo, arms, n_sims, t_horizon)
results.append([alpha]*len(results[0]))
data.extend(np.array(results).T)
df = pd.DataFrame(data, columns = ["Sim", "T", "ChosenArm", "Reward", "CumulativeReward", "Alpha"])
df.head()
In [ ]:
a=df.groupby(["Alpha", "T"]).mean().reset_index()
a.head()
In [ ]:
ggplot(aes(x="T",y="Reward", color="Alpha"), data=a) + geom_line()
In [ ]:
ggplot(aes(x="T",y="CumulativeReward", color="Alpha"), data=a) + geom_line()
The end