In [72]:
import numpy as np
import matplotlib.pyplot as plt

def sample_a_mean(payoff_mean, payoff_stdev, cost_mean, cost_stdev, cost_probability, sample_size):
    # Each trial earns a Normal(payoff_mean, payoff_stdev) payoff and, with
    # probability cost_probability, incurs a Normal(cost_mean, cost_stdev) cost.
    payoffs = np.random.normal(loc=payoff_mean, scale=payoff_stdev, size=sample_size)
    costs = np.random.normal(loc=cost_mean, scale=cost_stdev, size=sample_size) * np.random.binomial(1, cost_probability, size=sample_size)
    return (payoffs - costs).mean()
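sample_a_mean simulates a process whose expected value is available analytically: E[reward] = payoff_mean - cost_probability * cost_mean, since the cost is only incurred with probability cost_probability. The call in In [78] should therefore hover near 23.25 - 0.5 * 10.00 = 18.25, and the one in In [48] near 23.25 - 0.01 * 10.00 = 23.15.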
In [71]:
(np.random.normal(loc=23, scale=1, size=1000) - (np.random.normal(loc=10, scale=1, size=1000) * np.random.binomial(1, 1, size=1000))).mean()
Out[71]:
In [66]:
np.random.binomial(1, 1, size=100)
Out[66]:
In [78]:
sample_a_mean(23.25, 2.00, 10.00, 1.00, .5, 10000)
Out[78]:
In [48]:
sample_a_mean(23.25, 1.00, 10.00, 1.00, .01, 100000)
Out[48]:
In [11]:
np.random.binomial(1,.15, size=10)
Out[11]:
In [5]:
np.array([1,2,3,4])*np.array([0,.15,.5,1])
Out[5]:
In [ ]:
payoffs = np.random.normal(loc=payoff_mean, scale=payoff_stdev, size=sample_size)
costs = np.random.normal(loc=cost_mean, scale=cost_stdev, size=sample_size) * np.random.binomial(1, cost_probability, size=sample_size)
In [86]:
class Bandit:
    """Two-armed bandit: every pull draws a reward for BOTH arms (so the
    counterfactual is recorded), but only the chosen arm's reward counts."""
    def __init__(self, control_payoff_mean, control_cost_mean, variant_payoff_mean, variant_cost_mean):
        self._control_payoff_mean = control_payoff_mean
        self._control_cost_mean = control_cost_mean
        self._variant_payoff_mean = variant_payoff_mean
        self._variant_cost_mean = variant_cost_mean
        self._control_cost_probability = .1
        self._variant_cost_probability = .1
        self._control_rewards = []
        self._variant_rewards = []
        self._chosen_rewards = []

    def _pull(self):
        control_payoff = self._pull_control()
        variant_payoff = self._pull_variant()
        self._control_rewards.append(control_payoff)
        self._variant_rewards.append(variant_payoff)
        return {'control': control_payoff, 'variant': variant_payoff}

    def pull_control(self):
        chosen_reward = self._pull()['control']
        self._chosen_rewards.append(chosen_reward)
        return chosen_reward

    def pull_variant(self):
        chosen_reward = self._pull()['variant']
        self._chosen_rewards.append(chosen_reward)
        return chosen_reward

    def _pull_control(self):
        # Payoff is Normal with sd 5; a Normal cost is incurred 10% of the time.
        return np.random.normal(loc=self._control_payoff_mean, scale=5.00) - np.random.binomial(1, self._control_cost_probability) * np.random.normal(loc=self._control_cost_mean, scale=1.00)

    def _pull_variant(self):
        return np.random.normal(loc=self._variant_payoff_mean, scale=5.00) - np.random.binomial(1, self._variant_cost_probability) * np.random.normal(loc=self._variant_cost_mean, scale=1.00)
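Every pull draws a reward for both arms, so _control_rewards and _variant_rewards record what each arm would have earned had it been chosen every time, while _chosen_rewards records what the strategy actually earned. A quick smoke test (illustrative values; the long-run control mean here would be 22 - .1 * 9 = 21.1):

b = Bandit(22.00, 9.00, 24.00, 10.00)
b.pull_control()   # one control reward, drawn fresh each call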
In [129]:
class RPMStrategy:
    """Randomized probability matching: sample a plausible mean for each arm
    from a bootstrap over its observations and pull the arm whose draw wins."""
    def __init__(self):
        self._control_observations = []
        self._variant_observations = []

    def log_control_observation(self, observation):
        self._control_observations.append(observation)

    def log_variant_observation(self, observation):
        self._variant_observations.append(observation)

    def _better_bootstrap(self, data, max_value=200.):
        # Pad the data with Uniform(0, max_value) draws standing in for values
        # we haven't observed yet, then take the mean of an ordinary bootstrap
        # resample. With N observations, Binomial(N+1, 1/(N+1)) injects one
        # padding value on average, so its influence fades as N grows.
        p_unobserved = 1. / (len(data) + 1.)
        number_of_randoms = np.random.binomial(len(data) + 1, p_unobserved)
        random_values = [np.random.uniform(0., max_value) for i in range(number_of_randoms)]
        new_data = data + random_values
        sample_mean = np.random.choice(new_data, size=len(new_data)).mean()
        return sample_mean

    def choose_arm(self, control_lambda, variant_lambda):
        control_mean = self._better_bootstrap(self._control_observations)
        variant_mean = self._better_bootstrap(self._variant_observations)
        if control_mean > variant_mean:
            self._control_observations.append(control_lambda())
        else:
            self._variant_observations.append(variant_lambda())
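Because each arm's estimated mean is itself a random draw, choose_arm pulls arms in proportion to the probability that they currently look best (randomized probability matching, a bootstrap flavor of Thompson sampling). An arm with no observations falls back to a single Uniform(0, 200) draw, so unexplored arms still get tried:

RPMStrategy()._better_bootstrap([])   # with no data: one Uniform(0, 200) draw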
In [176]:
experiment = Bandit(22.00, 9.00, 24.00, 10.00)
bandit = RPMStrategy()
for i in range(2000):
    bandit.choose_arm(experiment.pull_control, experiment.pull_variant)
In [177]:
plt.plot(np.array(experiment._chosen_rewards).cumsum())
plt.plot(np.array(experiment._control_rewards).cumsum())
plt.plot(np.array(experiment._variant_rewards).cumsum())
Out[177]:
In [178]:
plt.plot(np.array(experiment._variant_rewards).cumsum() - np.array(experiment._chosen_rewards).cumsum())
Out[178]:
In [179]:
plt.plot(np.array(experiment._variant_rewards).cumsum() - np.array(experiment._control_rewards).cumsum())
Out[179]:
In [180]:
plt.plot(np.array(experiment._chosen_rewards).cumsum() - np.array(experiment._control_rewards).cumsum())
Out[180]:
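These plots compare cumulative earnings: In [178] (always-variant minus chosen) is the strategy's regret against the best arm, In [179] is the gap between the two arms themselves, and In [180] (chosen minus always-control) is what the strategy gained over never switching.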
In [126]:
experiment._control_rewards
Out[126]:
In [107]:
np.random.binomial(1000, .001)
Out[107]:
In [181]:
def better_bootstrap(data, max_value=200.):
    # Same trick as RPMStrategy._better_bootstrap: pad the data with
    # Uniform(0, max_value) draws standing in for values we haven't seen,
    # then bootstrap-resample and return the sample mean.
    p_unobserved = 1. / (len(data) + 1.)
    number_of_randoms = np.random.binomial(len(data) + 1, p_unobserved)
    random_values = [np.random.uniform(0., max_value) for i in range(number_of_randoms)]
    new_data = data + random_values
    sample_mean = np.random.choice(new_data, size=len(new_data)).mean()
    return sample_mean
In [200]:
le_data = list(np.random.normal(loc=25, scale=5, size=5))
plt.hist([better_bootstrap(le_data) for i in range(5000)], bins=100)
''
Out[200]:
In [215]:
def random_interval_bootstrap(data, sample_count=1000, max_value=200.):
    """A bootstrap that works on an interval like 0 to 200. It assumes that
    the likelihood of seeing a value we haven't seen yet is 1/(N+1)."""
    sampled_means = []
    for x in range(sample_count):
        p_unobserved = 1. / (len(data) + 1.)
        number_of_randoms = np.random.binomial(len(data) + 1, p_unobserved)
        random_values = [np.random.uniform(0., max_value) for i in range(number_of_randoms)]
        new_data = data + random_values
        sample_mean = np.random.choice(new_data, size=len(new_data)).mean()
        sampled_means.append(sample_mean)
    return sampled_means

le_data = list(np.random.normal(loc=25, scale=5, size=2))
plt.hist(random_interval_bootstrap(le_data, 5000), bins=100)
''
Out[215]:
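With only two observations the Uniform(0, 200) padding dominates, so the histogram of sampled means is wide and skewed toward high values; as observations accumulate, the single expected padding point carries weight 1/(N+1) and the distribution tightens around the sample mean.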
In [276]:
import random

class BanditScenario:
    """Simulates visitors: every visitor gets a (possibly zero) payoff drawn
    for EVERY treatment, so each arm's counterfactual earnings are recorded."""
    def __init__(self, scenario):
        self._scenario = scenario
        self._scenario_payoffs = {treatment_name: [] for treatment_name in self._scenario.keys()}

    def next_visitor(self, show_treatment):
        for key, value in self._scenario.items():
            ordered = np.random.binomial(1, value['conversion_rate'])
            order_average = np.random.normal(loc=value['order_average'], scale=5.00)
            self._scenario_payoffs[key].append(ordered * order_average)
        return self._scenario_payoffs[show_treatment][-1]

class BanditPlayer:
    """Baseline player: shows a uniformly random treatment to each visitor."""
    def __init__(self, treatments):
        self._payoffs = []
        self._treatments = treatments
        self._results = {treatment_name: [] for treatment_name in treatments}

    def next_treatment_to_show(self):
        return random.choice(self._treatments)

    def visitor_results(self, treatment_shown, money_made):
        treatment_results = self._results[treatment_shown]
        treatment_results.append(money_made)
        self._payoffs.append(money_made)

simulated_experiment = BanditScenario({
    'A': {
        'conversion_rate': .05,
        'order_average': 35.00
    },
    'B': {
        'conversion_rate': .06,
        'order_average': 36.00
    },
    'C': {
        'conversion_rate': .07,
        'order_average': 31.00
    }
})
bandit = BanditPlayer(['A', 'B', 'C'])
visitor_count = 500
for i in range(visitor_count):
    treatment_name = bandit.next_treatment_to_show()
    money_made = simulated_experiment.next_visitor(show_treatment=treatment_name)
    bandit.visitor_results(treatment_name, money_made)
In [277]:
plt.plot(np.array(simulated_experiment._scenario_payoffs['A']).cumsum())
plt.plot(np.array(simulated_experiment._scenario_payoffs['B']).cumsum())
plt.plot(np.array(simulated_experiment._scenario_payoffs['C']).cumsum())
Out[277]:
In [278]:
plt.plot(np.array(bandit._payoffs).cumsum())
Out[278]:
In [219]:
random.choice([1,2,3,4,5])
Out[219]:
In [223]:
random.uniform(0,1)
Out[223]:
In [257]:
np.random.binomial(1, .9)
Out[257]:
In [258]:
{}.items()
Out[258]:
In [1]:
class BanditScenario:
    """Like the earlier BanditScenario, but also tracks the bandit's own
    payoffs separately in _bandit_payoffs."""
    def __init__(self, scenario):
        self._scenario = scenario
        self._scenario_payoffs = {treatment_name: [] for treatment_name in self._scenario.keys()}
        self._bandit_payoffs = []

    def next_visitor(self, show_treatment):
        for key, value in self._scenario.items():
            ordered = np.random.binomial(1, value['conversion_rate'])
            order_average = np.random.normal(loc=value['order_average'], scale=5.00)
            self._scenario_payoffs[key].append(ordered * order_average)
            if key == show_treatment:
                self._bandit_payoffs.append(ordered * order_average)
        return self._scenario_payoffs[show_treatment][-1]

class SimpleBandit:
    """Explore-then-exploit on a fixed schedule: 5 pulls of each treatment in
    turn, then 5 pulls of the treatment with the highest total payout so far,
    then the cycle restarts."""
    def __init__(self, treatments):
        self._treatments = treatments
        self._selection_count = 0
        self._exploitation_count = 0
        self._payouts = {treatment: 0 for treatment in treatments}

    def choose_treatment(self):
        self._selection_count += 1
        if self._selection_count <= 5 * len(self._treatments):
            # Explore: 5 consecutive pulls per treatment, round-robin.
            return self._treatments[(self._selection_count - 1) // 5]
        else:
            # Exploit the current best for 5 pulls, then start the cycle over.
            self._exploitation_count += 1
            if self._exploitation_count == 5:
                self._exploitation_count = 0
                self._selection_count = 0
            return sorted(self._payouts.items(), key=lambda x: x[1], reverse=True)[0][0]

    def log_payout(self, treatment, amount):
        self._payouts[treatment] += amount

class RPMBandit:
    """Randomized probability matching over any number of treatments, using
    the padded bootstrap from better_bootstrap to sample each arm's mean."""
    def __init__(self, treatments):
        self._treatments = treatments
        self._payoffs = {treatment: [] for treatment in treatments}

    def choose_treatment(self):
        max_treatment = self._treatments[0]
        max_value = float('-inf')
        for key, value in self._payoffs.items():
            random_numbers_from_range = np.random.binomial(len(value) + 1, 1.0 / (len(value) + 1))
            generated_data = value + [random.uniform(0, 200) for i in range(random_numbers_from_range)]
            sampled_mean = np.random.choice(generated_data, size=len(generated_data)).mean()
            if sampled_mean > max_value:
                max_treatment = key
                max_value = sampled_mean
        return max_treatment

    def log_payout(self, treatment, payout):
        self._payoffs[treatment].append(payout)
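Both strategies expose the same choose_treatment/log_payout interface, so they plug into the same visitor loop. One illustrative decision from each (outputs here are hypothetical):

SimpleBandit(['A', 'B']).choose_treatment()   # 'A': the schedule starts in its explore phase
RPMBandit(['A', 'B']).choose_treatment()      # random: with no data, each arm's mean is a Uniform(0, 200) draw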
In [342]:
simulated_experiment = BanditScenario({
    'A': {
        'conversion_rate': .05,
        'order_average': 35.00
    },
    'B': {
        'conversion_rate': .06,
        'order_average': 36.00
    }
})
simple_bandit = SimpleBandit(['A', 'B'])
for visitor_i in range(1500):
    treatment = simple_bandit.choose_treatment()
    payout = simulated_experiment.next_visitor(treatment)
    simple_bandit.log_payout(treatment, payout)
plt.title('Money made by different strategies')
plt.xlabel('Visitor #')
plt.ylabel('Total $ made')
plt.plot(np.array(simulated_experiment._scenario_payoffs['B']).cumsum(), label='Treatment B')
plt.plot(np.array(simulated_experiment._bandit_payoffs).cumsum(), label='Bandit')
plt.plot(np.array(simulated_experiment._scenario_payoffs['A']).cumsum(), label='Treatment A')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
Out[342]:
In [323]:
len(simulated_experiment._bandit_payoffs)
Out[323]:
In [354]:
plt.title('Distribution of means for N(35,5) distribution (sampling 100 vs 500 data points)')
plt.xlabel('Sample mean')
plt.ylabel('Counts')
plt.hist([np.random.normal(loc=35, scale=5, size=100).mean() for i in range(2500)], label='100 sample mean')
plt.hist([np.random.normal(loc=35, scale=5, size=500).mean() for i in range(2500)], label='500 sample mean')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
''
Out[354]:
In [360]:
plt.hist([np.random.normal(loc=35, scale=5, size=100).mean() -
          np.random.normal(loc=34, scale=5, size=100).mean()
          for i in range(2500)], bins=30, label='difference of 100-sample means')
''
Out[360]:
In [370]:
plt.title('Distributions of a mean of 34 and 35 with 50 samples')
plt.xlabel('Sample mean')
plt.ylabel('Counts')
plt.hist([np.random.normal(loc=35, scale=5, size=50).mean()
for i in range(2500)],
bins=30, label='mean of 35', alpha=.8)
plt.hist([np.random.normal(loc=34, scale=5, size=50).mean()
for i in range(2500)],
bins=30, label='mean of 34', alpha=.8)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
''
Out[370]:
In [372]:
sum([np.random.normal(loc=34, scale=5, size=50).mean() > np.random.normal(loc=35, scale=5, size=50).mean()
for i in range(1500)])/1500.
Out[372]:
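This estimate has a closed form: a 50-sample mean of N(mu, 5) has variance 25/50 = 0.5, so the difference of the two means is N(-1, 1) and the lower-mean sample wins with probability Phi(-1), about 0.159. A quick cross-check (assuming scipy is available):

from scipy.stats import norm
norm.cdf(-1.0)   # ~ 0.1587, the analytic counterpart of the estimate above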
In [378]:
np.random.choice([34,32,33], size=3).mean()
Out[378]:
In [382]:
np.random.binomial(1, .8, 3)
Out[382]:
In [383]:
random
Out[383]:
In [386]:
[random.uniform(0,200) for i in range(10)]
Out[386]:
In [392]:
np.random.binomial(0, 1/7., 2)
Out[392]:
In [390]:
np.random.binomial?
In [398]:
plt.hist([better_bootstrap([1]) for i in range(5000)], bins=100)
''
Out[398]:
In [42]:
def run_bandit_sim(bandit_algorithm):
    # Both arms convert every visitor (conversion_rate 1), so payoffs are
    # simply N(35, 5) vs N(50, 5) and the bandit just has to find arm B.
    simulated_experiment = BanditScenario({
        'A': {
            'conversion_rate': 1,
            'order_average': 35.00
        },
        'B': {
            'conversion_rate': 1,
            'order_average': 50.00
        }
    })
    simple_bandit = bandit_algorithm
    for visitor_i in range(500):
        treatment = simple_bandit.choose_treatment()
        payout = simulated_experiment.next_visitor(treatment)
        simple_bandit.log_payout(treatment, payout)
    return sum(simulated_experiment._bandit_payoffs)

simple_bandit_results = np.array([run_bandit_sim(SimpleBandit(['A', 'B'])) for i in range(300)])
rpm_bandit_results = np.array([run_bandit_sim(RPMBandit(['A', 'B'])) for i in range(300)])
print('SimpleBandit: ' + str(simple_bandit_results.mean()))
print('RPMBandit: ' + str(rpm_bandit_results.mean()))
plt.title('Payoffs of SimpleBandit vs RPMBandit')
plt.xlabel('Total Payoff')
plt.ylabel('Observations')
plt.hist(simple_bandit_results, label='SimpleBandit', alpha=.8, bins=40)
plt.hist(rpm_bandit_results, label='RPMBandit', alpha=.8, bins=40)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
''
Out[42]:
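Each of the 300 runs sends 500 visitors through a fresh scenario where every visitor converts, so a strategy that always picked B would earn roughly 500 * 50 = 25,000 per run; the histograms show where each strategy's total payoffs actually land, and In [34] compares the two strategies' grand totals.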
In [32]:
sum(simple_bandit_results)
Out[32]:
In [33]:
sum(rpm_bandit_results)
Out[33]:
In [34]:
sum(rpm_bandit_results)/sum(simple_bandit_results)
Out[34]: