In [1]:
# Notebook setup: inline figures, module auto-reloading, and all imports.
%matplotlib inline
# autoreload mode 2 reloads imported modules before each cell runs, so edits to
# the helper modules imported below are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
import pandas as pd
# Ask pandas to apply its 'default' matplotlib styling to plots.
# NOTE(review): this display option was deprecated in later pandas releases --
# confirm the pandas version this notebook runs against still accepts it.
pd.options.display.mpl_style = 'default'
import numpy as np
# NOTE(review): the modules below look project-local; they must be importable
# from the notebook's working directory.
from abtest_util import SimStream, DonationProb, EmpiricalDonationProb
from bayesian_abtest import CostABTest
from nh_abtest import NHABTest, samples_per_branch_calculator
from abstract_abtest import expected_results, expected_results_by_lift

In [2]:
#Define Ground Truth Click Through Rates
# DonationProb class
p_B = DonationProb(0.19)
p_A = DonationProb(0.20)

# Estimate Control CTR From Historical Data
n = 10000
hist_data_B = SimStream(p_B).get_next_records(n)
p_hat = EmpiricalDonationProb(hist_data_B, p_B.values)
ci =  p_hat.p_donate_ci()
print "CI over control:",  ci
interval = round(1.0/ci[0])*50
print "Evalaution Interval", interval

#interest in lift vlaues:
lifts = [-0.20, -0.10, -0.05, -0.025, -0.015, 0.015, 0.025, 0.05, 0.10, 0.20]


CI over control: (0.18033011359550957, 0.18779999999999997, 0.1952935250740197)
Evalaution Interval 300.0

In [ ]:
# Cost-based A/B test (CostABTest): expected results for every lift in `lifts`.

iters = 200               # value passed as the iteration count to expected_results_by_lift
cost = 0.001
max_run = float('inf')    # no cap on run length for this test

# Positional arguments forwarded alongside CostABTest.
# NOTE(review): the two leading None entries look like placeholders that
# expected_results_by_lift fills in per lift -- confirm against its source.
cost_test_args = [None, None, interval, max_run, cost]

expected_results_by_lift(CostABTest, cost_test_args, iters, p_hat, lifts)


-0.2
-0.1

In [5]:
# Set Up NH  AB Test
mde = 0.05
alpha = 0.05
power = 0.95
max_run = samples_per_branch_calculator(p_hat, mde, alpha, power)
print max_run

iters = 20
expected_results_by_lift(NHABTest,[None, None, interval, max_run, alpha], iters, p_hat, lifts)


44960.7213831
-0.2
-0.1
-0.05
-0.025
-0.015
0.015
0.025
0.05
0.1
0.2
Out[5]:
% lift A over B P(Choosing A) Median P(Unknown) Median Avg Time
0 -20.0 0.00 0.00 870
1 -10.0 0.00 0.00 3000
2 -5.0 0.00 0.00 9810
3 -2.5 0.10 0.35 19380
4 -1.5 0.15 0.40 21000
5 1.5 0.65 0.35 23655
6 2.5 0.90 0.10 17190
7 5.0 0.95 0.00 17790
8 10.0 1.00 0.00 3645
9 20.0 1.00 0.00 1005