$H_0$: two populations have the same mean
$p$-value: under $H_0$, what is the probability of observing a difference in means at least as large as the one we observed?
In [1]:
import numpy as np
In [153]:
def bootstrap_p(B: np.ndarray,
                N: np.ndarray,
                n_bootstraps: int = int(1e4)) -> float:
    """Two-sided bootstrap p-value for a difference in sample means.

    Under H0 (both samples come from populations with the same mean) the two
    samples are pooled, and ``n_bootstraps`` pairs of resamples are drawn with
    replacement from the pool. The p-value is the fraction of resampled pairs
    whose absolute difference in means is at least as large as the observed
    absolute difference.

    Parameters
    ----------
    B, N : array-like
        The two samples. They are flattened to 1-D and may differ in length;
        each resample has the same size as the sample it stands in for.
    n_bootstraps : int, default 10_000
        Number of bootstrap replicates.

    Returns
    -------
    float
        Estimated p-value in ``[0, 1]``.

    Raises
    ------
    ValueError
        If either sample is empty or ``n_bootstraps`` is smaller than 1.

    Notes
    -----
    Draws from NumPy's global random state (``np.random.choice``); call
    ``np.random.seed`` beforehand for reproducible results.
    """
    B = np.asarray(B).ravel()
    N = np.asarray(N).ravel()
    if B.size == 0 or N.size == 0:
        raise ValueError("B and N must each contain at least one observation")
    if n_bootstraps < 1:
        raise ValueError("n_bootstraps must be a positive integer")

    obs_diff = np.abs(B.mean() - N.mean())

    # Pooling the samples encodes H0: both groups share one distribution.
    U = np.concatenate([B, N])

    # One column per replicate. Sizes come from the inputs themselves rather
    # than from a global, so unequal sample sizes are handled correctly.
    Bs = np.random.choice(U, (B.size, n_bootstraps), replace=True)
    Ns = np.random.choice(U, (N.size, n_bootstraps), replace=True)

    diff = np.abs(Bs.mean(axis=0) - Ns.mean(axis=0))
    assert len(diff) == n_bootstraps

    # ">=" (not ">"): a p-value counts outcomes at least as extreme as the
    # observed one, so ties with the observed statistic must be included.
    return float(np.mean(diff >= obs_diff))
In [154]:
# Case: population means 10 vs 5 with unit-variance noise -- a large true
# difference, so a very small p-value is expected.
# NOTE(review): `n` was not assigned in any visible cell (leftover kernel
# state); 100 is a placeholder sample size -- confirm the intended value.
n = 100
bootstrap_p(10 + np.random.randn(n),
            5 + np.random.randn(n))
Out[154]:
In [155]:
# Case: both samples drawn with mean 10 and unit-variance noise, i.e. H0 holds.
# NOTE(review): `n` was not assigned in any visible cell (leftover kernel
# state); 100 is a placeholder sample size -- confirm the intended value.
n = 100
bootstrap_p(10 + np.random.randn(n),
            10 + np.random.randn(n))
Out[155]:
In [156]:
# Case: population means 11 vs 10 with unit-variance noise (true difference of 1).
# NOTE(review): `n` was not assigned in any visible cell (leftover kernel
# state); 100 is a placeholder sample size -- confirm the intended value.
n = 100
bootstrap_p(11 + np.random.randn(n),
            10 + np.random.randn(n))
Out[156]:
In [175]:
# Case: population means 10.1 vs 10 with unit-variance noise (small true
# difference of 0.1, a tenth of the noise standard deviation).
# NOTE(review): `n` was not assigned in any visible cell (leftover kernel
# state); 100 is a placeholder sample size -- confirm the intended value.
n = 100
bootstrap_p(10.1 + np.random.randn(n),
            10 + np.random.randn(n))
Out[175]:
In [164]:
# Case: both samples drawn with mean 10 and noise scaled by .1e-5, i.e. H0
# holds and the spread of both samples is tiny.
# NOTE(review): `n` was not assigned in any visible cell (leftover kernel
# state); 100 is a placeholder sample size -- confirm the intended value.
n = 100
bootstrap_p(10 + .1e-5 * np.random.randn(n),
            10 + .1e-5 * np.random.randn(n))
Out[164]:
In [ ]: