Chapter 11. Null Hypothesis Significance Testing

Exercise 11.1

Purpose: To compute p values for stopping at fixed N and fixed z.

Part A

fixed N


In [1]:
import numpy as np
from scipy.misc import factorial

In [2]:
N = 45
z = 3
theta = 1/6

In [3]:
def binomial(theta, N, z):
    coef = factorial(N) / factorial(N-z) / factorial(z)
    p = coef * theta**z * (1 - theta)**(N-z)
    return p

In [4]:
tail = np.arange(z+1)
tail


Out[4]:
array([0, 1, 2, 3])

In [5]:
p = binomial(theta, N, tail).sum() * 2 # left and right tail probability
p


Out[5]:
0.089203344214784791

Part B

fixed z


In [6]:
right_tail = np.arange(z, N)
p_right = z / right_tail * binomial(theta, right_tail, z)
p = (1 - p_right.sum()) * 2
p


Out[6]:
0.031262731621325646

Exercise 11.2

Purpose: To determine NHST CIs, and notice that they depend on the experimenter’s intention.


In [7]:
N = 45
z = 3

Part A

fixed N


In [8]:
left_tail = np.arange(z+1)
theta = np.arange(0.170, 0.190, 0.001)

In [9]:
p = map(lambda t: binomial(t, N, left_tail).sum()*2, theta)
p = list(p)

In [10]:
list(zip(theta, p))


Out[10]:
[(0.17000000000000001, 0.079301003321235491),
 (0.17100000000000001, 0.076528179820395031),
 (0.17200000000000001, 0.073842447818352835),
 (0.17300000000000001, 0.071241492554731084),
 (0.17400000000000002, 0.068723042742581952),
 (0.17500000000000002, 0.066284870576604413),
 (0.17600000000000002, 0.063924791692357513),
 (0.17700000000000002, 0.061640665079151488),
 (0.17800000000000002, 0.059430392949216274),
 (0.17900000000000002, 0.05729192056566549),
 (0.18000000000000002, 0.05522323603169306),
 (0.18100000000000002, 0.053222370043359085),
 (0.18200000000000002, 0.051287395608242525),
 (0.18300000000000002, 0.049416427732159429),
 (0.18400000000000002, 0.047607623076067938),
 (0.18500000000000003, 0.045859179585204912),
 (0.18600000000000003, 0.044169336092423669),
 (0.18700000000000003, 0.042536371897628329),
 (0.18800000000000003, 0.040958606325127343),
 (0.18900000000000003, 0.039434398260657491)]

In [11]:
p = np.array(p)
p_idx = np.nonzero(p > 0.05)[0][-1]
theta1 = theta[p_idx]
p[p_idx], theta1


Out[11]:
(0.051287395608242525, 0.18200000000000002)

In [12]:
right_tail = np.arange(z, N)
theta = np.arange(0.005, 0.020, 0.001)

In [13]:
p = map(lambda t: binomial(t, N, right_tail).sum()*2, theta)
p = list(p)

In [14]:
p = np.array(p)
p_idx = np.nonzero(p > 0.05)[0][0]
theta2 = theta[p_idx]
p[p_idx], theta2


Out[14]:
(0.050321604252686322, 0.014000000000000002)

In [15]:
theta2, theta1


Out[15]:
(0.014000000000000002, 0.18200000000000002)

Part B

fixed z


In [ ]:


In [16]:
theta = np.arange(0.150, 0.160, 0.001)

In [17]:
low_tail = np.arange(z, N)

def p_greated_than(theta):
    p_right = z / low_tail * binomial(theta, low_tail, z)
    p = (1 - p_right.sum()) * 2
    return p

p = map(p_greated_than, theta)
p = list(p)

In [18]:
list(zip(theta, p))


Out[18]:
[(0.14999999999999999, 0.059952767891028103),
 (0.151, 0.057709084141494227),
 (0.152, 0.055542671846889879),
 (0.153, 0.053451171198829472),
 (0.154, 0.051432280746387393),
 (0.155, 0.049483756607446416),
 (0.156, 0.047603411654460581),
 (0.157, 0.045789114677364351),
 (0.158, 0.044038789526212208),
 (0.159, 0.042350414236001166),
 (0.16, 0.04072202013599191)]

In [19]:
p_idx = np.nonzero(np.array(p) > 0.05)[0][-1]
theta1 = theta[p_idx]
p[p_idx], theta1


Out[19]:
(0.051432280746387393, 0.154)

In [20]:
theta = np.arange(0.005, 0.020, 0.001)
high_tail = np.arange(z+1)

In [ ]:


In [21]:
high_tail = np.arange(z, N+1)

def p_less_than(theta):
    p = z / high_tail * binomial(theta, high_tail, z)
    p = 2 * p.sum()
    return p

p = map(p_less_than, theta)
p = list(p)

In [22]:
p = np.array(p)
p_idx = np.nonzero(p > 0.05)[0][0]
theta2 = theta[p_idx]
p[p_idx], theta2


Out[22]:
(0.05032160425268635, 0.014000000000000002)

In [23]:
theta2, theta1


Out[23]:
(0.014000000000000002, 0.154)

Exercise 11.3

Purpose: To determine the p value when data collection stops at a fixed duration.


In [24]:
N = 45
z = 3
theta = 1/6

In [25]:
Ns = np.arange(40, 51)
p_N = np.ones_like(Ns) / len(Ns)

In [28]:
p_total = 0
for i, n in enumerate(Ns):
    # For the current `n`, determine the max z that is in the low tail:
    z_max = np.arange(0, n+1) / n
    z_max = np.nonzero(z_max <= z/N)[0][-1]
    low_tail = np.arange(0, z_max+1)
    p = 2*binomial(theta, n, low_tail).sum()
    p_total += p_N[i] * p
    print(n, p)


40 0.0547023780784
41 0.0476264485758
42 0.0414274505072
43 0.0360033271972
44 0.0312627316213
45 0.0892033442148
46 0.0788568062517
47 0.0696331483776
48 0.0614226953457
49 0.0541245148729
50 0.0476460575692

In [29]:
p_total


Out[29]:
0.055628082055612135

In [ ]: