Chapter 11. Null Hypothesis Significance Testing

Exercise 11.1
Exercise 11.2
Exercise 11.3

Exercise 11.1

Purpose: To compute p values for stopping at fixed N and fixed z.

Part A

fixed N



In [1]:

    
import numpy as np
from scipy.misc import factorial



In [2]:

    
N = 45
z = 3
theta = 1/6



In [3]:

    
def binomial(theta, N, z):
    coef = factorial(N) / factorial(N-z) / factorial(z)
    p = coef * theta**z * (1 - theta)**(N-z)
    return p



In [4]:

    
tail = np.arange(z+1)
tail









    Out[4]:





array([0, 1, 2, 3])



In [5]:

    
p = binomial(theta, N, tail).sum() * 2 # left and right tail probability
p









    Out[5]:





0.089203344214784791

Part B

fixed z



In [6]:

    
right_tail = np.arange(z, N)
p_right = z / right_tail * binomial(theta, right_tail, z)
p = (1 - p_right.sum()) * 2
p









    Out[6]:





0.031262731621325646

Exercise 11.2

Purpose: To determine NHST CIs, and notice that they depend on the experimenter’s intention.



In [7]:

    
N = 45
z = 3

Part A

fixed N



In [8]:

    
left_tail = np.arange(z+1)
theta = np.arange(0.170, 0.190, 0.001)



In [9]:

    
p = map(lambda t: binomial(t, N, left_tail).sum()*2, theta)
p = list(p)



In [10]:

    
list(zip(theta, p))









    Out[10]:





[(0.17000000000000001, 0.079301003321235491),
 (0.17100000000000001, 0.076528179820395031),
 (0.17200000000000001, 0.073842447818352835),
 (0.17300000000000001, 0.071241492554731084),
 (0.17400000000000002, 0.068723042742581952),
 (0.17500000000000002, 0.066284870576604413),
 (0.17600000000000002, 0.063924791692357513),
 (0.17700000000000002, 0.061640665079151488),
 (0.17800000000000002, 0.059430392949216274),
 (0.17900000000000002, 0.05729192056566549),
 (0.18000000000000002, 0.05522323603169306),
 (0.18100000000000002, 0.053222370043359085),
 (0.18200000000000002, 0.051287395608242525),
 (0.18300000000000002, 0.049416427732159429),
 (0.18400000000000002, 0.047607623076067938),
 (0.18500000000000003, 0.045859179585204912),
 (0.18600000000000003, 0.044169336092423669),
 (0.18700000000000003, 0.042536371897628329),
 (0.18800000000000003, 0.040958606325127343),
 (0.18900000000000003, 0.039434398260657491)]



In [11]:

    
p = np.array(p)
p_idx = np.nonzero(p > 0.05)[0][-1]
theta1 = theta[p_idx]
p[p_idx], theta1









    Out[11]:





(0.051287395608242525, 0.18200000000000002)



In [12]:

    
right_tail = np.arange(z, N)
theta = np.arange(0.005, 0.020, 0.001)



In [13]:

    
p = map(lambda t: binomial(t, N, right_tail).sum()*2, theta)
p = list(p)



In [14]:

    
p = np.array(p)
p_idx = np.nonzero(p > 0.05)[0][0]
theta2 = theta[p_idx]
p[p_idx], theta2









    Out[14]:





(0.050321604252686322, 0.014000000000000002)



In [15]:

    
theta2, theta1









    Out[15]:





(0.014000000000000002, 0.18200000000000002)

Part B

fixed z



In [ ]:



In [16]:

    
theta = np.arange(0.150, 0.160, 0.001)



In [17]:

    
low_tail = np.arange(z, N)

def p_greated_than(theta):
    p_right = z / low_tail * binomial(theta, low_tail, z)
    p = (1 - p_right.sum()) * 2
    return p

p = map(p_greated_than, theta)
p = list(p)



In [18]:

    
list(zip(theta, p))









    Out[18]:





[(0.14999999999999999, 0.059952767891028103),
 (0.151, 0.057709084141494227),
 (0.152, 0.055542671846889879),
 (0.153, 0.053451171198829472),
 (0.154, 0.051432280746387393),
 (0.155, 0.049483756607446416),
 (0.156, 0.047603411654460581),
 (0.157, 0.045789114677364351),
 (0.158, 0.044038789526212208),
 (0.159, 0.042350414236001166),
 (0.16, 0.04072202013599191)]



In [19]:

    
p_idx = np.nonzero(np.array(p) > 0.05)[0][-1]
theta1 = theta[p_idx]
p[p_idx], theta1









    Out[19]:





(0.051432280746387393, 0.154)



In [20]:

    
theta = np.arange(0.005, 0.020, 0.001)
high_tail = np.arange(z+1)



In [ ]:



In [21]:

    
high_tail = np.arange(z, N+1)

def p_less_than(theta):
    p = z / high_tail * binomial(theta, high_tail, z)
    p = 2 * p.sum()
    return p

p = map(p_less_than, theta)
p = list(p)



In [22]:

    
p = np.array(p)
p_idx = np.nonzero(p > 0.05)[0][0]
theta2 = theta[p_idx]
p[p_idx], theta2









    Out[22]:





(0.05032160425268635, 0.014000000000000002)



In [23]:

    
theta2, theta1









    Out[23]:





(0.014000000000000002, 0.154)

Exercise 11.3

Purpose: To determine the p value when data collection stops at a fixed duration.



In [24]:

    
N = 45
z = 3
theta = 1/6



In [25]:

    
Ns = np.arange(40, 51)
p_N = np.ones_like(Ns) / len(Ns)



In [28]:

    
p_total = 0
for i, n in enumerate(Ns):
    # For the current `n`, determine the max z that is in the low tail:
    z_max = np.arange(0, n+1) / n
    z_max = np.nonzero(z_max <= z/N)[0][-1]
    low_tail = np.arange(0, z_max+1)
    p = 2*binomial(theta, n, low_tail).sum()
    p_total += p_N[i] * p
    print(n, p)









    



40 0.0547023780784
41 0.0476264485758
42 0.0414274505072
43 0.0360033271972
44 0.0312627316213
45 0.0892033442148
46 0.0788568062517
47 0.0696331483776
48 0.0614226953457
49 0.0541245148729
50 0.0476460575692



In [29]:

    
p_total









    Out[29]:





0.055628082055612135



In [ ]: