In [3]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
# import rpy2
from scipy.stats import beta, combine_pvalues
from statsmodels.sandbox.stats.multicomp import multipletests
%matplotlib inline
plt.rcParams['figure.figsize'] = (12.0, 10.0)
In [4]:
# Simulation parameters: an even 50/50 split of true-null and true-alternative
# hypotheses, with a 5% false-positive rate on nulls and 95% power on alts.
total_null = 500
total_alt = 500
rejected_null = 0.05 * total_null   # expected false rejections
rejected_alt = 0.95 * total_alt     # expected true rejections
In [5]:
# Three stacked bars: the full population, the post-test split
# (survivors + rejections), and the rejected set alone.  Passing
# `columns=` fixes the column order in one step.
hypothesis_df = pd.DataFrame(
    {'null hypotheses': [total_null, total_null - rejected_null, 0],
     'rejected nulls': [0, rejected_null, rejected_null],
     'alt hypotheses': [total_alt, total_alt - rejected_alt, 0],
     'rejected alts': [0, rejected_alt, rejected_alt]},
    index=['population of hypotheses',
           'rejected_hypotheses',
           'only rejected hypotheses'],
    columns=['null hypotheses', 'rejected nulls', 'alt hypotheses', 'rejected alts'],
)
def plot_hyp_df():
    """Stacked-bar view of the 50/50 scenario (reads the global hypothesis_df)."""
    hypothesis_df.plot(
        kind='bar',
        stacked=True,
        color=['lightsteelblue', 'darkblue', 'wheat', 'orange'],
        title="When we are halfway right: 95% of rejected hypotheses are alt",
    )
    plt.show()
In [6]:
plot_hyp_df()
In [7]:
# A more realistic mix: 95% of the hypotheses we test are truly null.
total_null = 950
total_alt = 50
rejected_null = 0.05 * total_null   # expected false rejections
rejected_alt = 0.95 * total_alt     # expected true rejections
In [8]:
# Rebuild the same three-bar frame for the 95/5 scenario; the column
# order is fixed at construction time instead of via a second selection.
hypothesis_df = pd.DataFrame(
    {'null hypotheses': [total_null, total_null - rejected_null, 0],
     'rejected nulls': [0, rejected_null, rejected_null],
     'alt hypotheses': [total_alt, total_alt - rejected_alt, 0],
     'rejected alts': [0, rejected_alt, rejected_alt]},
    index=['population of hypotheses',
           'rejected_hypotheses',
           'only rejected hypotheses'],
    columns=['null hypotheses', 'rejected nulls', 'alt hypotheses', 'rejected alts'],
)
def plot_hyp_df_again():
    """Stacked-bar view of the 95/5 scenario (reads the global hypothesis_df).

    With mostly-null hypotheses, only about half of the rejections are
    true alternatives, even at 95% power.
    """
    hypothesis_df.plot(
        kind='bar',
        stacked=True,
        color=['lightsteelblue', 'darkblue', 'wheat', 'orange'],
        # fixed typo in the displayed title: "hypothese" -> "hypotheses"
        title="When we're mostly wrong: 50% of rejected hypotheses are alt",
    )
    plt.show()
In [9]:
plot_hyp_df_again()
In [55]:
# P-value densities: under the null, p ~ Uniform(0, 1) (density 1 everywhere);
# under the alternative we model p ~ Beta(1, 100), which piles mass near zero.
a = 1
b = 100
x = np.arange(0, 1, 0.001)       # 1000 grid points on [0, 1)
null_distr = np.ones_like(x)     # uniform density = 1 at every grid point
alt_distr = beta(a=a, b=b).pdf(x)
In [56]:
def plot_our_p_distrs():
    """Overlay the null (uniform) and alternative (beta) p-value densities."""
    for curve in (null_distr, alt_distr):
        plt.plot(x, curve)
    plt.legend(['null_distr', 'alt_distr'])
    plt.ylim((0, 15))  # clip the beta spike so both curves stay readable
    plt.title('Null Versus Alternative Hypothesis P Values')
    plt.show()
In [57]:
plot_our_p_distrs()
In [58]:
# Fix the legacy-global RNG seed so the simulated p values below are reproducible.
np.random.seed(118943579)
But first, let's assume that 90% of the hypotheses we are testing are null.
In [59]:
# Simulate p values for 1000 tests: 900 true nulls, 100 true alternatives (90% null).
total_null = 900
total_alt = 100
# Null p values are Uniform(0, 1); alt p values follow Beta(a, b) defined above.
# NOTE: the draw order matters — changing it would change the seeded RNG stream.
null_pulls = np.random.random(total_null)
alt_pulls = np.random.beta(a=a, b=b, size=total_alt)
In [1]:
def plot_sim():
    """Stacked histogram of the simulated p values, colored by true type."""
    plt.hist([null_pulls, alt_pulls], bins=20, stacked=True)
    plt.title("Simulated P values")
    plt.legend(['null_pulls', 'alt_pulls'])
    plt.show()
In [65]:
plot_sim()
In [66]:
def plot_sim_gray():
    """Same histogram, but pooled: in practice we can't tell null from alt."""
    pooled = np.concatenate([null_pulls, alt_pulls])
    plt.hist(pooled, bins=20, color='gray')
    plt.title("But if we don't know which are which?")
    plt.show()
In [67]:
plot_sim_gray()
In [68]:
# Naive single-test rule: reject every hypothesis with p <= 0.05.
null_mask = null_pulls <= 0.05
alt_mask = alt_pulls <= 0.05
null_pulls_rejected = null_pulls[null_mask]
alt_pulls_rejected = alt_pulls[alt_mask]
In [124]:
def classic_hyp_hist():
    """Histogram of the rejections under the naive p <= 0.05 rule.

    The title reports the realized share of true alternatives among
    everything rejected.
    """
    n_null = len(null_pulls_rejected)
    n_alt = len(alt_pulls_rejected)
    plt.hist([null_pulls_rejected, alt_pulls_rejected], stacked=True)
    plt.legend(['null_pulls: {}'.format(n_null),
                'alt_pulls: {}'.format(n_alt)])
    percent_alt = int(n_alt / (n_alt + n_null) * 100)
    plt.title('We End up Getting {percent_alt}% Alt Hypotheses'.format(percent_alt=percent_alt))
    plt.show()
In [125]:
classic_hyp_hist()
The Bonferroni correction controls FWER by setting $\alpha = \frac{0.05}{k}$ where $k$ is the number of hypotheses we're testing. In our case, this is $0.05 / 1000 = 0.00005$
In [72]:
# Bonferroni cutoff: alpha / k with k = 1000 total tests.
bonferroni_cutoff = 0.05 / 1000
print(alt_pulls[alt_pulls <= bonferroni_cutoff])
print(null_pulls[null_pulls <= bonferroni_cutoff])
In [73]:
# Even doubling alpha to 0.10 barely changes what Bonferroni lets through.
relaxed_cutoff = 0.10 / 1000
print(alt_pulls[alt_pulls <= relaxed_cutoff])
print(null_pulls[null_pulls <= relaxed_cutoff])
But this is unimpressive: the Bonferroni cutoff is so conservative that we reject almost nothing, missing most of the true alternatives.
In [76]:
def plot_hyp_df_fdr():
    """Re-plot the stacked bars to motivate FDR control: we want to manage
    the composition of the rejected set (right-hand bar), not the family-wise
    error rate."""
    hypothesis_df.plot(
        kind='bar',
        stacked=True,
        color=['lightsteelblue', 'darkblue', 'wheat', 'orange'],
        title="Let's control the proportion of orange on the right",
    )
    plt.show()
In [77]:
plot_hyp_df_fdr()
We will accept this procedure as magic, but for those of you who are curious, here's a link to the proof: https://statweb.stanford.edu/~candes/stats300c/Lectures/Lecture7.pdf
In [120]:
# Benjamini-Hochberg step-up procedure at FDR level alpha = 0.10.
alpha = 0.10
# In-place sorts (NOTE: these mutate the global arrays earlier cells plotted).
null_pulls.sort()
alt_pulls.sort()
# Pool all p values, remembering which kind of hypothesis produced each,
# then order the pooled values from smallest to largest.
p_values_df = pd.DataFrame({'p': np.concatenate([null_pulls, alt_pulls]),
                            'case': ['null'] * total_null + ['alt'] * total_alt}).sort_values('p', ascending=True)
n = p_values_df.shape[0]
# BH critical values: the k-th smallest p value is compared against alpha * k / n.
p_values_df['adjusted_alpha'] = [alpha * (k + 1) / n for k in range(n)]
p_values_df['k'] = range(1, n + 1)
# statsmodels implements the same step-up rule; element [0] of the result is
# the boolean reject mask, in the order of the p values passed in (sorted here).
p_values_to_reject = multipletests(p_values_df.p.values,
                                   alpha=alpha,
                                   method='fdr_bh')[0]
# Keep only the rejected rows (a boolean array in .loc selects positionally).
p_values_rejected = p_values_df.loc[p_values_to_reject, :].reset_index(drop=True)
def plot_stepup():
    """Plot each sorted p value (blue dots) against the BH critical line
    alpha * k / n (red); the rejections are everything up to the last dot
    under the line."""
    ax = plt.subplot(111)
    p_values_df.plot(x='k', y='p', kind='scatter', marker='.', ax=ax)
    p_values_df.plot(x='k', y='adjusted_alpha', ax=ax, color='red')
    ax.set_yscale('log')
    ax.set_title('Step-Up Procedure: Find the last time a blue point is under red line')
    ax.set_xlabel('kth smallest p value')
    ax.set_ylabel('Log of p-value')
    plt.show()
In [121]:
plot_stepup()
In [2]:
print(p_values_rejected)
In [128]:
# Largest p value that BH still rejects: the data-driven rejection cutoff.
max_p_fdr = p_values_df.loc[p_values_to_reject, 'p'].max()
def plot_stepup_hist():
    """Histogram of the BH-rejected p values, split by which hypothesis was true."""
    null_hits = null_pulls[null_pulls <= max_p_fdr]
    alt_hits = alt_pulls[alt_pulls <= max_p_fdr]
    plt.hist([null_hits, alt_hits], stacked=True)
    plt.legend(['null_pulls: {}'.format(len(null_hits)),
                'alt_pulls: {}'.format(len(alt_hits))])
    percent_alt = int(len(alt_hits) / (len(alt_hits) + len(null_hits)) * 100)
    plt.title('We End up Getting {percent_alt}% Alt Hypotheses'.format(percent_alt=percent_alt))
    plt.show()
In [129]:
plot_stepup_hist()
In [148]:
def plot_all_criteria(xmax=0.10, bins=500):
    """Stacked p-value histogram with all three rejection cutoffs marked.

    xmax -- right edge of the x axis (zoom level).
    bins -- number of histogram bins.
    """
    plt.hist([null_pulls, alt_pulls], bins=bins, stacked=True)
    plt.xlim(-0.001, xmax)
    plt.title("Lines are Bonferroni, Step-Up, and Classic Alpha")
    plt.legend(['null_pulls', 'alt_pulls'])
    # classic alpha (red), BH step-up cutoff (pink), Bonferroni (purple)
    for cutoff, shade in ((0.05, 'red'), (max_p_fdr, 'pink'), (0.05 / 1000, 'purple')):
        plt.axvline(cutoff, color=shade)
    plt.show()
In [4]:
plot_all_criteria(xmax=1.0, bins=40)
In [149]:
plot_all_criteria(xmax=0.10)
For negatively correlated tests, the step-up thresholds have to be shrunk further (the Benjamini–Yekutieli adjustment) to keep FDR controlled.
Bonferroni adjustment covers all cases.
Of course, there are lots of other ways to deal with this problem.
Feel free to try different methods until you get the results you want! (joke)
In [28]:
# Show the statsmodels docstring listing all the other correction methods available.
help(multipletests)