notebook.community

Edit and run



In [52]:

    
%pylab inline
from myfisher import fisherTestVec
import statsmodels.api as sm









    



Populating the interactive namespace from numpy and matplotlib



In [64]:

    
df = pd.read_csv(DATA_PATH + 'adevent.csv')
eCols = ['E%d' % i for i in range(1,29)]
df.loc[:,eCols] = df.loc[:,eCols].applymap(lambda s: 1. if s=='event' else 0.).astype(float)
N = df.shape[0]
print N



In [65]:

    
aInd = df.group == 'A'
bInd = df.group == 'B'

a = array([df[col].loc[bInd].sum() for col in eCols])
b = array([df[col].loc[aInd].sum() for col in eCols])
c = N - a
d = N - b

OR, p = fisherTestVec(a, b, c, d, alternative='greater')
resDf = pd.DataFrame(dict(OR=OR,p=p),index=eCols)









    Out[65]:





80



In [62]:

    
h, resDf['bonferroni'], _, _ = sm.stats.multipletests(resDf['p'], method='bonferroni')
h, resDf['holm'], _, _ = sm.stats.multipletests(resDf['p'], method='holm')
h, resDf['fdr_bh'], _, _ = sm.stats.multipletests(resDf['p'], method='fdr_bh')



In [63]:

    
resDf = resDf.sort('p')
resDf.head()

	OR	p	bonferroni	holm	fdr_bh
E1	3.518519	0.001390	0.038927	0.038927	0.038927
E8	inf	0.030275	0.847700	0.817425	0.423850
E6	inf	0.061326	1.000000	1.000000	0.572379
E5	2.548387	0.223967	1.000000	1.000000	1.000000
E15	inf	0.249216	1.000000	1.000000	1.000000