In [52]:
%pylab inline
from myfisher import fisherTestVec
import statsmodels.api as sm


Populating the interactive namespace from numpy and matplotlib

In [64]:
df = pd.read_csv(DATA_PATH + 'adevent.csv')
eCols = ['E%d' % i for i in range(1,29)]
df.loc[:,eCols] = df.loc[:,eCols].applymap(lambda s: 1. if s=='event' else 0.).astype(float)
N = df.shape[0]
print N


160

In [65]:
aInd = df.group == 'A'
bInd = df.group == 'B'

a = array([df[col].loc[bInd].sum() for col in eCols])
b = array([df[col].loc[aInd].sum() for col in eCols])
c = N - a
d = N - b

OR, p = fisherTestVec(a, b, c, d, alternative='greater')
resDf = pd.DataFrame(dict(OR=OR,p=p),index=eCols)


Out[65]:
80

In [62]:
h, resDf['bonferroni'], _, _ = sm.stats.multipletests(resDf['p'], method='bonferroni')
h, resDf['holm'], _, _ = sm.stats.multipletests(resDf['p'], method='holm')
h, resDf['fdr_bh'], _, _ = sm.stats.multipletests(resDf['p'], method='fdr_bh')

In [63]:
resDf = resDf.sort('p')
resDf.head()


Out[63]:
OR p bonferroni holm fdr_bh
E1 3.518519 0.001390 0.038927 0.038927 0.038927
E8 inf 0.030275 0.847700 0.817425 0.423850
E6 inf 0.061326 1.000000 1.000000 0.572379
E5 2.548387 0.223967 1.000000 1.000000 1.000000
E15 inf 0.249216 1.000000 1.000000 1.000000