In [52]:
%pylab inline
from myfisher import fisherTestVec
import statsmodels.api as sm
In [64]:
# Load the adverse-event table and recode the event columns as 0/1 floats.
df = pd.read_csv(DATA_PATH + 'adevent.csv')
# Names of the 28 event indicator columns: E1 .. E28.
eCols = ['E%d' % i for i in range(1, 29)]
# A cell equal to the string 'event' becomes 1.0; anything else (including
# missing values) becomes 0.0. The vectorised comparison gives the same
# result as mapping a lambda over every element.
df.loc[:, eCols] = (df.loc[:, eCols] == 'event').astype(float)
# Total number of rows in the table (all groups combined).
N = df.shape[0]
# Parenthesised form prints identically on Python 2 and also runs on Python 3.
print(N)
In [65]:
# Build one 2x2 table per event column and run a one-sided Fisher test on each.
# Boolean row masks for the two groups.
aInd = df.group == 'A'
bInd = df.group == 'B'
# Per-event count of rows with the event in each group.
# NOTE(review): `a` is computed from group B and `b` from group A. This fixes
# the direction of the one-sided test below; confirm the crossing is intended.
a = array([df[col].loc[bInd].sum() for col in eCols])
b = array([df[col].loc[aInd].sum() for col in eCols])
# Rows WITHOUT the event, counted within the same group as `a` / `b`.
# Subtracting from the total row count N (as before) would add the other
# group's rows to these cells and distort every table.
c = bInd.sum() - a
d = aInd.sum() - b
# fisherTestVec is a project helper; presumably it evaluates the table
# [[a, b], [c, d]] element-wise and returns odds ratios and p-values
# -- TODO confirm against myfisher.
OR, p = fisherTestVec(a, b, c, d, alternative='greater')
# One row per event column, holding its odds ratio and raw p-value.
resDf = pd.DataFrame(dict(OR=OR, p=p), index=eCols)
Out[65]:
In [62]:
# Add one adjusted p-value column per correction method, named after the method.
# `h` receives the first value returned by multipletests on each pass, so after
# the loop it holds the value from the last method ('fdr_bh'), as before.
for adjMethod in ('bonferroni', 'holm', 'fdr_bh'):
    h, resDf[adjMethod], _, _ = sm.stats.multipletests(resDf['p'], method=adjMethod)
In [63]:
# Order events by ascending raw p-value and preview the first rows.
# DataFrame.sort was removed from pandas; sort_values is the replacement.
resDf = resDf.sort_values('p')
resDf.head()
Out[63]: