In [70]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sys,os
sys.path.insert(1,'/home/arya/workspace/bio')
import Utils.Util as utl
import pandas as pd
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
import seaborn as sns
import Utils.Util as utl
import Utils.Plots as pplt
import Scripts.KyrgysHAPH.Plot as kplt
import Scripts.KyrgysHAPH.Util as kutl
def diff(a):
    """Return the 'No-HAPH' entry of ``a`` minus its 'HAPH' entry."""
    return a['No-HAPH'] - a['HAPH']


def filterGap(a):
    """Run ``utl.filterGap2`` on each group of columns sharing a level-0 label.

    Columns are grouped on their outermost level.  Each group is handed to
    ``utl.filterGap2`` with ``pad`` computed from the group label as
    ``label/2*1000``.  The outermost column level of the combined result is
    then dropped (presumably the group key that ``apply`` prepends -- TODO
    confirm against the pandas version in use).
    """
    def _filter_group(group):
        # NOTE: this notebook is Python 2, so `/` is integer division when the
        # label is an int; the expression is kept exactly as written.
        return utl.filterGap2(group, pad=group.name/2*1000)

    per_group = a.groupby(level=0, axis=1).apply(_filter_group)
    # Transpose so the outer column level can be dropped via reset_index, then flip back.
    return per_group.T.reset_index(level=0, drop=True).T


# Disabled experiment kept from the original notebook (not executed):
# none=pd.Series(None,index=a.index.levels[3])
# filterSNPN=lambda a: a.groupby(level=0).apply(lambda b: b.loc[b.name].stack(level=[0,1]).reorder_levels([0,1,3,4,2]).groupby(level=range(4)).apply(lambda x: (none,x.loc[x.name])[x.loc[x.name].m>b.name/5]).unstack(level=[2,3]))

# Load the pickled XP SFS scan, keep the 'No-HAPH vs HAPH' comparison and
# reduce it to the No-HAPH minus HAPH difference.
scanXPSFS = diff(pd.read_pickle(kutl.path + 'scan/XP.SFS.df')['No-HAPH vs HAPH'])
# Move index levels 0 and 3 into the columns, then gap-filter per outer column label.
scanXPSFS = filterGap(scanXPSFS.unstack(level=[0, 3]))
# Alternative ordering (filter after diff without unstacking), left disabled:
# xp=filterGap(diff(pd.read_pickle(kutl.path+'scan/XP.SFS.df')['No-HAPH vs HAPH']))

# Accumulator that the per-method cells below append their interval tables to.
regions = []
In [71]:
# Top-scoring regions for the 'D' statistic, No-HAPH vs HAPH.
method='D'
# Fixed: the original concatenation lacked a space and printed "Dfor No-HAPH vs HAPH".
print(method + ' for No-HAPH vs HAPH')
# scanXPSFS holds No-HAPH minus HAPH (see `diff` in the setup cell); the sign is
# flipped here so the descending sort below ranks the most negative differences first.
# [50] selects the column labelled 50 -- presumably the window size; TODO confirm.
a=-scanXPSFS.xs(method,level=1,axis=1)[50]
pplt.Manhattan(a,top_k=100);
# Keep the 100 largest scores, then let utl.BED.getIntervals turn them into
# intervals (padding=50000), ordered by 'score' from highest to lowest.
o=a.sort_values(ascending=False).iloc[:100]
o=utl.BED.getIntervals(o,padding=50000).sort_values('score',ascending=False).reset_index()
print('Top 100 regions {} intervals'.format(o.shape[0]))
display(o)
# Plot the SFS for the best-scoring interval (first row after the sort above).
kplt.plotSFS(interval=o.iloc[0])
# Tag the table with the method name and append it to the shared accumulator.
# NOTE: re-running this cell appends the same table again.
regions+=[pd.concat([o],keys=[method])]
In [72]:
# Top-scoring regions for the 'H' statistic, No-HAPH vs HAPH.
method='H'
# Fixed: the original concatenation lacked a space and printed "Hfor No-HAPH vs HAPH".
print(method + ' for No-HAPH vs HAPH')
# scanXPSFS holds No-HAPH minus HAPH (see `diff` in the setup cell); the sign is
# flipped here so the descending sort below ranks the most negative differences first.
# [50] selects the column labelled 50 -- presumably the window size; TODO confirm.
a=-scanXPSFS.xs(method,level=1,axis=1)[50]
pplt.Manhattan(a,top_k=100);
# Keep the 100 largest scores, then let utl.BED.getIntervals turn them into
# intervals (padding=50000), ordered by 'score' from highest to lowest.
o=a.sort_values(ascending=False).iloc[:100]
o=utl.BED.getIntervals(o,padding=50000).sort_values('score',ascending=False).reset_index()
print('Top 100 regions {} intervals'.format(o.shape[0]))
display(o)
# Plot the SFS for the best-scoring interval (first row after the sort above).
kplt.plotSFS(interval=o.iloc[0])
# Tag the table with the method name and append it to the shared accumulator.
# NOTE: re-running this cell appends the same table again.
regions+=[pd.concat([o],keys=[method])]
In [73]:
# Top-scoring regions for the 'SFSelect' statistic, No-HAPH vs HAPH.
method='SFSelect'
# Fixed: the original concatenation lacked a space and printed "SFSelectfor No-HAPH vs HAPH".
print(method + ' for No-HAPH vs HAPH')
# Unlike the 'D' and 'H' cells, the No-HAPH minus HAPH difference is used
# without a sign flip, so the largest positive differences rank first.
# [50] selects the column labelled 50 -- presumably the window size; TODO confirm.
a=scanXPSFS.xs(method,level=1,axis=1)[50]
pplt.Manhattan(a,top_k=100);
# Keep the 100 largest scores, then let utl.BED.getIntervals turn them into
# intervals (padding=50000), ordered by 'score' from highest to lowest.
o=a.sort_values(ascending=False).iloc[:100]
o=utl.BED.getIntervals(o,padding=50000).sort_values('score',ascending=False).reset_index()
print('Top 100 regions {} intervals'.format(o.shape[0]))
display(o)
# Plot the SFS for the best-scoring interval (first row after the sort above).
kplt.plotSFS(interval=o.iloc[0])
# Tag the table with the method name and append it to the shared accumulator.
# NOTE: re-running this cell appends the same table again.
regions+=[pd.concat([o],keys=[method])]
In [78]:
# Re-read the pickled XP SFS scan from disk and show it as the cell output.
# The rendered table is capped by pd.options.display.max_rows (set to 20 in the setup cell).
pd.read_pickle(kutl.path+'scan/XP.SFS.df')
Out[78]: