In [3]:
%matplotlib inline
import matplotlib as mpl
mpl.use('agg')
import sys;sys.path.insert(1,'/home/arya/workspace/bio/')
import os
import numpy as np
import matplotlib.pyplot as plt
import sys,os
path='/'.join(os.getcwd().split('/')[:-4])
sys.path.insert(1,path)
import Utils.Util as utl
import Utils.Plots as pplt
import pandas as pd
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
import seaborn as sns
import Scripts.KyrgysHAPH.Util as kutl
import Scripts.KyrgysHAPH.Plot as kplt
import Scripts.HLI.Kyrgyz.IBSScan.IBDScan as ibd
import Scripts.HLI.Kyrgyz.PBS as pbs
from glob import glob
pd.options.display.max_colwidth = 2000;
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 100
path='/home/arya/storage/Data/Human/scan/selscan/'
pathb='/home/arya/storage/Data/Human/CDTS/percentile/quantile/hg19/'
a=pd.read_pickle(path+'EUR.df')[('ihs','EUR','NA')].dropna().rename('EU')
a=((a.rank()/a.size) *100).apply(np.ceil).astype(int)
one= lambda x: pd.read_csv(pathb+x,sep='\t',header=None,names=['CHROM','start','end']).applymap(utl.INT)
pad=1000;I.start-=pad;I.end+=pad
I=pd.concat(map(one,['AFR.low.EU.high.bed','AFR.low.EU.high.bed']),keys=[0,1]);I['len']=I.end-I.start
SFS=pd.read_pickle('/home/arya/Kyrgyz/scan/SFS/XP0.df').loc[50]
load=lambda x: pd.read_pickle(path+'{}.df'.format(x)).apply(lambda a: ((a.dropna().rank()/a.dropna().size) *100).apply(np.ceil).astype(int) )
d=SFS.xs('D',0,2).apply(lambda a: ((a.rank()/a.size) *100).apply(np.ceil).astype(int) )
eu=load('EUR')
af=load('AFR')
In [15]:
SFS.xs('m',0,2)[['AFR','EUR']]
Out[15]:
In [14]:
(SFS.xs('m',0,2)[['AFR','EUR']].rank()/SFS.shape[0]*100).apply(np.ceil).astype(int)
# .drop_duplicates().plot.scatter(x='AFR',y='EUR')
Out[14]:
In [10]:
SFS.xs('D',0,2)[['AFR','EUR']].drop_duplicates().plot.scatter(x='AFR',y='EUR')
Out[10]:
In [236]:
for stat in SFS.index.levels[2]:
break
d=SFS.xs(stat,0,2)[['AFR','EUR']].dropna().apply(lambda a: ((a.dropna().rank()/a.dropna().size) *100).apply(np.ceil).astype(int) )
x=pd.concat([d.loc[i.CHROM].loc[utl.roundto(i.start-5000,10000)+5000].rename(x) for x,i in I.iterrows()],1).T
d=d.applymap(lambda x: utl.roundto(x,5));d['a']=1
d=d.set_index(['AFR','EUR']).groupby(level=[0,1]).sum().unstack('AFR').sort_index(ascending=False)['a']
fig,ax=plt.subplots(1,2,figsize=(12,5))
d[d==d.max().max()]=None
sns.heatmap(d.applymap(np.log),ax=ax[0])
x.loc[0].plot.scatter(x='AFR',y='EUR',c='r',ax=ax[1],label='AFR=1, EUR=100 (CDTS %ile)');
x.loc[1].plot.scatter(x='AFR',y='EUR',c='b',ax=ax[1],label='AFR=100, EUR=1');
plt.xlim([0,102]);plt.ylim([0,102])
plt.suptitle(stat)
In [38]:
a=pd.read_csv('/home/arya/a.tsv',sep='\t',header=None,names=['CHROM','POS','CDTS']).set_index(['CHROM','POS']).CDTS
b=utl.scanGenome(a,winSize=1000,nsteps=5)
pplt.Manhattan(a);
pplt.Manhattan(b);
In [41]:
c=utl.scanGenome(a,winSize=2000,nsteps=5)
pplt.Manhattan(c);