In [2]:
%matplotlib inline
# --- Notebook setup: plotting backend, module search path, imports, display options ---
# Select the Agg backend before pyplot is imported further down.
# NOTE(review): the cell also starts with `%matplotlib inline`; confirm which
# backend is actually intended, since both are requested.
import matplotlib as mpl
mpl.use('agg')
# Put a hard-coded project directory at position 1 of the module search path.
import sys;sys.path.insert(1,'/home/arya/workspace/bio/')
# NOTE(review): the next two lines repeat the backend selection above.
import matplotlib as mpl
mpl.use('Agg')
import os
# os.environ["DISPLAY"] = "localhost:11.0"
import numpy as np
import matplotlib.pyplot as plt
import sys,os
# Strip the last four components from the current working directory and add the
# result to the module search path (splits on '/', so POSIX-style paths only).
# Presumably this is the repository root that holds `Utils` and `Scripts` — TODO confirm.
path='/'.join(os.getcwd().split('/')[:-4])
sys.path.insert(1,path)
import Utils.Util as utl
import Utils.Plots as pplt
import pandas as pd
# Show at most 20 rows of a frame and allow wide frames to wrap across lines.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
import seaborn as sns
import Scripts.KyrgysHAPH.Util as kutl
import Scripts.KyrgysHAPH.Plot as kplt
import Scripts.HLI.Kyrgyz.IBSScan.IBDScan as ibd
import Scripts.HLI.Kyrgyz.PBS as pbs
from glob import glob
# Allow very long cell contents (up to 2000 characters) when frames are displayed.
pd.options.display.max_colwidth = 2000;
import matplotlib as mpl
# Render figures at 100 dots per inch.
mpl.rcParams['figure.dpi'] = 100
# Record with CHROM/start/end fields — presumably a default genomic region
# (chromosome 22, 20,000,000-20,001,000). It is not read by any cell visible here,
# and the one-letter name is easy to overwrite with a loop index.
i=pd.Series({'CHROM':22,'start':20000000,'end':20001000})
In [180]:
# Pairwise similarity between the rows of `a`:
#     1 - (sum of absolute differences between two rows) / (number of columns),
# rounded to two decimals and drawn as a heatmap.
# NOTE(review): `a` is not defined in this cell; it is whatever frame a
# previously executed cell left in the kernel — confirm which one before re-running.
geno = a.values.astype(float)  # assumes every entry of `a` is numeric — TODO confirm
# Build one row of distances per sample with NumPy instead of n*n `.loc`
# assignments into an object-dtype frame. np.nansum skips NaN, matching the
# default behaviour of pandas' `.sum()`. Unlike an explicit `for i ... for j ...`
# loop, this does not rebind the notebook-level names `i` and `j`.
dist = np.array([np.nansum(np.abs(geno - row), axis=1) for row in geno])
# The reshape keeps the matrix square even when `a` has no rows.
dist = dist.reshape(len(geno), len(geno))
m = pd.DataFrame(dist, index=a.index, columns=a.index)
m = (1 - m / float(a.shape[1])).round(2)
# The matrix is symmetric (|x - y| == |y - x|), so no transpose is needed.
# utl.augmentIndex(m,path='/home/arya/POP/HA/')
sns.heatmap(m)
Out[180]:
In [128]:
# PCA scatter (components 0 and 1) of the chrM genotypes for the first two
# populations in `pops`: 'Healthy' in blue, 'HAPH' in red.
pops = ['Healthy', 'HAPH', 'CHB']
a = utl.gz.GT('/home/arya/POP/KGZU/chrM.vcf.gz', coding='dominant').T
# Value-count helper; defined here but not called in this cell.
f = lambda a: pd.Series(a.values.reshape(-1)).value_counts()

# Sample IDs per population, as strings (CHB is capped with maxn=20).
p1 = kutl.ID(pops[0]).astype(str)
p2 = kutl.ID(pops[1]).astype(str)
p3 = kutl.ID(pops[2], maxn=20).astype(str)  # fetched but not used below

# This VCF is indexed with integer labels, hence the cast back to int.
p = pd.concat([p1, p2]).astype(int)

# Keep the selected samples, then drop rows of the transposed frame whose
# mean is not positive, and transpose back.
a = a.loc[p].T
a = a.loc[a.mean(axis=1) > 0].T

X = utl.pcaX(a, 5)
x, y = 0, 1
healthy_pcs = X.loc[p1.astype(int)]
haph_pcs = X.loc[p2.astype(int)]
healthy_pcs.plot.scatter(x=x, y=y, color='b')
# Draw the second cohort on the axes created by the first scatter call.
haph_pcs.plot.scatter(x=x, y=y, color='r', ax=plt.gca())
Out[128]:
In [111]:
# Manhattan plot of |log10(value)| for three pairwise comparisons, one column
# per comparison. The inputs are pickled frames named after the two groups
# (presumably Fisher-test p-values, going by the file names — TODO confirm).
POPS = [('Healthy', 'Sick'), ('No-HAPH', 'HAPH'), ('Healthy', 'HAPH')]
# NOTE: pd.read_pickle can execute arbitrary code; only load files you produced yourself.
# A list comprehension plus the `axis=1` keyword replaces `pd.concat(map(...), 1)`:
# newer pandas no longer accepts the axis as a positional argument, and a
# concrete list behaves the same on Python 2 and Python 3.
a = pd.concat(
    [pd.read_pickle('/home/arya/POP/KGZU/chrM.{}.{}.Fisher.df'.format(pair[0], pair[1]))
       .apply(np.log10)
       .abs()
     for pair in POPS],
    axis=1
)
a.columns = POPS
pplt.Manhattan(a);
In [192]:
# Pair plot of the PCA components of the chrM genotypes (KGZU+ALL call set)
# for three populations: Healthy (blue), HAPH (red), CHB (green).
pops = ['Healthy', 'HAPH', 'CHB']
a = utl.gz.GT('/home/arya/POP/KGZU+ALL/chrM.vcf.gz', coding='dominant').T
# Value-count helper; defined here but not called in this cell.
f = lambda a: pd.Series(a.values.reshape(-1)).value_counts()

# Sample IDs per population, as strings (CHB is capped with maxn=20).
p1 = kutl.ID(pops[0]).astype(str)
p2 = kutl.ID(pops[1]).astype(str)
p3 = kutl.ID(pops[2], maxn=20).astype(str)
p = pd.concat([p1, p2, p3])

# Keep the selected samples, then drop rows of the transposed frame whose
# mean is not positive, and transpose back.
a = a.loc[p].T
a = a.loc[a.mean(axis=1) > 0].T

X = utl.pcaX(a, 4)
# Relabel the component columns as PC1, PC2, ...
# (assumes pcaX returns integer column labels — TODO confirm).
X.columns = ['PC' + str(pc_idx + 1) for pc_idx in X.columns]

# Tag every sample with its population name for the `hue` of the pair plot.
X['pop'] = ''
for ids, label in zip((p1, p2, p3), pops):
    X.loc[ids, 'pop'] = label

sns.pairplot(data=X, hue='pop', palette=dict(zip(pops, ['b', 'r', 'g'])))
Out[192]:
In [197]:
# Pair plot of the PCA components of the chrM genotypes (KGZU+ALL call set)
# for four populations: Healthy (blue), HAPH (red), CHB (green), CEU (black).
pops = ['Healthy', 'HAPH', 'CHB', 'CEU']
a = utl.gz.GT('/home/arya/POP/KGZU+ALL/chrM.vcf.gz', coding='dominant').T
# Value-count helper; defined here but not called in this cell.
f = lambda a: pd.Series(a.values.reshape(-1)).value_counts()

# Sample IDs per population, as strings; CHB and CEU are capped with maxn.
maxn = 200
p1 = kutl.ID(pops[0]).astype(str)
p2 = kutl.ID(pops[1]).astype(str)
p3 = kutl.ID(pops[2], maxn=maxn).astype(str)
p4 = kutl.ID(pops[3], maxn=maxn).astype(str)
p = pd.concat([p1, p2, p3, p4])

# Keep the selected samples, then drop rows of the transposed frame whose
# mean is not positive, and transpose back.
a = a.loc[p].T
a = a.loc[a.mean(axis=1) > 0].T

X = utl.pcaX(a, 4)
# Relabel the component columns as PC1, PC2, ...
# (assumes pcaX returns integer column labels — TODO confirm).
X.columns = ['PC' + str(pc_idx + 1) for pc_idx in X.columns]

# Tag every sample with its population name for the `hue` of the pair plot.
X['pop'] = ''
for ids, label in zip((p1, p2, p3, p4), pops):
    X.loc[ids, 'pop'] = label

sns.pairplot(data=X, hue='pop', palette=dict(zip(pops, ['b', 'r', 'g', 'k'])))
# The cell's displayed value is the filtered genotype frame itself.
a
Out[197]:
In [193]:
# Pair plot of the PCA components of the chrM genotypes (KGZU+ALL call set)
# restricted to the Healthy (blue) and HAPH (red) samples.
pops = ['Healthy', 'HAPH', 'CHB']
a = utl.gz.GT('/home/arya/POP/KGZU+ALL/chrM.vcf.gz', coding='dominant').T
# Value-count helper; defined here but not called in this cell.
f = lambda a: pd.Series(a.values.reshape(-1)).value_counts()

# Sample IDs per population, as strings (CHB is capped with maxn=20).
p1 = kutl.ID(pops[0]).astype(str)
p2 = kutl.ID(pops[1]).astype(str)
p3 = kutl.ID(pops[2], maxn=20).astype(str)  # fetched but left out of `p` below
p = pd.concat([p1, p2])

# Keep the selected samples, then drop rows of the transposed frame whose
# mean is not positive, and transpose back.
a = a.loc[p].T
a = a.loc[a.mean(axis=1) > 0].T

X = utl.pcaX(a, 4)
# Relabel the component columns as PC1, PC2, ...
# (assumes pcaX returns integer column labels — TODO confirm).
X.columns = ['PC' + str(pc_idx + 1) for pc_idx in X.columns]

# Tag every sample with its population name for the `hue` of the pair plot.
X['pop'] = ''
for ids, label in zip((p1, p2), pops):
    X.loc[ids, 'pop'] = label

# The palette still carries an entry for CHB even though no CHB sample is plotted.
sns.pairplot(data=X, hue='pop', palette=dict(zip(pops, ['b', 'r', 'g'])))
Out[193]: