In [110]:
# Notebook setup: plotting backend, project import path, libraries and pandas/matplotlib display options.
%matplotlib inline
import matplotlib as mpl
# NOTE(review): an explicit 'agg' backend is requested right after `%matplotlib inline` — confirm which one is intended.
mpl.use('agg')
import os
# os.environ["DISPLAY"] = "localhost:11.0"
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): `os` is imported a second time here (already imported above).
import sys,os
# Strip the last four components from the current working directory and insert the result
# at position 1 of sys.path, ahead of the project imports below.
# Presumably this is the repository root holding the `Utils` and `Scripts` packages — verify;
# it depends on the directory the kernel was started from.
path='/'.join(os.getcwd().split('/')[:-4])
sys.path.insert(1,path)
import Utils.Util as utl
import Utils.Plots as pplt
import pandas as pd
# Display options: show at most 20 rows of a frame.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
# NOTE: this rebinds `plt` (bound to matplotlib.pyplot above) to the pylab module.
import pylab as plt
import seaborn as sns
import Scripts.KyrgysHAPH.Util as kutl
import Scripts.KyrgysHAPH.Plot as kplt
import Scripts.HLI.Kyrgyz.IBSScan.IBDScan as ibd
import Scripts.HLI.Kyrgyz.PBS as pbs
pd.options.display.max_colwidth = 2000;
# NOTE(review): second `import matplotlib as mpl` (already imported at the top of this cell).
import matplotlib as mpl
# Default resolution for figures created without an explicit dpi.
mpl.rcParams['figure.dpi'] = 100
# Population label lists used by the cells below.
# The sample panel is loaded and indexed by super-population once, instead of once per list.
_panel_by_super=utl.VCF.loadPanel().set_index('super_pop')
# Super-population labels.
sup=['AFR','EUR','EAS','SAS']
# KGZ followed by the unique populations of EAS and SAS, in panel order, minus the last two
# (the reason for dropping the last two is not visible here — TODO confirm).
sub=['KGZ']+_panel_by_super.loc[['EAS','SAS']]['pop'].unique().tolist()[:-2]
# Unique EAS populations followed by KGZ and YRI.
eas=_panel_by_super.loc[['EAS']]['pop'].unique().tolist()+['KGZ']+['YRI']
# KGZ followed by the super-population labels.
supk=['KGZ']+sup
In [108]:
# Read the pickled frame for KGZU+ALL chr2 and discard index levels 2, 3 and 4.
A=(
    pd.read_pickle('/home/arya/POP/KGZU+ALL/chr2.df')
    .reset_index(level=[2,3,4],drop=True)
)
In [111]:
# Display the `sub` population list (KGZ plus selected EAS/SAS populations) built in the setup cell.
sub
Out[111]:
In [109]:
# Scatter of the first two `utl.pcaX` components, one point per selected population column of `A`.
# Rows of `A` are kept only where EAS, SAS and KGZ all exceed `alpha`.
alpha=0.
# a=A[supk][utl.polymorphix(A[sup].mean(1),1e-2/3,True)].T
# a=A[supk][A.KGZ>0.1].T
# a=A[supk][(A.AFR>0.1)&(A.EUR>0.1)&(A.EAS>0.1)&(A.SAS>0.1)&(A.KGZ>0.1)].T
a=A[eas][(A.EAS>alpha)&(A.SAS>alpha)&(A.KGZ>alpha)].T
# One "hls" palette colour per population; YRI is forced to black.
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
c['YRI']=(0,0,0)
plt.figure();ax=plt.gca()
# The projection does not depend on the loop variable, so compute it once
# rather than once per population.
pcs=utl.pcaX(a)
for i in a.index:
    # A separate scatter call per population gives each its own colour and legend entry.
    pcs.loc[[i]].plot.scatter(x=0,y=1,label=i,ax=ax,c=c[i],s=100)
In [ ]:
# Scatter of eigenvector columns 2 and 3 from the noSingleton `.eigenvec` file, coloured by super-population.
# `pop` and `CHROM` were not defined before this cell, so it failed on a fresh kernel.
# NOTE(review): the values below mirror the otherwise identical cell that follows — confirm they are the intended ones.
pop,CHROM='HA',22
path='/home/arya/POP/{}/'.format(pop)
f='noSingleton/chr{}.vcf.gz.eigenvec'.format(CHROM)
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# panel=pd.read_csv('/home/arya/Kyrgyz/info/kyrgyz.panel',sep='\t').set_index('sample')['pop']
# print panel
# Space-separated, headerless eigenvector table; column 0 is joined against the panel's sample ids.
a=pd.read_csv(path+f,sep=' ',header=None).set_index(0).join(panel).set_index(['super_pop'])#.reorder_levels([1,0]).sort_index()
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
plt.figure(figsize=(6,6),dpi=150);ax=plt.gca()
# Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
# calls the applied function twice on the first group.
for name,grp in a.groupby(level=0):
    grp.plot.scatter(x=2,y=3,label=name,ax=ax,c=c[name])
# Column k is labelled PC(k-1), the convention used by the later cells of this notebook.
ax.set_xlabel('PC1');ax.set_ylabel('PC2');
In [23]:
# Scatter of eigenvector columns 2 and 3 for the HA set, chromosome 22 (noSingleton), coloured by super-population.
pop,CHROM='HA',22
path='/home/arya/POP/{}/'.format(pop)
f='noSingleton/chr{}.vcf.gz.eigenvec'.format(CHROM)
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# panel=pd.read_csv('/home/arya/Kyrgyz/info/kyrgyz.panel',sep='\t').set_index('sample')['pop']
# print panel
# Space-separated, headerless eigenvector table; column 0 is joined against the panel's sample ids.
a=pd.read_csv(path+f,sep=' ',header=None).set_index(0).join(panel).set_index(['super_pop'])#.reorder_levels([1,0]).sort_index()
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
plt.figure(figsize=(6,6),dpi=150);ax=plt.gca()
# Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
# calls the applied function twice on the first group.
for name,grp in a.groupby(level=0):
    grp.plot.scatter(x=2,y=3,label=name,ax=ax,c=c[name])
# Column k is labelled PC(k-1), the convention used by the later cells of this notebook.
ax.set_xlabel('PC1');ax.set_ylabel('PC2');
In [58]:
# Scatter of eigenvector columns 2 and 3 for the KGZU set, chromosome 22, coloured by the `pop` label.
pop,CHROM='KGZU',22
path='/home/arya/POP/{}/'.format(pop)
f='chr{}.vcf.gz.eigenvec'.format(CHROM)
# Only the Kyrgyz panel is used; the earlier read of path+'panel' was overwritten
# immediately without being used, so it has been dropped.
panel=pd.read_csv('/home/arya/Kyrgyz/info/kyrgyz.panel',sep='\t').set_index('sample')['pop']
# Space-separated, headerless eigenvector table; column 0 is joined against the panel's sample ids.
a=pd.read_csv(path+f,sep=' ',header=None).set_index(0).join(panel).set_index(['pop'])#.reorder_levels([1,0]).sort_index()
# a=a.loc[['AFR','KGZ','EUR','EAS','SAS']]
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
fig,ax=plt.subplots(1,1,figsize=(6,5),dpi=150)
x,y=2,3
# for ax ,(x,y) in zip(axes,[(2,3),(2,4),(3,4)][:1]):
# Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
# calls the applied function twice on the first group.
for name,grp in a.groupby(level=0):
    grp.plot.scatter(x=x,y=y,label=name,ax=ax,c=c[name],alpha=0.995)
# Column k of the table is labelled PC(k-1).
ax.set_xlabel('PC'+str(x-1));ax.set_ylabel('PC'+str(y-1))
Out[58]:
In [52]:
# Scatter of eigenvector columns 2 and 3 for the KGZU+ALL set, chromosome 2, coloured by super-population.
pop,CHROM='KGZU+ALL',2
path='/home/arya/POP/{}/'.format(pop)
f='chr{}.vcf.gz.eigenvec'.format(CHROM)
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# Join eigenvectors with the panel, relabel 'Healthy' and 'Sick' in `super_pop` as 'KGZ',
# then index by super-population.
a=(
    pd.read_csv(path+f,sep=' ',header=None)
    .set_index(0)
    .join(panel)
    .replace({'super_pop':{'Healthy':'KGZ','Sick':'KGZ'}})
    .set_index(['super_pop'])
)#.reorder_levels([1,0]).sort_index()
# Keep these five groups, in this order.
a=a.loc[['AFR','KGZ','EUR','EAS','SAS']]
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
fig,ax=plt.subplots(1,1,figsize=(6,5),dpi=150)
x,y=2,3
# for ax ,(x,y) in zip(axes,[(2,3),(2,4),(3,4)][:1]):
# Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
# calls the applied function twice on the first group.
for name,grp in a.groupby(level=0):
    grp.plot.scatter(x=x,y=y,label=name,ax=ax,c=c[name],alpha=0.995)
# Column k of the table is labelled PC(k-1).
ax.set_xlabel('PC'+str(x-1));ax.set_ylabel('PC'+str(y-1))
Out[52]:
In [79]:
# Three-panel scatter of eigenvector column pairs (2,3), (2,4), (3,4) for the HA set,
# chromosome 22 (beagle/noSingleton), coloured by super-population.
pop,CHROM='HA',22
path='/home/arya/POP/{}/'.format(pop)
f='beagle/noSingleton/chr{}.vcf.gz.eigenvec'.format(CHROM)
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# panel=pd.read_csv('/home/arya/Kyrgyz/info/kyrgyz.panel',sep='\t').set_index('sample')['pop']
# print panel
# Space-separated, headerless eigenvector table; column 0 is joined against the panel's sample ids.
a=pd.read_csv(path+f,sep=' ',header=None).set_index(0).join(panel).set_index(['super_pop'])#.reorder_levels([1,0]).sort_index()
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
fig,axes=plt.subplots(1,3,figsize=(18,5),dpi=150)
# One axes per column pair; the loop targets set x and y, so no separate initial assignment is needed.
for ax,(x,y) in zip(axes,[(2,3),(2,4),(3,4)]):
    # Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
    # calls the applied function twice on the first group.
    for name,grp in a.groupby(level=0):
        grp.plot.scatter(x=x,y=y,label=name,ax=ax,c=c[name],alpha=0.995)
    # Column k of the table is labelled PC(k-1).
    ax.set_xlabel('PC'+str(x-1));ax.set_ylabel('PC'+str(y-1))
In [68]:
# Three-panel scatter of eigenvector column pairs (2,3), (2,4), (3,4) for the HA set,
# chromosome 22 (beagle/noSingleton/filtered), coloured by super-population.
pop,CHROM='HA',22
path='/home/arya/POP/{}/'.format(pop)
f='beagle/noSingleton/filtered/chr{}.vcf.gz.eigenvec'.format(CHROM)
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# panel=pd.read_csv('/home/arya/Kyrgyz/info/kyrgyz.panel',sep='\t').set_index('sample')['pop']
# print panel
# Space-separated, headerless eigenvector table; column 0 is joined against the panel's sample ids.
a=pd.read_csv(path+f,sep=' ',header=None).set_index(0).join(panel).set_index(['super_pop'])#.reorder_levels([1,0]).sort_index()
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
fig,axes=plt.subplots(1,3,figsize=(18,5))
# One axes per column pair; the loop targets set x and y, so no separate initial assignment is needed.
for ax,(x,y) in zip(axes,[(2,3),(2,4),(3,4)]):
    # Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
    # calls the applied function twice on the first group.
    for name,grp in a.groupby(level=0):
        grp.plot.scatter(x=x,y=y,label=name,ax=ax,c=c[name],alpha=0.995)
    # Column k of the table is labelled PC(k-1).
    ax.set_xlabel('PC'+str(x-1));ax.set_ylabel('PC'+str(y-1))
In [80]:
# Three-panel scatter of eigenvector column pairs (2,3), (2,4), (3,4) for the HA set,
# chromosome 22 (beagle/filtered), coloured by super-population.
pop,CHROM='HA',22
path='/home/arya/POP/{}/'.format(pop)
f='beagle/filtered/chr{}.vcf.gz.eigenvec'.format(CHROM)
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# panel=pd.read_csv('/home/arya/Kyrgyz/info/kyrgyz.panel',sep='\t').set_index('sample')['pop']
# print panel
# Space-separated, headerless eigenvector table; column 0 is joined against the panel's sample ids.
a=pd.read_csv(path+f,sep=' ',header=None).set_index(0).join(panel).set_index(['super_pop'])#.reorder_levels([1,0]).sort_index()
c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
fig,axes=plt.subplots(1,3,figsize=(18,5))
# One axes per column pair; the loop targets set x and y, so no separate initial assignment is needed.
for ax,(x,y) in zip(axes,[(2,3),(2,4),(3,4)]):
    # Plain loop instead of groupby.apply: plotting is a side effect, and older pandas
    # calls the applied function twice on the first group.
    for name,grp in a.groupby(level=0):
        grp.plot.scatter(x=x,y=y,label=name,ax=ax,c=c[name],alpha=0.995)
    # Column k of the table is labelled PC(k-1).
    ax.set_xlabel('PC'+str(x-1));ax.set_ylabel('PC'+str(y-1))
In [120]:
# Load the chr22 eigenvector table for the merged ASIA set and display it indexed by super-population.
path='/home/arya/POP/ASIA/merge/beagle/poly/'
f='chr22.vcf.gz.eigenvec'
# Sample panel indexed by sample id, first two remaining columns only
# (`super_pop` must be one of them: it becomes the index below).
panel=pd.read_csv(path+'panel',sep='\t').set_index('sample').iloc[:,:2]
# The chain is wrapped in parentheses so it can span several lines; a continuation line
# that starts with `.set_index` outside parentheses is a syntax error.
# 'Healthy' and 'Sick' in `super_pop` are relabelled as 'KGZ' before indexing.
a=(
    pd.read_csv(path+f,sep=' ',header=None)
    .set_index(0)
    .join(panel)
    .replace({'super_pop':{'Healthy':'KGZ','Sick':'KGZ'}})
    .set_index(['super_pop'])
)#.reorder_levels([1,0]).sort_index()
a
# a.iloc[:,1:3].plot.scatter(x=x,y=y)
# a=a.loc[['AFR','KGZ','EUR','EAS','SAS']]
# c=pd.Series(sns.color_palette("hls",a.index.unique().size ), index= a.index.unique())
# a
# fig,ax=plt.subplots(1,1,figsize=(6,5),dpi=100)
# x,y=2,3
# # for ax ,(x,y) in zip(axes,[(2,3),(2,4),(3,4)][:1]):
# a.groupby(level=0).apply(lambda X: X.plot.scatter(x=x,y=y,label=X.name,ax=ax,c=c[X.name],alpha=0.995));ax.set_xlabel('PC'+str(x-1));ax.set_ylabel('PC'+str(y-1))
Out[120]: