In [14]:
# Notebook setup: plotting backend, third-party and project imports, and
# pandas display options.
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sys,os
# Make an ancestor directory importable: the last four components of the
# current working directory are dropped and the remaining path is inserted at
# position 1 of sys.path. Presumably this is what lets the `Utils` package
# below resolve -- confirm against the repository layout.
path='/'.join(os.getcwd().split('/')[:-4])
sys.path.insert(1,path)
import Utils.Util as utl
import Utils.Plots as pplt
import pandas as pd
# Display options: max_rows is set to 20 and expand_frame_repr is switched on.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
# NOTE: this rebinds `plt` (bound to matplotlib.pyplot above) to the pylab
# module; later cells that use `plt` get pylab.
import pylab as plt
import seaborn as sns
In [60]:
# Pick up any edits made to the project plotting helpers since the kernel
# imported them.
import Utils.Plots as pplt
reload(pplt)

# Phenotype table: the 'CMS/Non-CMS' column is renamed to 'y' and moved into
# the index, then the columns at positions 3 and 4 (counted after 'y' has left
# the column list) are kept.
a = (
    pd.read_excel('~/storage/Data/Human/Andes/info/FROM_Haddad_original.xlsx')
    .rename(columns={'CMS/Non-CMS': 'y'})
    .set_index('y')
    .iloc[:, [3, 4]]
)
# Evaluated but not the cell's final expression, so with IPython's default
# settings it is not rendered.
a

# Pairwise scatter/histogram grid of the two kept columns, coloured by 'y'.
phenotypes_flat = a.reset_index()
sns.pairplot(phenotypes_flat, hue='y', size=5)

# Separate figure: number of rows per 'y' value. The bar-plot call stays last
# so its return value remains the cell's output.
plt.figure()
rows_per_label = a.groupby('y').size()
rows_per_label.plot.bar()
Out[60]:
In [ ]:
In [97]:
def f(x):
    """Return a boolean mask of rows whose CMS label agrees with their Hct.

    A row passes when it is labelled 'CMS' and has Hct > 60, or is labelled
    'Non-CMS' and has Hct < 60. Both comparisons are strict, so a row with
    Hct exactly 60 fails under either label.

    x: DataFrame with 'CMS/Non-CMS' and 'Hct' columns.
    Returns: boolean Series aligned with x's index.
    """
    status = x['CMS/Non-CMS']
    return ((status == 'CMS') & (x.Hct > 60)) | ((status == 'Non-CMS') & (x.Hct < 60))


# Lookup Series named 'sample', indexed by 'Subject ID': built from the columns
# at positions 1 and 2 of the sample sheet, with incomplete rows dropped and
# every cell passed through utl.INT.
# NOTE(review): `id` shadows the Python builtin of the same name; the name is
# kept because other cells may read it.
id = (
    pd.read_excel('~/storage/Data/Human/Andes/info/Haddad_sample_info_20151030_emily.xlsx')
    .iloc[:, [1, 2]]
    .dropna()
    .applymap(utl.INT)
    .set_index('Subject ID')
    .iloc[:, 0]
    .rename('sample')
)

# Phenotype sheet indexed by SampleID, with the matching 'sample' value joined
# on the index (DataFrame.join defaults to a left join, so unmatched SampleIDs
# get a missing 'sample').
labelled = (
    pd.read_excel('~/storage/Data/Human/Andes/info/FROM_Haddad_original.xlsx')
    .set_index('SampleID')
    .join(id)
)

# True for rows that pass the label/Hct consistency check; indexed by SampleID.
i = f(labelled)

# Panel table for the rows that pass. The derived columns are created with
# assign() on the selected frame rather than by item assignment into a column
# slice, which is the pattern that triggers pandas' SettingWithCopyWarning.
b = (
    labelled[i]
    .reset_index()[['SampleID', 'CMS/Non-CMS', 'Gender', 'sample']]
    .assign(
        super_pop='AND',
        pop=lambda frame: frame['CMS/Non-CMS'].replace({'CMS': 'SIK', 'Non-CMS': 'HLT'}),
        gender=lambda frame: frame.Gender.replace({'M': 'male', 'F': 'female'}),
    )
    [['sample', 'pop', 'super_pop', 'gender']]
)

# Both outputs are written to the same directory, tab-separated, without the
# index.
panel_dir = '/home/arya/storage/Data/Human/Andes/Andean_HLI_BAM_VCF/hg19/snp/noINFO/byChr/norm/merge/PASS/noChr/'
b.to_csv(panel_dir + 'panel.filtered', index=False, sep='\t')

# 'sample' values of the rows that failed the check, looked up by SampleID.
# NOTE(review): a failing SampleID with no entry in `id` yields a missing value
# on older pandas and a KeyError on newer ones -- confirm every SampleID has a
# sample before relying on this file.
id.loc[i[~i].index].to_csv(panel_dir + 'filter', index=False, sep='\t')
# pd.read_excel('~/storage/Data/Human/Andes/info/FROM_Haddad_original.xlsx')
In [86]:
Out[86]:
In [59]:
# Mask over `a` (indexed by the 'y' label): a row is kept when it is labelled
# 'CMS' with Hct > 60, or labelled 'Non-CMS' with Hct < 60. Both comparisons
# are strict.
labelled_cms = a.index == 'CMS'
labelled_non_cms = a.index == 'Non-CMS'
I = (labelled_cms & (a.Hct > 60)) | (labelled_non_cms & (a.Hct < 60))

# Rows that pass the mask, plotted pairwise and coloured by 'y'.
consistent_rows = a[I]
sns.pairplot(consistent_rows.reset_index(), hue='y', size=5)

# Number of rows failing the mask per index value. This is not the cell's
# final expression, so with IPython's default settings it is not rendered.
(~I).groupby(level=0).sum()

# Cell output: the rows that pass the mask.
consistent_rows
Out[59]: