notebook.community

Edit and run



In [14]:

    
%matplotlib inline
# Notebook setup: third-party imports, project import path, and pandas display options.
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sys,os
# Strip the last four components from the current working directory and insert the
# result at position 1 of sys.path -- presumably so the project-local `Utils` package
# imported below resolves (TODO confirm). The result depends on the directory the
# kernel was started in, and the split assumes '/'-separated paths.
path='/'.join(os.getcwd().split('/')[:-4])
sys.path.insert(1,path)
import Utils.Util as utl
import Utils.Plots as pplt
import pandas as pd
# Cap rendered frames at 20 rows.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
# NOTE(review): this rebinds `plt`, which was bound to matplotlib.pyplot above, to the
# pylab module; every later `plt.` call in the notebook goes through pylab.
import pylab as plt
import seaborn as sns



In [60]:

    
# Re-import the plotting helpers and reload the module object in the running kernel.
import Utils.Plots as pplt
reload(pplt)

# Load the phenotype sheet, rename the status column to `y`, and index rows by it.
phenotype_sheet = pd.read_excel('~/storage/Data/Human/Andes/info/FROM_Haddad_original.xlsx')
indexed_by_status = phenotype_sheet.rename(columns={'CMS/Non-CMS':'y'}).set_index('y')

# Positional selection of the 4th and 5th remaining columns.
# NOTE(review): judging by the table rendered elsewhere in this notebook these are
# `Hct` and `CMS score` -- confirm against the spreadsheet's column order.
a = indexed_by_status.iloc[:,[3,4]]

# Pairwise plot grid of the selected measurements, coloured by status.
sns.pairplot(a.reset_index(),hue='y',size=5)

# New figure: number of rows per status label.
plt.figure()
a.groupby('y').size().plot.bar()









    Out[60]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f71ab94a610>



In [ ]:



In [97]:

    
# Build the population panel for samples whose hematocrit agrees with their CMS label,
# and write both the panel and the list of rejected sample identifiers to disk.

# Consistency rule: CMS rows need Hct > 60, Non-CMS rows need Hct < 60.
# Both comparisons are strict, so a value of exactly 60 satisfies neither branch.
f=lambda x: ((x['CMS/Non-CMS']=='CMS')& (x.Hct>60)) | ((x['CMS/Non-CMS']=='Non-CMS')& (x.Hct<60))

# Series named 'sample', indexed by 'Subject ID', built from the 2nd and 3rd sheet
# columns after dropping incomplete rows and passing every cell through utl.INT.
# NOTE(review): `id` shadows the Python builtin; the name is kept because other cells
# of this notebook may read it.
id=(
    pd.read_excel('~/storage/Data/Human/Andes/info/Haddad_sample_info_20151030_emily.xlsx')
    .iloc[:,[1,2]]
    .dropna()
    .applymap(utl.INT)
    .set_index('Subject ID')
    .iloc[:,0]
    .rename('sample')
)

# Phenotype sheet indexed by SampleID, with the 'sample' column joined on that index.
joined=pd.read_excel('~/storage/Data/Human/Andes/info/FROM_Haddad_original.xlsx').set_index('SampleID').join(id)

# Boolean Series over SampleID: True where the row passes the consistency rule.
i= f(joined)

# Derive the panel columns in a single chain. The previous version assigned columns
# onto a frame obtained by `[[...]]` selection, which is the chained-assignment pattern
# pandas flags with SettingWithCopyWarning; `.assign` returns a fresh frame instead.
b=(
    joined[i]
    .reset_index()
    .assign(
        super_pop='AND',
        pop=lambda d: d['CMS/Non-CMS'].replace({'CMS':'SIK','Non-CMS':'HLT'}),
        gender=lambda d: d['Gender'].replace({'M':'male','F':'female'}),
    )
    [['sample','pop','super_pop','gender']]
)

# NOTE(review): the inputs are read via '~/storage/...' while the outputs go to
# '/home/arya/storage/...'; confirm both resolve to the same tree.
b.to_csv('/home/arya/storage/Data/Human/Andes/Andean_HLI_BAM_VCF/hg19/snp/noINFO/byChr/norm/merge/PASS/noChr/panel.filtered',index=False,sep='\t')

# Sample identifiers of the rows that failed the rule (looked up by their SampleID).
id.loc[i[~i].index].to_csv('/home/arya/storage/Data/Human/Andes/Andean_HLI_BAM_VCF/hg19/snp/noINFO/byChr/norm/merge/PASS/noChr/filter',index=False,sep='\t')



In [86]:









    Out[86]:





SampleID
1CP-A      187524478
48CP-A     187524432
110CP-A    187524404
15CP-G     187524388
Name: Sample ID, dtype: int64



In [59]:

    
# Restrict `a` (indexed by the status label `y`) to rows whose hematocrit agrees with
# the label: CMS with Hct > 60, or Non-CMS with Hct < 60. Both comparisons are strict.
I=((a.index=='CMS')& (a.Hct>60)) | ((a.index=='Non-CMS')& (a.Hct<60))

# Pairwise plot grid of the retained rows, coloured by status.
# NOTE(review): `size=` is the spelling used by older seaborn releases; newer ones call
# this argument `height`. Kept as-is to match the environment this notebook ran in.
sns.pairplot(a[I].reset_index(),hue='y',size=5)

# Number of rows per label that fail the rule. This was previously computed as a bare
# mid-cell expression and therefore never shown; display it explicitly.
display((~I).groupby(level=0).sum())

# Retained rows, rendered as the cell output.
a[I]









    Out[59]:







  
    
      
      SampleID
      Gender
      Age
      Hct
      CMS score
      Notes
    
    
      y
      
      
      
      
      
      
    
  
  
    
      Non-CMS
      3CP-A
      M
      44
      54.0
      7
      NaN
    
    
      Non-CMS
      4CP-A
      M
      40
      53.0
      4
      NaN
    
    
      Non-CMS
      5CP-A
      M
      33
      50.0
      8
      NaN
    
    
      CMS
      8CP-A
      M
      58
      69.0
      21
      NaN
    
    
      CMS
      9CP-A
      M
      25
      64.0
      18
      NaN
    
    
      CMS
      10CP-A
      M
      55
      67.0
      16
      NaN
    
    
      CMS
      11CP-A
      M
      68
      66.0
      16
      NaN
    
    
      Non-CMS
      12CP-A
      M
      26
      55.0
      4
      NaN
    
    
      Non-CMS
      13CP-A
      M
      44
      50.5
      8
      NaN
    
    
      CMS
      15CP-A
      M
      46
      75.0
      18
      NaN
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      Non-CMS
      39CP-G
      M
      30
      52.0
      2
      NaN
    
    
      CMS
      41CP-G
      M
      43
      62.0
      10
      NaN
    
    
      Non-CMS
      50CP-G
      F
      45
      48.0
      4
      NaN
    
    
      CMS
      52CP-G
      M
      64
      82.5
      14
      NaN
    
    
      CMS
      69CP-G
      M
      40
      68.0
      9
      NaN
    
    
      Non-CMS
      80CP-G
      M
      36
      49.0
      1
      NaN
    
    
      Non-CMS
      84CP-G
      M
      32
      53.0
      0
      NaN
    
    
      Non-CMS
      89CP-G
      F
      45
      43.0
      4
      NaN
    
    
      Non-CMS
      94CP-G
      M
      26
      54.0
      4
      NaN
    
    
      CMS
      101CP-G
      M
      68
      66.0
      16
      NaN
    
  

96 rows × 6 columns

	SampleID	Gender	Age	Hct	CMS score	Notes
y
Non-CMS	3CP-A	M	44	54.0	7	NaN
Non-CMS	4CP-A	M	40	53.0	4	NaN
Non-CMS	5CP-A	M	33	50.0	8	NaN
CMS	8CP-A	M	58	69.0	21	NaN
CMS	9CP-A	M	25	64.0	18	NaN
CMS	10CP-A	M	55	67.0	16	NaN
CMS	11CP-A	M	68	66.0	16	NaN
Non-CMS	12CP-A	M	26	55.0	4	NaN
Non-CMS	13CP-A	M	44	50.5	8	NaN
CMS	15CP-A	M	46	75.0	18	NaN
...	...	...	...	...	...	...
Non-CMS	39CP-G	M	30	52.0	2	NaN
CMS	41CP-G	M	43	62.0	10	NaN
Non-CMS	50CP-G	F	45	48.0	4	NaN
CMS	52CP-G	M	64	82.5	14	NaN
CMS	69CP-G	M	40	68.0	9	NaN
Non-CMS	80CP-G	M	36	49.0	1	NaN
Non-CMS	84CP-G	M	32	53.0	0	NaN
Non-CMS	89CP-G	F	45	43.0	4	NaN
Non-CMS	94CP-G	M	26	54.0	4	NaN
CMS	101CP-G	M	68	66.0	16	NaN