In [122]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sys,os
# Put the directory four levels above the current working directory on the
# module search path (position 1, i.e. right after the first entry) before the
# `Utils.Util` import below runs -- presumably that is where the `Utils`
# package lives; confirm against the repository layout.
_cwd_parts = os.getcwd().split('/')
path = '/'.join(_cwd_parts[:-4])
sys.path.insert(1, path)
import Utils.Util as utl
import pandas as pd
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
import Utils.Util as utl
from IPython.display import display
import seaborn as sns
# Directory holding the per-chromosome tab-separated tables. This is an
# absolute, machine-specific location and rebinds the name `path`.
path = '/home/arya/HA_selection2/Kyrgyz/hg38/HLI_raw_data/vcf/byChr/'
# Filename template: the '{}' placeholder is filled in with a chromosome label
# through str.format (e.g. f.format('X')).
f = os.path.join(path, 'chr{}.H18-DD.vcf.gz.tsv')
def cdf(a):
    """Return the empirical cumulative distribution of the values in ``a``.

    Missing values are dropped first. The result is a Series indexed by the
    distinct remaining values in ascending order; each entry is the fraction
    of non-missing observations that are less than or equal to that value, so
    the last entry is 1.0 whenever ``a`` has at least one non-missing value.

    a: a pandas Series (any object offering the same ``dropna`` /
       ``value_counts`` interface).
    """
    observed = a.dropna()
    # Share of the non-missing observations taken by each distinct value,
    # ordered by value so the running sum is a proper CDF.
    fractions = observed.value_counts().sort_index() / observed.size
    return fractions.cumsum()
# Read the chromosome X and chromosome 2 tables ('.' marks a missing value) and
# stack them into one frame labelled by chromosome key. Note the keys are of
# mixed type: the string 'X' and the integer 2.
a = (
    pd.concat(
        [pd.read_csv(f.format(_chrom), sep='\t', na_values=['.']) for _chrom in ['X', 2]],
        keys=['X', 2],
    )
    # Turn the per-file row number (index level 1) into an ordinary column so
    # that only the chromosome key remains as the index ...
    .reset_index(level=1)
    # ... then drop the first three columns: that row-number column plus the
    # first two columns of the input files.
    .iloc[:, 3:]
)

In [123]:
# One panel per column for the first three columns of `a`. Each panel overlays
# the empirical CDF of that column computed separately for every value of the
# index (one line per chromosome key).
fig, ax = plt.subplots(1, 3, sharey=True, figsize=(8, 3), dpi=100)
for panel, col in zip(ax, a.columns[:3]):
    per_key_cdf = a[col].groupby(level=0).apply(cdf)
    # Move the group key into the columns so each key becomes its own line.
    per_key_cdf.unstack(level=0).plot(ax=panel)
    panel.set_title(col)
    panel.set_xlim([0, 1000]);
# The middle panel uses a tighter x-range than the other two.
ax[1].set_xlim([0, 100]);
# The y-axis is shared, so setting it once applies to all three panels.
plt.ylim([0, 1]);



In [125]:
#sns.stripplot(data=x,x='GT',y='GQ',jitter=0.05,ax=ax[0])
#sns.stripplot(data=a,x='GT',y='GQ',jitter=0.05,ax=ax[1])
# For each of the three fields draw a two-panel figure (one panel per
# chromosome key) comparing the field's distribution between rows whose GT is
# '0/1' and rows whose GT is '1/1'.
for field in ['GQ', 'GQX', 'QUAL']:
    fig, ax = plt.subplots(1, 2, sharey=True, dpi=100, figsize=(8, 3))
    # QUAL gets a wider y-range than GQ/GQX. The comparison must use `==`:
    # `is` tests object identity, which only happened to work for string
    # literals because of interning and triggers a SyntaxWarning on
    # Python 3.8+.
    ylim = [-50, 950] if field == 'QUAL' else [-50, 350]
    for j, col in enumerate(['X', 2]):
        # Select this chromosome's rows, keep only the two GT values of
        # interest (in that order), and restore GT as a regular column so
        # seaborn can use it for the x-axis.
        calls = a.loc[col].set_index('GT').loc[['0/1', '1/1']].reset_index()
        sns.violinplot(data=calls, x='GT', y=field, ax=ax[j])
        ax[j].set_title(col)
        ax[j].set_ylim(ylim)



In [154]:
#a.reset_index().set_index('GT').loc[['0/1','1/1']].reset_index().groupby('index').apply(lambda x: x.GT.value_counts()).unstack(level=1).plot.bar()
# Plain-text dump of the combined table (truncated according to
# pd.options.display.max_rows). The parenthesised single-argument form prints
# exactly the same text under Python 2 and, unlike the bare `print a`
# statement, is also valid Python 3.
print(a)
# Group the rows by chromosome key. No aggregation is applied, so the cell's
# output is just the repr of the GroupBy object. The commented-out
# continuation refers to a FILTER column, which is not among the columns of
# `a` shown in the printed table.
a.reset_index().groupby('index')#.apply(lambda x: x.FILTER.value_counts())


    QUAL  GQX     GQ   GT     AD
X     10    3    3.0  0/1    0,1
X    141   18   18.0  1/1    0,7
X     40    7    7.0  1/1    0,3
X     39    7    7.0  1/1    0,3
X     73   12   12.0  1/1    0,5
X     51   10   10.0  1/1    0,4
X     74   15   15.0  1/1    0,6
X      1    0    3.0  0/1    0,1
X     34    5    5.0  1/1    0,2
X     20   20   52.0  0/1    3,2
..   ...  ...    ...  ...    ...
2     10   10   43.0  0/1  60,14
2      3    3   34.0  0/1   33,5
2      0    0    NaN  0/1    2,1
2      2    0   30.0  0/1    3,1
2     53   53   86.0  0/1    8,3
2     15   15   48.0  0/1    5,2
2     15   15   48.0  0/1    7,2
2    106  106  133.0  0/1    7,6
2     72   72  105.0  0/1    8,4
2     59    7    7.0  1/1    0,3

[493752 rows x 5 columns]
Out[154]:
<pandas.core.groupby.DataFrameGroupBy object at 0x7ff3af90a5d0>