In [100]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sys,os
# Put the directory four levels above the current working directory on the
# module search path (right after the first entry) so that the project-local
# `Utils` package imported below can be found.
cwd_parts = os.getcwd().split('/')
path = '/'.join(cwd_parts[:-4])
sys.path.insert(1, path)
import Utils.Util as utl
import pandas as pd
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
import Utils.Util as utl
from IPython.display import display
import seaborn as sns
# Directory containing the per-chromosome TSV files read below.
# NOTE: this rebinds `path`, which was used above for the sys.path entry.
path = '/home/arya/HA_selection2/Kyrgyz/hg38/HLI_raw_data/vcf/byChr/'
# File-name template; the `{}` placeholder is filled with a chromosome label
# through `f.format(...)` when the tables are loaded.
f = path + 'chr{}.H18-DD.vcf.gz.tsv'
def cdf(a):
    """Return the empirical cumulative distribution of a Series.

    Missing values are discarded first. The result is indexed by the distinct
    remaining values in ascending order; each entry is the fraction of
    non-missing observations that are less than or equal to that value.
    """
    observed = a.dropna()
    counts = observed.value_counts().sort_index()
    fractions = counts / observed.size
    return fractions.cumsum()
# Read the chrX and chr21 tables ('.' is treated as missing) and stack them.
# The outer index level carries the chromosome key: the string 'X' and the
# integer 21 -- later cells look rows up with exactly these keys.
chrom_keys = ['X', 21]
chrom_tables = [
    pd.read_csv(f.format(key), sep='\t', na_values=['.'])
    for key in chrom_keys
]
stacked = pd.concat(chrom_tables, keys=chrom_keys)
# Moving the inner (per-file row number) level into a column leaves only the
# chromosome key as the index; the slice then discards that new column along
# with the first two columns of the files.
a = stacked.reset_index(level=1).iloc[:, 3:]

In [101]:
# Empirical CDFs of the first three columns of `a`: one panel per column,
# one curve per chromosome key (index level 0), on a shared y axis.
fig, ax = plt.subplots(1, 3, sharey=True, figsize=(8, 3), dpi=100)
for j, col in enumerate(a.columns[:3]):
    panel = ax[j]
    # cdf() is applied per chromosome; unstacking turns the keys into columns
    # so that each one is drawn as its own line.
    cdf_by_chrom = a[col].groupby(level=0).apply(cdf).unstack(level=0)
    cdf_by_chrom.plot(ax=panel)
    panel.set_title(col)
    panel.set_xlim([0, 1000])
# The middle panel gets a narrower x range than the other two.
ax[1].set_xlim([0, 100])
plt.ylim([0, 1]);



In [119]:
#sns.stripplot(data=x,x='GT',y='GQ',jitter=0.05,ax=ax[0])
#sns.stripplot(data=a,x='GT',y='GQ',jitter=0.05,ax=ax[1])
# Violin plots of each quality field grouped by GT value, with one panel per
# chromosome key of `a` and one figure per field.
#
# The subset of rows whose GT is '0/1' or '1/1' does not depend on the field
# being plotted, so it is built once per chromosome instead of once per figure.
gt_values = ['0/1', '1/1']
calls_by_chrom = {
    col: a.loc[col].set_index('GT').loc[gt_values].reset_index()
    for col in ['X', 21]
}
for field in ['GQ', 'GQX', 'QUAL']:
    # Compare strings with `==`: `is` tests object identity, which only
    # happens to succeed for interned literals and triggers a SyntaxWarning
    # on Python 3.8+.
    ylim = [-50, 950] if field == 'QUAL' else [-50, 350]
    fig, ax = plt.subplots(1, 2, sharey=True, dpi=100, figsize=(8, 3))
    for j, col in enumerate(['X', 21]):
        sns.violinplot(data=calls_by_chrom[col], x='GT', y=field, ax=ax[j])
        ax[j].set_title(col)
        ax[j].set_ylim(ylim)