In [2]:
# Notebook setup: plotting backend, project import path, pandas display options.
# IPython magic: show matplotlib figures inline in the notebook.
%matplotlib inline
import matplotlib as mpl
# NOTE(review): requesting the non-interactive 'agg' backend right after
# `%matplotlib inline` looks contradictory — confirm which backend is intended.
mpl.use('agg')
import os
# os.environ["DISPLAY"] = "localhost:11.0"
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): `os` is imported a second time here; harmless but redundant.
import sys,os
# Take the current working directory, drop its last four path components, and
# put the result at position 1 of sys.path. Presumably this is the project root
# that makes the `Utils` / `Scripts` packages below importable — depends on the
# directory the notebook is launched from; TODO confirm.
path='/'.join(os.getcwd().split('/')[:-4])
sys.path.insert(1,path)
import Utils.Util as utl
import Utils.Plots as pplt
import pandas as pd
# pandas display options: cap displayed rows at 20.
pd.options.display.max_rows = 20;
pd.options.display.expand_frame_repr = True
from IPython.display import display
# NOTE(review): this rebinds `plt` from matplotlib.pyplot to pylab for the rest
# of the notebook — confirm this is intentional.
import pylab as plt
import seaborn as sns
import Scripts.KyrgysHAPH.Util as kutl
import Scripts.KyrgysHAPH.Plot as kplt
import Scripts.HLI.Kyrgyz.IBSScan.IBDScan as ibd
import Scripts.HLI.Kyrgyz.PBS as pbs
# Allow up to 2000 characters per column when displaying frames.
pd.options.display.max_colwidth = 2000;
# NOTE(review): second `import matplotlib as mpl`; redundant with the one above.
import matplotlib as mpl

In [79]:
# Interval of interest, stored as a Series with CHROM / start / end entries
# (presumably genomic coordinates on chromosome 22 — TODO confirm units/build).
i=pd.Series({'CHROM':22,'start':26099429,'end':27203877})
# Path to the gzipped input file for chromosome 22.
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
f='/home/arya/POP/HA/GT/chr22.vcf.gz.aa.gz'
# Re-import the project utility module so edits made on disk are picked up
# (`reload` is a builtin on Python 2 only).
reload(utl)
# A stray bare token (`ulsrm`) used to follow here; it was not a defined name
# and raised NameError each time the cell ran, so it has been removed.
# utl.gz.FreqPop(pop='KGZ')
# 
# kutl.ID('HLT')

In [89]:
# pd.concat([a,b]).T.dropna().T


Out[89]:
sample pop super_pop gender
0 HG00096 GBR EUR male
1 HG00097 GBR EUR female
2 HG00099 GBR EUR female
3 HG00100 GBR EUR female
4 HG00101 GBR EUR male
5 HG00102 GBR EUR female
6 HG00103 GBR EUR male
7 HG00105 GBR EUR male
8 HG00106 GBR EUR female
9 HG00107 GBR EUR male
... ... ... ... ...
2660 201852658 HLT KGZ male
2661 201852660 SIK KGZ male
2662 201852669 HLT KGZ male
2663 201852637 SIK KGZ male
2664 201852665 HLT KGZ male
2665 201852639 SIK KGZ male
2666 201852663 HLT KGZ male
2667 201852667 HLT KGZ male
2668 201852650 SIK KGZ male
2669 201852664 HLT KGZ male

2670 rows × 4 columns


In [57]:
# Keep only the rows of `a` whose halved per-row means over the `p1` columns and
# over the `p2` columns differ by more than AFCF in absolute value.
# Presumably `a` holds 0/1/2 genotype counts with one column per sample, so
# mean/2 is a per-population allele frequency and AFCF is an allele-frequency
# difference cutoff — TODO confirm against where `a`, `p1`, `p2` are defined.
# MINAC = 3
AFCF=0.05
# a= a[((a.sum(1) > MINAC) & (a.sum(1) < a.shape[1] * MINAC))]
# Parenthesised print: same output on Python 2 for a single argument, and no
# longer a SyntaxError on Python 3.
print(a.shape)  # shape before filtering
# NOTE(review): `a` is overwritten in place, so re-running this cell reports the
# already-filtered shape on the first print.
a=a[(a[p1].mean(axis=1) / 2 - a[p2].mean(axis=1) / 2).abs() > AFCF]
print(a.shape)  # shape after filtering


(162558, 33)
(7167, 33)