notebook.community

Edit and run



In [2]:

    
import numpy as np
import matplotlib.pyplot as plt



In [3]:

    
%matplotlib inline

plt.rcParams['figure.figsize'] = [12, 10]
plt.rcParams['font.size'] = 12



In [3]:

    
%ll









    



total 160
drwxrwxr-x   3 Claudius  staff    102  1 Dec  2016 2D-SFS-axes/
drwxrwxr-x   6 Claudius  staff    204 30 Jun 22:31 BOOTSTRAP_CONTIGS/
drwxrwxr-x   5 Claudius  staff    170 18 May 09:01 DEDUPLICATED/
drwxrwxr-x   3 Claudius  staff    102 11 Dec  2016 Data/
drwxrwxr-x  11 Claudius  staff    374 12 May 10:11 FST/
-rw-rw-r--   1 Claudius  staff    197  2 Aug 17:30 MAF_by_pval_ery
-rw-rw-r--   1 Claudius  staff    194  2 Aug 17:24 MAF_by_pval_par
-rwxrwxr-x   1 Claudius  staff    210 22 Dec  2016 ML.sh*
drwxrwxr-x   3 Claudius  staff    102 12 Dec  2016 PCA/
drwxrwxr-x   5 Claudius  staff    170 11 Dec  2016 Quality_Control/
drwxrwxr-x  17 Claudius  staff    578 12 May 10:06 Results/
drwxrwxr-x   4 Claudius  staff    136 19 Dec  2016 SAFs/
drwxrwxr-x   9 Claudius  staff    306 12 May 10:14 SFS/
drwxr-xr-x   3 Claudius  staff    102 26 Dec  2016 Scripts/
drwxr-xr-x   3 Claudius  staff    102 14 Dec  2016 TEST/
drwxrwxr-x   4 Claudius  staff    136 26 Dec  2016 THETASTAT/
-rw-r--r--   1 Claudius  staff    986  1 Aug 14:23 Untitled.ipynb
-rw-r--r--   1 Claudius  staff     72  2 Aug 17:46 Untitled1.ipynb
-rw-r--r--   1 Claudius  staff  43334 23 Jul 10:28 heatmap.ipynb
-rw-r--r--@  1 Claudius  staff    189 23 Jul 09:39 hist2d_demo.py
-rw-r--r--   1 Claudius  staff   2658 16 Jul 15:44 issue86_post
-rw-r--r--   1 Claudius  staff   2658 16 Jul 10:45 issue86_post~
-rwxrwxr-x   1 Claudius  staff   1591 22 Nov  2016 sites2bed.pl*



In [18]:

    
# Read the tab-separated table "MAF_by_pval_ery" into three parallel
# sequences, one entry per data row. The three columns are parsed as
# float, float and int and stored as pval, MAF and numSNP respectively.
pval, MAF, numSNP = [], [], []

with open("MAF_by_pval_ery") as f:
    f.readline()  # consume the header line; its content is not used
    for line in f:
        # Exactly three tab-separated fields are expected on every row.
        p_field, maf_field, count_field = line.strip().split("\t")
        pval.append(float(p_field))
        MAF.append(float(maf_field))
        numSNP.append(int(count_field))

# Keep the counts as an array so numpy functions can be applied element-wise.
numSNP = np.array(numSNP)



In [9]:

    
plt.scatter?



In [35]:

    
# Scatter of MAF against p-value; each point is coloured by its raw SNP count.
points = plt.scatter(pval, MAF, s=75, c=numSNP, cmap='jet')
plt.colorbar(points)  # colour scale for the scatter drawn above
plt.grid()
plt.xlabel('p-value')
plt.ylabel('average MAF')









    Out[35]:





<matplotlib.text.Text at 0x10952ef10>



In [29]:

    
plt.semilogx?



In [93]:

    
# Same scatter as before, but with a logarithmic x axis and points coloured
# by log10 of the SNP count; the figure is written to a PNG file.
ax = plt.gca()
ax.semilogx()  # called without data: only switches the x axis to log scale

log_counts = np.log10(numSNP)
plt.scatter(pval, MAF, s=80, c=log_counts, cmap='jet')

# Colour bar with ticks at log10 counts of 2, 3 and 4.
cb = plt.colorbar(ticks=[2, 3, 4])
cb.set_label(r"$log_{10}$ #SNP")

plt.grid()
plt.xlabel('p-value')
plt.ylabel('average MAF')
plt.title('Dependence of HWE p-value on MAF')
plt.savefig("MAF_by_pval_ery.png")



In [80]:

    
plt.colorbar?



In [43]:

    
np.log10?



In [48]:

    
np.log10(numSNP)









    Out[48]:





array([ 4.6345478 ,  4.44111359,  4.03185263,  3.61616031,  3.40908737,
        2.84941941,  2.56937391,  1.74036269,  1.38021124])



In [53]:

    
plt.annotate?



In [89]:

    
cb.set_label?

I am still looking for a way to set tick labels on the colorbar.

Now do the same for the SNPs in the PAR population.



In [94]:

    
# Read the tab-separated table "MAF_by_pval_par" (three columns parsed as
# float, float, int) and plot it with a log-scaled x axis, colouring points
# by log10 of the SNP count.
pval, MAF, numSNP = [], [], []

with open("MAF_by_pval_par") as f:
    f.readline()  # consume the header line; its content is not used
    for line in f:
        # Exactly three tab-separated fields are expected on every row.
        p_field, maf_field, count_field = line.strip().split("\t")
        pval.append(float(p_field))
        MAF.append(float(maf_field))
        numSNP.append(int(count_field))

# Keep the counts as an array so np.log10 can be applied element-wise.
numSNP = np.array(numSNP)

ax = plt.gca()
ax.semilogx()  # called without data: only switches the x axis to log scale

log_counts = np.log10(numSNP)
plt.scatter(pval, MAF, s=80, c=log_counts, cmap='jet')

# Colour bar with ticks at log10 counts of 2, 3 and 4.
cb = plt.colorbar(ticks=[2, 3, 4])
cb.set_label(r"$log_{10}$ #SNP")

plt.grid()
plt.xlabel('p-value')
plt.ylabel('average MAF')
plt.title('Dependence of HWE p-value on MAF')
plt.savefig("MAF_by_pval_par.png")

I have also determined the MAF of SNPs with a negative F value at different p-value cutoffs.



In [4]:

    
# Read the tab-separated table "MAF_by_pval_negFis_par" (three columns parsed
# as float, float, int) and plot it with a log-scaled x axis, colouring points
# by log10 of the SNP count.
pval, MAF, numSNP = [], [], []

with open("MAF_by_pval_negFis_par") as f:
    f.readline() # read the first line, but discard (header)
    for line in f:
        # Exactly three tab-separated fields are expected on every row.
        one, two, three = line.strip().split("\t")
        pval.append(float(one))
        MAF.append(float(two))
        numSNP.append(int(three))

# Keep the counts as an array so np.log10 can be applied element-wise.
numSNP = np.array(numSNP)

ax = plt.gca()
ax.semilogx()  # called without data: only switches the x axis to log scale
plt.scatter(pval, MAF, s=80, c=np.log10(numSNP), cmap='jet')
cb = plt.colorbar(ticks=[2, 3, 4])
cb.set_label(r"$log_{10}$" + " #SNP")
plt.grid()
plt.xlabel('p-value')
plt.ylabel('average MAF')
plt.title('Dependence of HWE p-value on MAF')
# Save under a name matching the input table so this figure does not
# overwrite "MAF_by_pval_par.png", which the cell plotting "MAF_by_pval_par"
# writes.
plt.savefig("MAF_by_pval_negFis_par.png")



In [5]:

    
# Read the tab-separated table "MAF_by_pval_negFis_ery" (three columns parsed
# as float, float, int) and plot it with a log-scaled x axis, colouring points
# by log10 of the SNP count.
pval, MAF, numSNP = [], [], []

with open("MAF_by_pval_negFis_ery") as f:
    f.readline() # read the first line, but discard (header)
    for line in f:
        # Exactly three tab-separated fields are expected on every row.
        one, two, three = line.strip().split("\t")
        pval.append(float(one))
        MAF.append(float(two))
        numSNP.append(int(three))

# Keep the counts as an array so np.log10 can be applied element-wise.
numSNP = np.array(numSNP)

ax = plt.gca()
ax.semilogx()  # called without data: only switches the x axis to log scale
plt.scatter(pval, MAF, s=80, c=np.log10(numSNP), cmap='jet')
cb = plt.colorbar(ticks=[2, 3, 4])
cb.set_label(r"$log_{10}$" + " #SNP")
plt.grid()
plt.xlabel('p-value')
plt.ylabel('average MAF')
plt.title('Dependence of HWE p-value on MAF')
# Save under a name matching the input table so this figure does not
# overwrite "MAF_by_pval_par.png", which the cell plotting "MAF_by_pval_par"
# writes.
plt.savefig("MAF_by_pval_negFis_ery.png")



In [ ]:

Table of Contents