In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [34]:
%matplotlib inline

plt.rcParams['figure.figsize'] = [12, 10]
plt.rcParams['font.size'] = 12

In [3]:
# List the working directory; the two input tables read later in this notebook
# (MAF_by_pval_ery and MAF_by_pval_par) appear in the listing.
%ll


total 160
drwxrwxr-x   3 Claudius  staff    102  1 Dec  2016 2D-SFS-axes/
drwxrwxr-x   6 Claudius  staff    204 30 Jun 22:31 BOOTSTRAP_CONTIGS/
drwxrwxr-x   5 Claudius  staff    170 18 May 09:01 DEDUPLICATED/
drwxrwxr-x   3 Claudius  staff    102 11 Dec  2016 Data/
drwxrwxr-x  11 Claudius  staff    374 12 May 10:11 FST/
-rw-rw-r--   1 Claudius  staff    197  2 Aug 17:30 MAF_by_pval_ery
-rw-rw-r--   1 Claudius  staff    194  2 Aug 17:24 MAF_by_pval_par
-rwxrwxr-x   1 Claudius  staff    210 22 Dec  2016 ML.sh*
drwxrwxr-x   3 Claudius  staff    102 12 Dec  2016 PCA/
drwxrwxr-x   5 Claudius  staff    170 11 Dec  2016 Quality_Control/
drwxrwxr-x  17 Claudius  staff    578 12 May 10:06 Results/
drwxrwxr-x   4 Claudius  staff    136 19 Dec  2016 SAFs/
drwxrwxr-x   9 Claudius  staff    306 12 May 10:14 SFS/
drwxr-xr-x   3 Claudius  staff    102 26 Dec  2016 Scripts/
drwxr-xr-x   3 Claudius  staff    102 14 Dec  2016 TEST/
drwxrwxr-x   4 Claudius  staff    136 26 Dec  2016 THETASTAT/
-rw-r--r--   1 Claudius  staff    986  1 Aug 14:23 Untitled.ipynb
-rw-r--r--   1 Claudius  staff     72  2 Aug 17:46 Untitled1.ipynb
-rw-r--r--   1 Claudius  staff  43334 23 Jul 10:28 heatmap.ipynb
-rw-r--r--@  1 Claudius  staff    189 23 Jul 09:39 hist2d_demo.py
-rw-r--r--   1 Claudius  staff   2658 16 Jul 15:44 issue86_post
-rw-r--r--   1 Claudius  staff   2658 16 Jul 10:45 issue86_post~
-rwxrwxr-x   1 Claudius  staff   1591 22 Nov  2016 sites2bed.pl*

In [18]:
def read_maf_by_pval(path):
    """Read a tab-separated table with three columns into parallel sequences.

    The first line of the file is treated as a header and discarded. Every
    following non-blank line must contain exactly three tab-separated fields,
    which are parsed as float (p-value), float (average MAF) and int (number
    of SNPs).

    Parameters
    ----------
    path : str
        File to read.

    Returns
    -------
    (list of float, list of float, numpy.ndarray)
        p-values, average MAFs and SNP counts, in file order.

    Raises
    ------
    ValueError
        If a non-blank data line does not have exactly three fields or a
        field cannot be converted to the expected number type.
    """
    pvals, mafs, counts = [], [], []
    with open(path) as handle:
        handle.readline()  # read the first line, but discard (header)
        for record in handle:
            record = record.strip()
            if not record:
                # A blank (e.g. trailing) line would otherwise split into
                # [''] and break the three-way unpacking below.
                continue
            p_field, maf_field, count_field = record.split("\t")
            pvals.append(float(p_field))
            mafs.append(float(maf_field))
            counts.append(int(count_field))
    # The counts are returned as an array because they are passed to
    # np.log10 when plotting.
    return pvals, mafs, np.array(counts)


pval, MAF, numSNP = read_maf_by_pval("MAF_by_pval_ery")

In [9]:
# Development aid: show the docstring of plt.scatter (IPython `?` help).
plt.scatter?

In [35]:
# Scatter of average MAF against p-value on linear axes; marker colour encodes
# the raw SNP count of each point.
fig = plt.gcf()
ax = fig.gca()
points = ax.scatter(pval, MAF, s=75, c=numSNP, cmap='jet')
fig.colorbar(points, ax=ax)
ax.grid()
ax.set_xlabel('p-value')
ax.set_ylabel('average MAF')


Out[35]:
<matplotlib.text.Text at 0x10952ef10>

In [29]:
# Development aid: show the docstring of plt.semilogx (IPython `?` help).
plt.semilogx?

In [93]:
# Same scatter on a logarithmic p-value axis; marker colour now encodes
# log10 of the SNP count instead of the raw count.
fig = plt.gcf()
ax = fig.gca()
ax.semilogx()  # called without data: only switches the x axis to log scale
points = ax.scatter(pval, MAF, s=80, c=np.log10(numSNP), cmap='jet')

# Colorbar ticks are placed at whole powers of ten of the SNP count.
cb = fig.colorbar(points, ax=ax, ticks=[2, 3, 4])
cb.set_label(r"$log_{10}$" + " #SNP")

ax.grid()
ax.set_xlabel('p-value')
ax.set_ylabel('average MAF')
ax.set_title('Dependence of HWE p-value on MAF')
fig.savefig("MAF_by_pval_ery.png")



In [80]:
# Development aid: show the docstring of plt.colorbar (IPython `?` help).
plt.colorbar?

In [43]:
# Development aid: show the docstring of np.log10 (IPython `?` help).
np.log10?

In [48]:
# Inspect the log10-transformed SNP counts, i.e. the values that are mapped to
# colour in the semilog scatter plot (presumably to choose colorbar ticks).
np.log10(numSNP)


Out[48]:
array([ 4.6345478 ,  4.44111359,  4.03185263,  3.61616031,  3.40908737,
        2.84941941,  2.56937391,  1.74036269,  1.38021124])

In [53]:
# Development aid: show the docstring of plt.annotate (IPython `?` help).
plt.annotate?

In [89]:
# Development aid: show the docstring of the colorbar's set_label method
# (IPython `?` help); requires `cb` from the semilog plotting cell.
cb.set_label?

**TODO:** I am still looking for a way to set the tick labels on the colorbar (e.g. to show the SNP counts themselves instead of their $\log_{10}$ values).

Now do the same for the SNPs in the PAR population.


In [94]:
# --- Load the PAR table ------------------------------------------------------
# NOTE: this rebinds pval, MAF and numSNP. Re-running an earlier plotting cell
# after this one would therefore plot the PAR values (and save them under the
# ERY file name) unless the ERY table is loaded again first.
pval, MAF, numSNP = [], [], []

with open("MAF_by_pval_par") as f:
    f.readline()  # read the first line, but discard (header)
    for line in f:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line would otherwise split into ['']
            # and raise ValueError in the three-way unpacking below.
            continue
        one, two, three = line.split("\t")
        pval.append(float(one))
        MAF.append(float(two))
        numSNP.append(int(three))

# Array form is needed for the element-wise np.log10 below.
numSNP = np.array(numSNP)

# --- Plot average MAF against p-value ----------------------------------------
# Create a fresh figure explicitly so the plot never lands on a figure that
# is still current from an earlier cell.
fig, ax = plt.subplots()
ax.set_xscale('log')  # p-values are shown on a logarithmic x axis
points = ax.scatter(pval, MAF, s=80, c=np.log10(numSNP), cmap='jet')

# Colorbar ticks at whole powers of ten of the SNP count.
# NOTE(review): the tick positions and the title are identical to the ERY
# figure; confirm they suit the PAR data and consider naming the population.
cb = fig.colorbar(points, ax=ax, ticks=[2, 3, 4])
cb.set_label(r"$log_{10}$" + " #SNP")

ax.grid(True)
ax.set_xlabel('p-value')
ax.set_ylabel('average MAF')
ax.set_title('Dependence of HWE p-value on MAF')
fig.savefig("MAF_by_pval_par.png")



In [ ]: