Plotting distribution of $\theta$ from the Empirical Bayesian Analysis



In [7]:

    
import numpy as np
import pandas as pd
import pandas.tools.rplot as rplot
import matplotlib.pyplot as plt
%matplotlib inline



In [8]:

    
def make_plot(mydat, title, fname):
    """Draw a 2x2 grid of q4/q5/q6 density plots, one panel per APN bin, and save it.

    Parameters
    ----------
    mydat : indexable holding DataFrames at positions 0-3, each with
        'q4', 'q5' and 'q6' columns. Position i is drawn in panel i,
        filling the grid in row-major order.
    title : figure-level title.
    fname : path handed to ``fig.savefig``.
    """
    # Panel titles in drawing order; the position doubles as the index into mydat.
    panel_titles = ('apn < 5', '5 <= apn < 15', '15 <= apn < 50', '50 <= apn')

    fig, axes = plt.subplots(nrows=2, ncols=2)
    fig.set_size_inches(12, 10)
    fig.suptitle(title, fontsize=14)

    for idx, panel_title in enumerate(panel_titles):
        # divmod maps 0..3 onto grid cells (0,0), (0,1), (1,0), (1,1).
        row, col = divmod(idx, 2)
        ax = axes[row, col]
        mydat[idx][['q4', 'q5', 'q6']].plot(ax=ax, kind='density', xlim=(-0.1, 1.1), color=('r','b','k'))
        ax.set_title(panel_title)
        ax.set_xlabel('Line <--- AB ---> Tester')
        # Vertical reference line at x = 0.5, the midpoint of the Line/Tester axis.
        ax.axvline(x=0.5, linewidth=2, color='k')

    fig.savefig(fname)

Import Empirical Bayesian dataset with flags



In [9]:

    
# CSV holding the Empirical Bayesian results together with their flag columns.
fname = '/home/jfear/mclab/cegs_ase_paper/pipeline_output/emp_bayesian/output/emp_for_plotting.csv'
mydat = pd.read_csv(fname)

# Show the first three rows as a quick sanity check of the import.
mydat.head(3)









    Out[9]:






  
    
      
      line
      ms
      fusion_id
      q4
      q5
      q6
      flag_q4_AI
      flag_q5_AI
      flag_q6_AI
      flag_all_AI
      rank_apn
      flag_fusion_biased
    
  
  
    
      0
       r101
       M
       F10059_SI
       0.489
       0.407
       0.324
       0
       0
       1
       0
       1
       0
    
    
      1
       r101
       V
       F10059_SI
       0.655
       0.584
       0.525
       0
       0
       0
       0
       2
       0
    
    
      2
       r109
       M
       F10059_SI
       0.200
       0.181
       0.153
       0
       0
       0
       0
       0
       0
    
  

3 rows × 12 columns

Create subsets

Initial subsetting is done by APN

r0: APN < 5 (rank_apn == 0)
r1: 5 <= APN < 15 (rank_apn == 1)
r2: 15 <= APN < 50 (rank_apn == 2)
r3: APN >= 50 (rank_apn == 3)

Mated and virgin samples are also split apart for plotting.



In [10]:

    
# One subset per APN rank flag (0 through 3), unpacked into r0..r3.
r0, r1, r2, r3 = (mydat[mydat['rank_apn'] == rank] for rank in range(4))



In [11]:

    
# Mated rows within each APN bin: keep rows whose 'ms' value contains 'M'.
m0, m1, m2, m3 = (
    apn_bin[apn_bin['ms'].str.contains('M')] for apn_bin in (r0, r1, r2, r3)
)

Make panel of plots for Mated and Virgin, separating by APN



In [12]:

    
# Mated samples: one density panel per APN bin, saved as a PNG.
make_plot(
    mydat=[m0, m1, m2, m3],
    title='Mated',
    fname='/home/jfear/mclab/cegs_ase_paper/pipeline_output/emp_bayesian/output/emp_bayesian_density_by_apn_mated.png'
)



In [13]:

    
# Virgin rows within each APN bin: keep rows whose 'ms' value contains 'V'.
v0, v1, v2, v3 = (
    apn_bin[apn_bin['ms'].str.contains('V')] for apn_bin in (r0, r1, r2, r3)
)



In [14]:

    
# Virgin samples: one density panel per APN bin, saved as a PNG.
make_plot(
    mydat=[v0, v1, v2, v3],
    title='Virgin',
    fname='/home/jfear/mclab/cegs_ase_paper/pipeline_output/emp_bayesian/output/emp_bayesian_density_by_apn_virgin.png'
)

Re-make panel of plots after removing fusions that are always biased

Plots still look a little odd. Now remove fusions that are biased in the 100 genome simulation and re-plot.



In [15]:

    
# Keep fusions that are not flagged as having bias
def drop_bias(mydat):
    """Return only the rows of ``mydat`` whose 'flag_fusion_biased' equals 0.

    The input frame is not modified; the original row index labels are kept.
    """
    unbiased = mydat['flag_fusion_biased'] == 0
    return mydat[unbiased]

# Apply the bias filter to every APN bin, keeping the bin order (0..3) that make_plot expects.
mated = list(map(drop_bias, [m0, m1, m2, m3]))
virgin = list(map(drop_bias, [v0, v1, v2, v3]))



In [16]:

    
# Mated samples again, now with the always-biased fusions removed.
make_plot(
    mydat=mated,
    title='Mated',
    fname='/home/jfear/mclab/cegs_ase_paper/pipeline_output/emp_bayesian/output/emp_bayesian_density_by_apn_mated_no_bias.png'
)



In [17]:

    
# Virgin samples again, now with the always-biased fusions removed.
make_plot(
    mydat=virgin,
    title='Virgin',
    fname='/home/jfear/mclab/cegs_ase_paper/pipeline_output/emp_bayesian/output/emp_bayesian_density_by_apn_virgin_no_bias.png'
)



In [17]:

	line	ms	fusion_id	q4	q5	q6	flag_q6_AI	rank_apn
0	r101	M	F10059_SI	0.489	0.407	0.324	1	1
1	r101	V	F10059_SI	0.655	0.584	0.525	0	2
2	r109	M	F10059_SI	0.200	0.181	0.153	0	0