In [3]:

    
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import mpld3
mpld3.enable_notebook()

Set up output



In [4]:

    
# Root of the project tree, read from the MCLAB environment variable.
mclab = os.getenv('MCLAB')
if mclab is None:
    # os.getenv returns None when the variable is unset; os.path.join would then
    # fail with an unhelpful type error, so stop here with an explicit message.
    raise EnvironmentError('The MCLAB environment variable is not set; '
                           'export MCLAB=<path to the mclab directory> and restart the kernel.')

# Directory that the figures generated by this notebook are written to.
odir = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/emp_bayesian/PG_model/')

Import data



In [5]:

    
# Build the input path from odir instead of a user-specific absolute path so the
# notebook runs on any machine where MCLAB is configured.
# NOTE(review): assumes $MCLAB resolves to the tree previously hard-coded as
# /home/jfear/mclab -- confirm.
fname = os.path.join(odir, 'emp_for_plotting.csv')
dat = pd.read_csv(fname)

# Show a single row as a quick check of the columns that were read.
dat.head(1)









    Out[5]:






  
    
      
      line
      ms
      fusion_id
      q4
      q4_q025
      q4_q975
      q5
      q5_q025
      q5_q975
      q6
      q6_q025
      q6_q975
      flag_q4_AI
      flag_q5_AI
      flag_q6_AI
      flag_all_AI
      mean_apn
      rank_apn
      qsim_line
      flag_qsim_bias
    
  
  
    
      0
      r101
      M
      F10001_SI
      0.572
      0.422
      0.723
      0.48
      0.334
      0.624
      0.387
      0.25
      0.529
      0
      0
      0
      0
      14.255319
      1
      0.5
      0



In [6]:

    
# Import Background Bias Flags and merge to data.
# The path is built from mclab rather than a user-specific absolute path.
# NOTE(review): assumes $MCLAB resolves to the tree previously hard-coded as
# /home/jfear/mclab -- confirm.
bg_fname = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/100_genome_simulation/fb551_100_genome_flag_bkg_bias.csv')
bg_bias = pd.read_csv(bg_fname)

# Left merge keeps every row of dat; a fusion_id that is absent from bg_bias
# ends up with NaN in the columns contributed by bg_bias.
merged = pd.merge(dat, bg_bias, on='fusion_id', how='left')

# Keep only rows whose flag_bg_bias is exactly 0. NaN == 0 evaluates to False,
# so fusions without a flag are removed along with the flagged ones.
masked = merged[merged['flag_bg_bias'] == 0]

# Report how many rows and distinct fusions remain after masking.
print("""
Orig rows:                 {}
Masked rows:               {}
Orig fusions:              {}
Num fusions after masking: {}
""".format(dat.shape[0],
           masked.shape[0],
           len(set(dat.fusion_id)),
           len(set(masked.fusion_id))))









    



Orig rows:                 1939828
Masked rows:               1772699
Orig fusions:              49947
Num fusions after masking: 45510

Generate Plots for Mated and Virgin



In [7]:

    
# Split the masked data on the 'ms' column ('M' -> mated, 'V' -> virgin).
# Each subset keeps the line label and the q4/q5/q6 columns and is indexed
# by fusion_id.
theta_cols = ['fusion_id', 'line', 'q4', 'q5', 'q6']

is_mated = masked['ms'] == 'M'
mated = masked.loc[is_mated, theta_cols].set_index('fusion_id')

is_virgin = masked['ms'] == 'V'
virgin = masked.loc[is_virgin, theta_cols].set_index('fusion_id')

Plot Mated



In [8]:

    
# Get group levels: sorted array of the distinct lines present in the mated data
mLines = np.array(sorted(set(mated['line'])))

# Maximum number of lines drawn on one figure (a full figure is a 4x4 grid)
per_fig = 16

# Walk through the lines in chunks of per_fig instead of relying on a mask that
# is hard-coded for one specific number of lines. g is the 1-based figure
# number used in the output file name.
for g, start in enumerate(range(0, len(mLines), per_fig), start=1):
    curr = mLines[start:start + per_fig]

    # Smallest square grid that holds every line of this chunk; rounding up
    # guarantees there is a panel for each line even when the count is not a
    # perfect square.
    num = int(np.ceil(np.sqrt(len(curr))))

    # squeeze=False always returns a 2-D array of axes, including the 1x1 case
    fig, axes = plt.subplots(num, num, figsize=(8, 8), squeeze=False)
    fig.suptitle(u'Distribution of Empirical Thetas\nMated With No Background Bias', fontsize=12)
    axs = axes.ravel()

    for ax, line in zip(axs, curr):
        # Select the numeric columns explicitly so the string 'line' column is
        # never handed to the KDE plotter; q4, q5, q6 are drawn in blue, red, green.
        p = mated.loc[mated['line'] == line, ['q4', 'q5', 'q6']]
        p.plot(kind='kde', ax=ax, fontsize=8, title=line, legend=False, color=['b', 'r', 'g'])
        # Vertical reference line at 0.5
        ax.axvline(0.5, lw=1, c='k')
        ax.get_yaxis().set_visible(False)

    # Blank any panels left over when the chunk does not fill the grid. The
    # axes stay visible so the legend attached to the last panel is still drawn.
    for ax in axs[len(curr):]:
        ax.set_axis_off()

    # One shared legend, positioned in figure coordinates near the bottom left
    handles, labels = axs[0].get_legend_handles_labels()
    axs[-1].legend(handles, labels, bbox_to_anchor=(0.2, 0.06), bbox_transform=fig.transFigure, fontsize=10)
    fig.text(0.5, 0.06, 'Line <------------> Tester', ha='center', fontsize=20)
    fig.savefig(os.path.join(odir, 'mated_dist_empirical_theta_g{0}_no_bg.png'.format(g)), bbox_inches='tight')

    # Release the figure so memory does not grow across iterations
    plt.close(fig)









    



/home/jfear/.local/lib/python2.7/site-packages/pandas/tools/plotting.py:859: UserWarning: 'colors' is being deprecated. Please use 'color'instead of 'colors'
  warnings.warn(("'colors' is being deprecated. Please use 'color'"

Plot Virgin



In [9]:

    
# Get group levels: sorted array of the distinct lines present in the virgin data
vLines = np.array(sorted(set(virgin['line'])))

# Maximum number of lines drawn on one figure (a full figure is a 4x4 grid)
per_fig = 16

# Walk through the lines in chunks of per_fig instead of relying on a mask that
# is hard-coded for one specific number of lines. g is the 1-based figure
# number used in the output file name.
for g, start in enumerate(range(0, len(vLines), per_fig), start=1):
    curr = vLines[start:start + per_fig]

    # Smallest square grid that holds every line of this chunk; rounding up
    # guarantees there is a panel for each line even when the count is not a
    # perfect square.
    num = int(np.ceil(np.sqrt(len(curr))))

    # squeeze=False always returns a 2-D array of axes, including the 1x1 case
    fig, axes = plt.subplots(num, num, figsize=(8, 8), squeeze=False)
    # The virgin subset is taken from the background-bias-masked data and the
    # files are saved with a _no_bg suffix, so the title states that as well.
    fig.suptitle(u'Distribution of Empirical Thetas\nVirgin With No Background Bias', fontsize=12)
    axs = axes.ravel()

    for ax, line in zip(axs, curr):
        # Select the numeric columns explicitly so the string 'line' column is
        # never handed to the KDE plotter; q4, q5, q6 are drawn in blue, red, green.
        p = virgin.loc[virgin['line'] == line, ['q4', 'q5', 'q6']]
        p.plot(kind='kde', ax=ax, fontsize=8, title=line, legend=False, color=['b', 'r', 'g'])
        # Vertical reference line at 0.5
        ax.axvline(0.5, lw=1, c='k')
        ax.get_yaxis().set_visible(False)

    # Blank any panels left over when the chunk does not fill the grid. The
    # axes stay visible so the legend attached to the last panel is still drawn.
    for ax in axs[len(curr):]:
        ax.set_axis_off()

    # One shared legend, positioned in figure coordinates near the bottom left
    handles, labels = axs[0].get_legend_handles_labels()
    axs[-1].legend(handles, labels, bbox_to_anchor=(0.2, 0.06), bbox_transform=fig.transFigure, fontsize=10)
    fig.text(0.5, 0.06, 'Line <------------> Tester', ha='center', fontsize=20)
    fig.savefig(os.path.join(odir, 'virgin_dist_empirical_theta_g{0}_no_bg.png'.format(g)), bbox_inches='tight')

    # Release the figure so memory does not grow across iterations
    plt.close(fig)



In [ ]:



In [ ]: