In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Set up output


In [2]:
# Base directory for the project tree, read from the MCLAB environment variable.
mclab = os.getenv('MCLAB')
if mclab is None:
    # Fail early with a clear message; otherwise os.path.join(None, ...) below
    # raises an opaque TypeError that does not mention the missing variable.
    raise EnvironmentError("The MCLAB environment variable is not set; "
                           "it is needed to locate the pipeline output directory.")

# Directory the figures generated further down in this notebook are saved into.
odir = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/')

Import data


In [3]:
# Locate the input CSV relative to odir (which is derived from $MCLAB) rather
# than through a user-specific absolute path, so the notebook runs on any
# machine where MCLAB is configured.
# NOTE(review): assumes $MCLAB points at the same tree the previous hard-coded
# path (/home/jfear/mclab) referred to -- confirm.
fname = os.path.join(odir, 'output', 'ase_dataset_for_bayesian_w_qsim_summary.csv')
dat = pd.read_csv(fname)

# Preview the first rows to check that the file loaded with the expected columns.
dat.head(3)


Out[3]:
line mating_status fusion_id qsim_mean_theta qsim_q025 qsim_q975 Bayesianpvalue_qsim flag_AI_qsim
0 r324 V S24441_SI 0.430 0.297 0.566 0.336 0
1 r324 M S24447_SI 0.923 0.876 0.956 0.000 1
2 r324 V S24447_SI 0.916 0.868 0.952 0.000 1

Generate Plots for Mated and Virgin


In [4]:
# Split dataset by mating status ('M' rows -> mated, 'V' rows -> virgin),
# keeping only the line identifier and the mean theta column used for plotting.
keep_cols = ['line', 'qsim_mean_theta']
is_mated = dat['mating_status'] == 'M'
is_virgin = dat['mating_status'] == 'V'

mated = dat.loc[is_mated, keep_cols]
virgin = dat.loc[is_virgin, keep_cols]

# Quick look at the first virgin row to confirm the subset's shape.
virgin.head(1)


Out[4]:
line qsim_mean_theta
0 r324 0.43

Plot Mated


In [5]:
# Get group levels: the sorted unique line identifiers present in the mated data
mLines = np.array(sorted(set(mated['line'])))

# Maximum number of panels per figure (a 4x4 grid). The lines are split into
# consecutive chunks of this size, so any number of lines is handled; the
# previous fixed-length mask only worked when there were exactly 68 lines.
panels_per_fig = 16

# Iterate over the chunks (numbered from 1, used in the output file name) and plot
for g, start in enumerate(range(0, len(mLines), panels_per_fig), 1):
    curr = mLines[start:start + panels_per_fig]

    # Smallest square grid that holds every line in this chunk; rounding up
    # guarantees there is an axis for each line even when the count is not a
    # perfect square.
    num = int(np.ceil(np.sqrt(len(curr))))

    # Plot figure; squeeze=False always returns a 2-D array of axes, so the
    # flattening below also works for a 1x1 grid.
    fig, axes = plt.subplots(num, num, figsize=(8, 8), squeeze=False)
    fig.suptitle(u'Distribution of QSIM Thetas\nMated', fontsize=12)
    axs = axes.ravel()
    for i, line in enumerate(curr):
        # One density panel per line, with a reference marker at theta = 0.5
        p = mated[mated['line'] == line]
        p.plot(kind='kde', ax=axs[i], fontsize=8, title=line, legend=False, color = 'r')
        axs[i].axvline(0.5, lw=1, c='k')
        axs[i].get_yaxis().set_visible(False)

    # Hide any grid cells left over when the chunk does not fill the grid
    for unused in axs[len(curr):]:
        unused.set_visible(False)

    # Shared x-axis caption for the whole figure
    fig.text(0.5, 0.04, 'Line <------------> Tester', ha='center', fontsize=20)
    fig.savefig(os.path.join(odir, 'mated_dist_qsim_theta_g{0}.png'.format(g)), bbox_inches='tight')
    # Close the figure so figures do not accumulate in memory across iterations
    plt.close(fig)

Plot Virgin


In [6]:
# Get group levels: the sorted unique line identifiers present in the virgin data
vLines = np.array(sorted(set(virgin['line'])))

# Maximum number of panels per figure (a 4x4 grid). The lines are split into
# consecutive chunks of this size, so any number of lines is handled; the
# previous fixed-length mask only worked when there were exactly 68 lines.
panels_per_fig = 16

# Iterate over the chunks (numbered from 1, used in the output file name) and plot
for g, start in enumerate(range(0, len(vLines), panels_per_fig), 1):
    curr = vLines[start:start + panels_per_fig]

    # Smallest square grid that holds every line in this chunk; rounding up
    # guarantees there is an axis for each line even when the count is not a
    # perfect square.
    num = int(np.ceil(np.sqrt(len(curr))))

    # Plot figure; squeeze=False always returns a 2-D array of axes, so the
    # flattening below also works for a 1x1 grid.
    fig, axes = plt.subplots(num, num, figsize=(8, 8), squeeze=False)
    fig.suptitle(u'Distribution of QSIM Thetas\nVirgin', fontsize=12)
    axs = axes.ravel()
    for i, line in enumerate(curr):
        # One density panel per line, with a reference marker at theta = 0.5
        p = virgin[virgin['line'] == line]
        p.plot(kind='kde', ax=axs[i], fontsize=8, title=line, legend=False, color = 'r')
        axs[i].axvline(0.5, lw=1, c='k')
        axs[i].get_yaxis().set_visible(False)

    # Hide any grid cells left over when the chunk does not fill the grid
    for unused in axs[len(curr):]:
        unused.set_visible(False)

    # Shared x-axis caption for the whole figure
    fig.text(0.5, 0.04, 'Line <------------> Tester', ha='center', fontsize=20)
    fig.savefig(os.path.join(odir, 'virgin_dist_qsim_theta_g{0}.png'.format(g)), bbox_inches='tight')
    # Close the figure so figures do not accumulate in memory across iterations
    plt.close(fig)

In [ ]: