In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import mpld3
mpld3.enable_notebook()
In [2]:
# Open a Qt console connected to this notebook's kernel for ad-hoc inspection
# of variables; the rest of the notebook does not use it.
%qtconsole
In [3]:
# Resolve the project root from the MCLAB environment variable and build the
# directory that the figures below are saved into.
mclab = os.getenv('MCLAB')
if mclab is None:
    # os.path.join(None, ...) would otherwise fail with an opaque TypeError.
    raise RuntimeError('The MCLAB environment variable is not set; '
                       'export MCLAB=<project root> before running this notebook.')
odir = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/emp_bayesian/PG_model/')
In [4]:
# Load the table used for plotting.  The path is built from `odir` (derived
# from $MCLAB) instead of a hard-coded home directory, so the notebook runs on
# any machine where MCLAB points at the project root.
fname = os.path.join(odir, 'emp_for_plotting.csv')
dat = pd.read_csv(fname)
# Preview the first rows as the cell output
dat.head(3)
Out[4]:
In [5]:
# Partition the table on the 'ms' column ('M' -> mated, 'V' -> virgin), keeping
# the line label plus the q4/q5/q6 columns and indexing each part by fusion_id.
keep_cols = ['fusion_id', 'line', 'q4', 'q5', 'q6']
mated = dat.loc[dat['ms'] == 'M', keep_cols].set_index('fusion_id')
virgin = dat.loc[dat['ms'] == 'V', keep_cols].set_index('fusion_id')
In [6]:
# Plot, for every line in `mated`, a KDE of the q4/q5/q6 columns.  Lines are
# laid out 16 per figure (a 4x4 grid) and each figure is saved as a PNG in `odir`.

# Sorted unique line labels
mLines = np.sort(np.array(list(set(mated['line']))))

# Group number for every line: 1 for the first 16 lines, 2 for the next 16, ...
# Computed from the actual number of lines rather than a fixed-length mask, so
# the cell keeps working if lines are added or removed.
lines_per_fig = 16
grp = np.arange(mLines.shape[0]) // lines_per_fig + 1
n_groups = int(np.ceil(mLines.shape[0] / float(lines_per_fig)))

# One figure per group
for g in range(1, n_groups + 1):
    curr = mLines[grp == g]

    # Smallest square grid that holds every line of this group
    num = int(np.ceil(np.sqrt(curr.shape[0])))

    # squeeze=False always returns a 2-D array of axes, even for a 1x1 grid
    fig, axes = plt.subplots(num, num, figsize=(8, 8), squeeze=False)
    fig.suptitle(u'Distribution of Empirical Thetas\nMated', fontsize=12)
    axs = axes.ravel()

    for i, line in enumerate(curr):
        p = mated[mated['line'] == line]
        p.plot(kind='kde', ax=axs[i], fontsize=8, title=line, legend=False, color=['b', 'r', 'g'])
        # Vertical reference line at 0.5
        axs[i].axvline(0.5, lw=1, c='k')
        axs[i].get_yaxis().set_visible(False)

    # Blank out panels left over when the group does not fill the grid
    for spare in axs[curr.shape[0]:]:
        spare.set_axis_off()

    # Single shared legend, taken from the first panel and anchored in figure coordinates
    handles, labels = axs[0].get_legend_handles_labels()
    plt.legend(handles, labels, bbox_to_anchor=(0.2, 0.06), bbox_transform=fig.transFigure, fontsize=10)
    fig.text(0.5, 0.06, 'Line <------------> Tester', ha='center', fontsize=20)
    fig.savefig(os.path.join(odir, 'mated_dist_empirical_theta_g{0}.png'.format(str(g))), bbox_inches='tight')
    # Release the figure so repeated iterations do not accumulate open figures
    plt.close(fig)
In [7]:
# Plot, for every line in `virgin`, a KDE of the q4/q5/q6 columns.  Lines are
# laid out 16 per figure (a 4x4 grid) and each figure is saved as a PNG in `odir`.

# Sorted unique line labels
vLines = np.sort(np.array(list(set(virgin['line']))))

# Group number for every line: 1 for the first 16 lines, 2 for the next 16, ...
# Computed from the actual number of lines rather than a fixed-length mask, so
# the cell keeps working if lines are added or removed.
lines_per_fig = 16
grp = np.arange(vLines.shape[0]) // lines_per_fig + 1
n_groups = int(np.ceil(vLines.shape[0] / float(lines_per_fig)))

# One figure per group
for g in range(1, n_groups + 1):
    curr = vLines[grp == g]

    # Smallest square grid that holds every line of this group
    num = int(np.ceil(np.sqrt(curr.shape[0])))

    # squeeze=False always returns a 2-D array of axes, even for a 1x1 grid
    fig, axes = plt.subplots(num, num, figsize=(8, 8), squeeze=False)
    fig.suptitle(u'Distribution of Empirical Thetas\nVirgin', fontsize=12)
    axs = axes.ravel()

    for i, line in enumerate(curr):
        p = virgin[virgin['line'] == line]
        p.plot(kind='kde', ax=axs[i], fontsize=8, title=line, legend=False, color=['b', 'r', 'g'])
        # Vertical reference line at 0.5
        axs[i].axvline(0.5, lw=1, c='k')
        axs[i].get_yaxis().set_visible(False)

    # Blank out panels left over when the group does not fill the grid
    for spare in axs[curr.shape[0]:]:
        spare.set_axis_off()

    # Single shared legend, taken from the first panel and anchored in figure coordinates
    handles, labels = axs[0].get_legend_handles_labels()
    plt.legend(handles, labels, bbox_to_anchor=(0.2, 0.06), bbox_transform=fig.transFigure, fontsize=10)
    fig.text(0.5, 0.06, 'Line <------------> Tester', ha='center', fontsize=20)
    fig.savefig(os.path.join(odir, 'virgin_dist_empirical_theta_g{0}.png'.format(str(g))), bbox_inches='tight')
    # Release the figure so repeated iterations do not accumulate open figures
    plt.close(fig)
In [8]:
# Interactive KDE of the numeric columns for virgin line r109; the mpld3
# MousePosition plugin shows cursor coordinates so values can be read off the plot.
vr109 = virgin[virgin.line == 'r109']
fig = plt.figure()
ax = fig.add_subplot(111)
# `color` (not `colors`) is the keyword DataFrame.plot accepts, and is what the
# per-line grid plots in this notebook already use.
vr109.plot(kind='kde', color=['b', 'r', 'g'], ax=ax)
mpld3.plugins.connect(fig, mpld3.plugins.MousePosition())
For virgin line r109, the KDE peaks appear to lie at roughly 0.185-0.228 on the x-axis.
In [ ]:
In [ ]: