In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
In [2]:
# Build the path to the wide-format qsim bias table under the directory named
# by the MCLAB environment variable, and load it indexed by fusion_id.
mclab = os.getenv('MCLAB')
if mclab is None:
    # os.getenv returns None for an unset variable; without this guard
    # os.path.join(None, ...) fails with an error that does not mention MCLAB.
    raise RuntimeError('The MCLAB environment variable is not set; '
                       'set it to the base directory containing cegs_ase_paper.')
fname = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_bias_wide.csv')
dat = pd.read_csv(fname, index_col='fusion_id')
# Last expression of the cell: show the first three rows as a sanity check.
dat.head(3)
Out[2]:
In [ ]:
In [3]:
# Keep only the rows whose percent_bias is strictly greater than 10, then draw
# a kernel density estimate of percent_bias for the remaining rows.
# The comparison is strict (>) so that it agrees with both the figure title
# ("After Removing Exonic Regions ≤ 10% Bias") and the output file name
# ("..._gt10.png"); the previous >= kept rows sitting exactly at 10.
pc = dat[dat['percent_bias'] > 10]
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
pc['percent_bias'].plot(kind='kde', ax=ax, fontsize=12)
ax.set_xlabel('Percent Genotypes with Bias', fontsize=12)
# Vertical reference line at 14; the value is hard-coded here.
# NOTE(review): where the 14% threshold comes from is not shown in this notebook -- confirm.
ax.axvline(14, color='r', lw=2, label='14% of genotypes with bias')
ax.legend()
ax.set_title(u'Distribution of Exonic Regions Bias(qsim)\nAfter Removing Exonic Regions ≤ 10% Bias', fontsize=18)
fig.tight_layout()
# Write the figure next to the input table under the MCLAB directory.
fig.savefig(os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_density_percent_bias_gt10.png'))
In [4]:
# Box plot of the per-column values of the bias table, one box per column,
# with a horizontal reference line at 0.5.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# Positional slice: every column except the first and the last.
# .iloc replaces the deprecated DataFrame.ix (removed in pandas 1.0); for
# non-integer column labels .ix already sliced by position, so the selection
# is unchanged.
# NOTE(review): presumably the dropped columns are non-genotype summary
# columns (e.g. percent_bias) -- confirm against the CSV header.
mk = dat.iloc[:, 1:-1]
mk.boxplot(return_type='axes', ax=ax, rot=45, fontsize=6)
ax.axhline(0.5, color='r', lw=2)
ax.set_xlabel('Genotype', fontsize=12)
ax.set_ylabel(u'Exonic Region Bias\nTester <--------> Line', fontsize=12)
ax.set_title('Line Distribution of Biased Exonic Regions (qsim)', fontsize=18)
fig.tight_layout()
# Write the figure next to the input table under the MCLAB directory.
fig.savefig(os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_line_bias.png'))
In [ ]: