Import Data



In [1]:

    
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline



In [2]:

    
# Resolve the project root from the MCLAB environment variable.
# os.getenv returns None when the variable is unset, and os.path.join(None, ...)
# would then fail with an opaque TypeError, so fail early with a clear message.
mclab = os.getenv('MCLAB')
if mclab is None:
    raise EnvironmentError(
        "The MCLAB environment variable is not set; "
        "point it at the directory containing 'cegs_ase_paper'."
    )

# Wide-format qsim bias table, indexed by fusion_id.
fname = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_bias_wide.csv')
dat = pd.read_csv(fname, index_col='fusion_id')

# Preview the first three rows.
dat.head(3)









    Out[2]:






  
    
      
      _NAME_
      r101
      r109
      r136
      r149
      r181
      r208
      r21
      r217
      r228
      ...
      w52
      w55
      w59
      w64
      w67
      w68
      w74
      w76
      w79
      percent_bias
    
    
      fusion_id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      F10001_SI
      qsim_line
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.5
      0.500000
      0.5
      0.500000
      ...
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.000000
    
    
      F10005_SI
      qsim_line
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.5
      0.500000
      0.5
      0.516484
      ...
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      0.500000
      1.470588
    
    
      F10009_SI
      qsim_line
      0.500921
      0.500921
      0.500921
      0.501416
      0.500921
      0.5
      0.500921
      0.5
      0.500921
      ...
      0.500921
      0.500921
      0.500752
      0.500921
      0.500921
      0.500921
      0.500921
      0.500921
      0.500921
      91.176471
    
  

3 rows × 70 columns

Mask data for plotting



In [ ]:



In [3]:

    
# Keep only the rows whose percent_bias is at least 10 (all columns are kept).
# NOTE(review): the title below says regions "≤ 10%" were removed and the output
# file is named "gt10", whereas this filter drops only values < 10 -- confirm
# which boundary is intended.
pc = dat[dat['percent_bias'] >= 10]

# Kernel density estimate of percent_bias for the retained rows.
fig, ax = plt.subplots(figsize=(8, 6))
pc['percent_bias'].plot(kind='kde', ax=ax, fontsize=12)
ax.set_xlabel('Percent Genotypes with Bias', fontsize=12)

# Vertical reference line at 14 percent, labelled in the legend.
ax.axvline(14, color='r', lw=2, label='14% of genotypes with bias')
ax.legend()
ax.set_title(u'Distribution of Exonic Regions Bias(qsim)\nAfter Removing Exonic Regions ≤ 10% Bias', fontsize=18)

fig.tight_layout()
fig.savefig(os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_density_percent_bias_gt10.png'))

Boxplot to check whether certain lines have more bias than others



In [4]:

    
fig, ax = plt.subplots(figsize=(8, 6))

# Select the per-line columns by position: skip the first column (_NAME_) and
# the last column (percent_bias). .iloc replaces the deprecated/removed .ix
# indexer; for these integer slices the selection is the same.
mk = dat.iloc[:, 1:-1]

# One box per line column, with a reference line at 0.5.
mk.boxplot(return_type='axes', ax=ax, rot=45, fontsize=6)
ax.axhline(0.5, color='r', lw=2)

ax.set_xlabel('Genotype', fontsize=12)
ax.set_ylabel(u'Exonic Region Bias\nTester <--------> Line', fontsize=12)
ax.set_title('Line Distribution of Biased Exonic Regions (qsim)', fontsize=18)

fig.tight_layout()
fig.savefig(os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_line_bias.png'))



In [ ]:

	_NAME_	r101	r109	r136	r149	r181	r208	r21	r217	r228	...	w52	w55	w59	w64	w67	w68	w74	w76	w79	percent_bias
fusion_id
F10001_SI	qsim_line	0.500000	0.500000	0.500000	0.500000	0.500000	0.5	0.500000	0.5	0.500000	...	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.000000
F10005_SI	qsim_line	0.500000	0.500000	0.500000	0.500000	0.500000	0.5	0.500000	0.5	0.516484	...	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	0.500000	1.470588
F10009_SI	qsim_line	0.500921	0.500921	0.500921	0.501416	0.500921	0.5	0.500921	0.5	0.500921	...	0.500921	0.500921	0.500752	0.500921	0.500921	0.500921	0.500921	0.500921	0.500921	91.176471