In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mclib.vcf2 as mcvcf
%matplotlib inline
In [1]:
# IPython magic: launches a Qt console attached to this notebook's kernel.
# NOTE(review): looks like an interactive debugging aid rather than part of
# the analysis -- confirm whether this cell can be dropped.
%qtconsole
In [3]:
# Base directory for project files, read from the MCLAB environment variable
# (None when the variable is not set).
mclab = os.environ.get('MCLAB')
# Hard-coded scratch directory on one user's machine.
sandbox = '/home/jfear/sandbox/cegs_ase_paper/ase_lvl2_filtered_vcf_files'
In [4]:
# Load the QSIM bias table (qsim_bias_wide.csv), indexed by its fusion_id column.
fname = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/qsim_bias_wide.csv')
dat = pd.read_csv(fname, index_col='fusion_id')
# Pull only line columns from dat and make qsim: drop the first and last
# columns by position.
# The numeric conversion returns a new frame rather than converting in place,
# so its result must be kept; entries that cannot be parsed become NaN.
qsim = dat.iloc[:, 1:-1].apply(pd.to_numeric, errors='coerce')
qsim.head(2)
Out[4]:
In [12]:
# Read the polymorphism counts table, using its 'line' column as the index,
# and preview the first two rows.
rname = os.path.join(mclab, 'cegs_ase_paper/pipeline_output/polymorphisms_counts.csv')
results = pd.read_csv(filepath_or_buffer=rname, index_col='line')
results.iloc[:2]
Out[12]:
In [13]:
# Create a blank (all-NaN) bias column for filling in.
# A scalar NaN guarantees a float64 column. A dtype-less `pd.Series()` is
# object-typed on current pandas, which would keep the column non-numeric and
# make `describe()` leave it out of the summary.
results['bias'] = np.nan
In [16]:
# For every line in results, count the non-missing qsim entries in that
# line's column whose value is not 0.5, and store the count as its bias.
# NOTE(review): presumably 0.5 marks an unbiased region -- confirm.
for line in results.index:
    # value_counts() tallies each distinct non-missing value in the column.
    cnts = qsim[line].value_counts()
    # Add up the tallies of every value other than 0.5; `.loc` is the
    # label-based indexer that replaces the removed `.ix`.
    results.loc[line, 'bias'] = cnts[cnts.index != 0.5].sum()
results.head(3)
Out[16]:
In [61]:
# Summarise the per-line counts: min, mean and max rows from describe(),
# plus a median row for the count columns.
summary = results.describe().loc[['min', 'mean', 'max']].copy()
# Each median is taken from the per-line data in `results`.  Taking it from
# `summary` would give the median of (min, mean, max), which is always just
# the mean.
for col in ['masked', 'snps', 'indels', 'poly', 'total', 'bias']:
    summary.loc['median', col] = results[col].median()
# Display the table rounded to whole numbers.
summary.round()
Out[61]:
In [22]:
# Three stacked scatter panels sharing one x axis: the 'bias' column on x
# against 'poly' (A), 'snps' (B) and 'indels' (C) on y.
fig, (ax1, ax2, ax3) = plt.subplots(nrows=3, ncols=1, figsize=(8, 8), dpi=100, sharex=True)
fig.subplots_adjust(hspace=0.4)
for panel, ycol, letter in ((ax1, 'poly', 'A'), (ax2, 'snps', 'B'), (ax3, 'indels', 'C')):
    results.plot(x='bias', y=ycol, kind='scatter', ax=panel, fontsize=12)
    # Bold panel letter positioned in axes coordinates, outside the top-left corner.
    panel.text(-0.2, 1.1, letter, transform=panel.transAxes, fontsize=20, fontweight='bold')
# Set the shared x label after plotting so it replaces the label pandas assigns.
ax3.set_xlabel('Number of Biased Exonic Regions', fontsize=12)
fig.suptitle(u'Number of Polymorphisms vs Biased Exonic Regions\nQSIM', fontsize=18, fontweight='medium')
# Write the figure next to the QSIM pipeline output.
plt.savefig(
    os.path.join(mclab, 'cegs_ase_paper/pipeline_output/qsim_bayesian/num_poly_vs_bias.png'),
    bbox_inches='tight',
)
In [ ]: