Before running the pipeline please make sure all required software packages are installed.
Please see README.md for a list of required software.
In [1]:
%matplotlib inline
In [5]:
import matplotlib.pyplot as plt
import matplotlib
In [11]:
import json
s = json.load( open("bmh_matplotlibrc.json") )
matplotlib.rcParams.update(s)
In [3]:
cd ../results
In [12]:
%run ../protocol/get_total_seq_size.py line6u_global.fa.clean.nr
In [13]:
%run ../protocol/get_total_seq_size.py line6i_global.fa.clean.nr
In [14]:
%run ../protocol/get_total_seq_size.py line7u_global.fa.clean.nr
In [15]:
%run ../protocol/get_total_seq_size.py line7i_global.fa.clean.nr
In [16]:
%run ../protocol/get_total_seq_size.py line6u_local.fa.clean.nr
In [17]:
%run ../protocol/get_total_seq_size.py line6i_local.fa.clean.nr
In [18]:
%run ../protocol/get_total_seq_size.py line7u_local.fa.clean.nr
In [19]:
%run ../protocol/get_total_seq_size.py line7i_local.fa.clean.nr
In [20]:
%run ../protocol/get_total_seq_size.py line6u_global.fa.clean.nr.uniq
In [21]:
%run ../protocol/get_total_seq_size.py line6i_global.fa.clean.nr.uniq
In [22]:
%run ../protocol/get_total_seq_size.py line7u_global.fa.clean.nr.uniq
In [23]:
%run ../protocol/get_total_seq_size.py line7i_global.fa.clean.nr.uniq
In [24]:
%run ../protocol/get_total_seq_size.py line6u_local.fa.clean.nr.uniq
In [25]:
%run ../protocol/get_total_seq_size.py line6i_local.fa.clean.nr.uniq
In [26]:
%run ../protocol/get_total_seq_size.py line7u_local.fa.clean.nr.uniq
In [27]:
%run ../protocol/get_total_seq_size.py line7i_local.fa.clean.nr.uniq
In [28]:
def blast_match(aligned_bases, total_bases):
print 'Total long unique sequences matched with mouse proteins = %d/%d (%.2f%%)' % \
(aligned_bases, total_bases, float(aligned_bases)/total_bases*100.0)
In [29]:
def get_match_bases(infile):
bases = 0
for line in open(infile):
base = line.split('\t')[-1]
bases += int(base)
return bases
In [30]:
line6u_global_uniq_match = get_match_bases('line6u_global.fa.clean.nr.uniq.long.xml.out')
line6i_global_uniq_match = get_match_bases('line6i_global.fa.clean.nr.uniq.long.xml.out')
line7u_global_uniq_match = get_match_bases('line7u_global.fa.clean.nr.uniq.long.xml.out')
line7i_global_uniq_match = get_match_bases('line7i_global.fa.clean.nr.uniq.long.xml.out')
In [31]:
line6u_local_uniq_match = get_match_bases('line6u_local.fa.clean.nr.uniq.long.xml.out')
line6i_local_uniq_match = get_match_bases('line6i_local.fa.clean.nr.uniq.long.xml.out')
line7u_local_uniq_match = get_match_bases('line7u_local.fa.clean.nr.uniq.long.xml.out')
line7i_local_uniq_match = get_match_bases('line7i_local.fa.clean.nr.uniq.long.xml.out')
In [32]:
line6u_global_uniq_long = !python ../protocol/get_total_seq_size.py line6u_global.fa.clean.nr.uniq.long
line6i_global_uniq_long = !python ../protocol/get_total_seq_size.py line6i_global.fa.clean.nr.uniq.long
line7u_global_uniq_long = !python ../protocol/get_total_seq_size.py line7u_global.fa.clean.nr.uniq.long
line7i_global_uniq_long = !python ../protocol/get_total_seq_size.py line7i_global.fa.clean.nr.uniq.long
In [33]:
line6u_local_uniq_long = !python ../protocol/get_total_seq_size.py line6u_local.fa.clean.nr.uniq.long
line6i_local_uniq_long = !python ../protocol/get_total_seq_size.py line6i_local.fa.clean.nr.uniq.long
line7u_local_uniq_long = !python ../protocol/get_total_seq_size.py line7u_local.fa.clean.nr.uniq.long
line7i_local_uniq_long = !python ../protocol/get_total_seq_size.py line7i_local.fa.clean.nr.uniq.long
In [34]:
line6u_global_uniq_long = int(line6u_global_uniq_long[0].split('\t')[1])
line6i_global_uniq_long = int(line6i_global_uniq_long[0].split('\t')[1])
line7u_global_uniq_long = int(line7u_global_uniq_long[0].split('\t')[1])
line7i_global_uniq_long = int(line7i_global_uniq_long[0].split('\t')[1])
In [35]:
line6u_local_uniq_long = int(line6u_local_uniq_long[0].split('\t')[1])
line6i_local_uniq_long = int(line6i_local_uniq_long[0].split('\t')[1])
line7u_local_uniq_long = int(line7u_local_uniq_long[0].split('\t')[1])
line7i_local_uniq_long = int(line7i_local_uniq_long[0].split('\t')[1])
In [36]:
blast_match(line6u_global_uniq_match, line6u_global_uniq_long)
In [37]:
blast_match(line6i_global_uniq_match, line6i_global_uniq_long)
In [38]:
blast_match(line7u_global_uniq_match, line7u_global_uniq_long)
In [39]:
blast_match(line7i_global_uniq_match, line7i_global_uniq_long)
In [40]:
blast_match(line6u_local_uniq_match, line6u_local_uniq_long)
In [41]:
blast_match(line6i_local_uniq_match, line6i_local_uniq_long)
In [42]:
blast_match(line7u_local_uniq_match, line7u_local_uniq_long)
In [43]:
blast_match(line7i_local_uniq_match, line7i_local_uniq_long)
In [44]:
k21_junctions = !cut -f 1 global_k21.fa.clean.nr.psl.best_all_sp.txt
k31_junctions = !cut -f 1 global_k31.fa.clean.nr.psl.best_all_sp.txt
k21_junctions = set(k21_junctions)
k31_junctions = set(k31_junctions)
print "Total junctions from k21 = %d, k31 = %d" % (len(k21_junctions), len(k31_junctions))
diff_junctions = list(k21_junctions.difference(k31_junctions))
print "Different junctions between k21 and k31 = %d junctions" % len(diff_junctions)
In [45]:
est_junctions = !cut -f 1 est.psl.best_all_sp.txt
est_junctions = set(est_junctions)
In [46]:
oasesM_junctions = !cut -f 1 oases-M.psl.best.all_sp.txt
oasesM_junctions = set(oasesM_junctions)
In [47]:
assembly_junctions = !cut -f 1 global_assembly.clean.nr.psl.best_all_sp.txt
assembly_junctions = set(assembly_junctions)
In [48]:
print 'Total junctions from Oases M = %d' % len(oasesM_junctions)
print 'Total junctions from global assembly = %d' % len(assembly_junctions)
In [49]:
print 'Unique Oases M junctions = %d' % (len(oasesM_junctions.difference(assembly_junctions)))
In [50]:
print 'Unique global assembly junctions = %d' % (len(assembly_junctions.difference(oasesM_junctions)))
In [51]:
print 'Unique Oases M junctions supported by ESTs = %d' % len(oasesM_junctions.difference(assembly_junctions).intersection(est_junctions))
In [52]:
print 'Unique global assembly junctions supported by ESTs = %d' % \
len(assembly_junctions.difference(oasesM_junctions).intersection(est_junctions))
In [53]:
gimme_junctions = !cut -f 1 all_assembly_models.bed_all_sp.txt
gimme_junctions = set(gimme_junctions)
In [54]:
ensembl_junctions = !cut -f 1 Gallus_gallus.WASHUC2.64.bed_all_sp.txt
ensembl_junctions = set(ensembl_junctions)
In [55]:
gimme_cuff_junctions = !cut -f 1 all_assembly_cufflinks_models.bed_all_sp.txt
gimme_cuff_junctions = set(gimme_cuff_junctions)
In [56]:
gimme_cuff_ensembl_junctions = !cut -f 1 all_assembly_cufflinks_ensembl_models.bed_all_sp.txt
gimme_cuff_ensembl_junctions = set(gimme_cuff_ensembl_junctions)
In [57]:
cufflinks_junctions = !cut -f 1 cufflinks.bed_all_sp.txt
cufflinks_junctions = set(cufflinks_junctions)
In [58]:
global_junctions = !cut -f 1 global_assembly_models.bed_all_sp.txt
global_junctions = set(global_junctions)
local_junctions = !cut -f 1 local_assembly_models.bed_all_sp.txt
local_junctions = set(local_junctions)
In [59]:
mrna_junctions = !cut -f 1 mrna.psl.best_all_sp.txt
mrna_junctions = set(mrna_junctions)
In [60]:
selected_chroms = ['chr1','chr10','chr11','chr12','chr13',
'chr14','chr15','chr16','chr17','chr18',
'chr19','chr2','chr3','chr4','chr5','chr6',
'chr7','chr8','chr9','chrX']
In [61]:
mus_gimme_junctions = !cut -f 1 mouse_assembly_models.bed_all_sp.txt
tmp = []
for ss in mus_gimme_junctions:
chrom = ss.split(':')[0]
if chrom in selected_chroms:
tmp.append(ss)
mus_gimme_junctions = set(tmp)
In [62]:
mus_cuff_junctions = !cut -f 1 mouse_cufflinks.bed_all_sp.txt
mus_cuff_junctions = set(mus_cuff_junctions)
In [63]:
mus_ensembl_junctions = !cut -f 1 Mus_musculus.NCBIM37.64.bed_all_sp.txt
tmp = []
for ss in mus_ensembl_junctions:
chrom = ss.split(':')[0]
if chrom in selected_chroms:
tmp.append(ss)
mus_ensembl_junctions = set(tmp)
In [64]:
from matplotlib_venn import venn3, venn3_circles, venn2, venn2_circles
from collections import Counter
In [65]:
union = cufflinks_junctions.union(gimme_junctions).union(ensembl_junctions)
indicators = ['%d%d%d' % (a in cufflinks_junctions, a in gimme_junctions, a in ensembl_junctions) for a in union]
subsets = Counter(indicators)
In [73]:
figsize(8,16)
v = venn3([ensembl_junctions, gimme_junctions, cufflinks_junctions],
set_labels=('', '', ''))
fontsize = 16
v.get_label_by_id('100').set_fontsize(fontsize)
v.get_label_by_id('110').set_fontsize(fontsize)
v.get_label_by_id('101').set_fontsize(fontsize)
v.get_label_by_id('111').set_fontsize(fontsize)
v.get_label_by_id('011').set_fontsize(fontsize)
v.get_label_by_id('001').set_fontsize(fontsize)
v.get_label_by_id('010').set_fontsize(fontsize)
c = venn3_circles([ensembl_junctions, gimme_junctions, cufflinks_junctions],
linestyle='solid', lw=2.5, color='white')
font = {'size':18}
text(-0.9, 0.3, 'Ensembl', fontdict=font)
text(0.35, -0.5, 'Cufflinks', fontdict=font)
text(0.3, 0.45, 'Gimme', fontdict=font)
legend_ls = {'linewidth':12, 'alpha':0.5}
legend_font = {'size':16}
hlines(0.3,1.0,0.6, color=v.get_patch_by_id('100').get_facecolor(), **legend_ls)
hlines(0.25,1.0,0.6, color=v.get_patch_by_id('110').get_facecolor(), **legend_ls)
hlines(0.2,1.0,0.6, color=v.get_patch_by_id('101').get_facecolor(), **legend_ls)
hlines(0.15,1.0,0.6, color=v.get_patch_by_id('111').get_facecolor(), **legend_ls)
hlines(0.1,1.0,0.6, color=v.get_patch_by_id('011').get_facecolor(), **legend_ls)
hlines(0.05,1.0,0.6, color=v.get_patch_by_id('001').get_facecolor(), **legend_ls)
hlines(0.0,1.0,0.6, color=v.get_patch_by_id('010').get_facecolor(), **legend_ls)
text(0.7,0.29, 'Ensembl', fontdict=legend_font)
text(0.7,0.24, 'Ensembl + Gimme', fontdict=legend_font)
text(0.7,0.19, 'Ensembl + Cufflinks', fontdict=legend_font)
text(0.7, 0.14, 'Ensembl + Cufflinks + Gimme', fontdict=legend_font)
text(0.7, 0.09, 'Cufflinks + Gimme', fontdict=legend_font)
text(0.7, 0.04, 'Cufflinks', fontdict=legend_font)
text(0.7, -0.01, 'Gimme', fontdict=legend_font)
savefig('chick_venn.pdf',bbox_inches='tight', pad_inches=0.1)
In [74]:
fig = plt.figure(0,figsize=(12,8))
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)
fig.tight_layout()
v1 = venn2([mus_ensembl_junctions, mus_cuff_junctions],
set_labels=('', ''), ax=ax1)
v1.get_label_by_id('10').set_fontsize(18)
v1.get_label_by_id('01').set_fontsize(18)
v1.get_label_by_id('11').set_fontsize(18)
v1.get_patch_by_id('10').set_color('green')
v1.get_patch_by_id('01').set_color('blue')
v1.get_patch_by_id('11').set_color('yellow')
c1 = venn2_circles([mus_ensembl_junctions, mus_cuff_junctions],
linestyle='solid', lw=2.5, ax=ax1, color='white')
font = {'size':18}
#ax1.text(0, 0.65, 'A', fontdict=font)
ax1.text(-0.6, -0.55, 'Ensembl', fontdict=font)
ax1.text(0.55, -0.1, 'Cufflinks', fontdict=font)
v2 = venn2([mus_ensembl_junctions, mus_gimme_junctions],
set_labels=('', ''), ax=ax2)
v2.get_patch_by_id('10').set_color('green')
v2.get_patch_by_id('01').set_color('blue')
v2.get_patch_by_id('11').set_color('red')
v2.get_label_by_id('10').set_fontsize(16)
v2.get_label_by_id('01').set_fontsize(16)
v2.get_label_by_id('11').set_fontsize(16)
c2 = venn2_circles([mus_ensembl_junctions, mus_gimme_junctions],
linestyle='solid', lw=2.5, ax=ax2, color='white')
#ax2.text(0, 0.65, 'B', fontdict=font)
ax2.text(-0.4, -0.6, 'Ensembl', fontdict=font)
ax2.text(0.3, -0.45, 'Gimme', fontdict=font)
plt.savefig('mouse_venn.pdf',bbox_inches='tight', pad_inches=0.1)
In [68]:
est_mrna_not_in_ensembl_junctions = est_junctions.union(mrna_junctions).difference(ensembl_junctions)
In [73]:
fig = plt.figure(figsize=(8,16))
v = venn3([cufflinks_junctions, gimme_junctions, est_junctions.union(mrna_junctions)],
set_labels=('', '', ''))
fontsize = 16
v.get_label_by_id('100').set_fontsize(fontsize)
v.get_label_by_id('110').set_fontsize(fontsize)
v.get_label_by_id('101').set_fontsize(fontsize)
v.get_label_by_id('111').set_fontsize(fontsize)
v.get_label_by_id('011').set_fontsize(fontsize)
v.get_label_by_id('001').set_fontsize(fontsize)
v.get_label_by_id('010').set_fontsize(fontsize)
c = venn3_circles([cufflinks_junctions, gimme_junctions, est_junctions.union(mrna_junctions)],
linestyle='solid', lw=2.5, color='white')
font = {'size':18}
plt.text(-0.6, 0.4, 'Cufflinks', fontdict=font)
plt.text(0.3, -0.7, 'ESTs or mRNAs', fontdict=font)
plt.text(0.45, 0.40, 'Gimme', fontdict=font)
legend_ls = {'linewidth':12, 'alpha':0.5}
legend_font = {'size':16}
x = 0.2
plt.hlines(0.3-x,1.0,0.6, color=v.get_patch_by_id('100').get_facecolor(), **legend_ls)
plt.hlines(0.25-x,1.0,0.6, color=v.get_patch_by_id('110').get_facecolor(), **legend_ls)
plt.hlines(0.2-x,1.0,0.6, color=v.get_patch_by_id('101').get_facecolor(), **legend_ls)
plt.hlines(0.15-x,1.0,0.6, color=v.get_patch_by_id('111').get_facecolor(), **legend_ls)
plt.hlines(0.1-x,1.0,0.6, color=v.get_patch_by_id('011').get_facecolor(), **legend_ls)
plt.hlines(0.05-x,1.0,0.6, color=v.get_patch_by_id('001').get_facecolor(), **legend_ls)
plt.hlines(0.0-x,1.0,0.6, color=v.get_patch_by_id('010').get_facecolor(), **legend_ls)
plt.text(0.65,0.29-x, 'Cufflinks', fontdict=legend_font)
plt.text(0.65,0.24-x, 'Cufflinks + Gimme', fontdict=legend_font)
plt.text(0.65,0.19-x, 'Cufflinks + ESTs', fontdict=legend_font)
plt.text(0.65,0.14-x, 'Cufflinks + Gimme + ESTs', fontdict=legend_font)
plt.text(0.65,0.09-x, 'Gimme + ESTs', fontdict=legend_font)
plt.text(0.65,0.04-x, 'ESTs', fontdict=legend_font)
plt.text(0.65,-0.01-x, 'Gimme', fontdict=legend_font)
plt.savefig('chick_est_venn.pdf',bbox_inches='tight', pad_inches=0.1)
In [75]:
len(cufflinks_junctions.union(gimme_junctions).intersection(est_mrna_not_in_ensembl_junctions))
Out[75]:
In [76]:
len(est_mrna_not_in_ensembl_junctions)
Out[76]:
In [77]:
len(est_junctions.difference(ensembl_junctions)), len(mrna_junctions.difference(ensembl_junctions))
Out[77]:
In [78]:
fig = plt.figure(0, figsize=(12,8))
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)
fig.tight_layout()
v1 = venn2([cufflinks_junctions, est_mrna_not_in_ensembl_junctions],
set_labels=('', ''), ax=ax1)
v1.get_label_by_id('10').set_fontsize(18)
v1.get_label_by_id('01').set_fontsize(18)
v1.get_label_by_id('11').set_fontsize(18)
v1.get_patch_by_id('10').set_color('green')
v1.get_patch_by_id('01').set_color('blue')
v1.get_patch_by_id('11').set_color('yellow')
c1 = venn2_circles([cufflinks_junctions, est_mrna_not_in_ensembl_junctions],
linestyle='dashed', lw=0.9, ax=ax1)
font = {'size':16}
ax1.text(0, 0.65, 'A', fontdict=font)
ax1.text(-0.6, -0.5, 'Cufflinks', fontdict=font)
ax1.text(0.1, -0.6, 'ESTs or mRNAs\n(not in Ensembl)', fontdict=font)
v2 = venn2([gimme_junctions, est_mrna_not_in_ensembl_junctions],
set_labels=('', ''), ax=ax2)
v2.get_patch_by_id('10').set_color('orange')
v2.get_patch_by_id('01').set_color('blue')
v2.get_patch_by_id('11').set_color('red')
v2.get_label_by_id('10').set_fontsize(18)
v2.get_label_by_id('01').set_fontsize(18)
v2.get_label_by_id('11').set_fontsize(18)
c2 = venn2_circles([gimme_junctions, est_mrna_not_in_ensembl_junctions],
linestyle='dashed', lw=0.9, ax=ax2)
font = {'size':16}
ax2.text(0, 0.65, 'B', fontdict=font)
ax2.text(-0.6, -0.5, 'Gimme', fontdict=font)
ax2.text(0.1, -0.6, 'EST or mRNA\n(not in Ensembl)', fontdict=font)
plt.savefig('chick_est_mrna_venn.pdf',bbox_inches='tight', pad_inches=0.1)
In [79]:
fig = plt.figure(0, figsize=(8,16))
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)
fig.tight_layout()
fontsize = 14
font = {'size':16}
c = venn3_circles([global_junctions, ensembl_junctions, est_junctions.union(mrna_junctions)],
linestyle='solid', lw=2.5, ax=ax1, color='white')
c[0].set_lw(2.0)
c[0].set_ls('dashed')
v = venn3([global_junctions, ensembl_junctions, est_junctions.union(mrna_junctions)],
set_labels=('', '', ''), ax=ax1)
v.get_label_by_id('100').set_fontsize(fontsize)
v.get_label_by_id('110').set_fontsize(fontsize)
v.get_label_by_id('101').set_fontsize(fontsize)
v.get_label_by_id('111').set_fontsize(fontsize)
v.get_label_by_id('011').set_fontsize(fontsize)
v.get_label_by_id('001').set_fontsize(fontsize)
v.get_label_by_id('010').set_fontsize(fontsize)
ax1.text(-0.8, 0.5, 'Global', fontdict=font)
ax1.text(0.3, -0.7, 'ESTs or mRNAs', fontdict=font)
ax1.text(0.45, 0.40, 'Ensembl', fontdict=font)
ax1.text(0, -0.8, 'A', fontdict={'size':18})
c = venn3_circles([global_junctions.union(local_junctions), ensembl_junctions, est_junctions.union(mrna_junctions)],
linestyle='solid', lw=2.5, ax=ax2, color='white')
c[0].set_lw('2.0')
c[0].set_ls('dashed')
v = venn3([global_junctions.union(local_junctions), ensembl_junctions, est_junctions.union(mrna_junctions)],
set_labels=('', '', ''), ax=ax2)
v.get_label_by_id('100').set_fontsize(fontsize)
v.get_label_by_id('110').set_fontsize(fontsize)
v.get_label_by_id('101').set_fontsize(fontsize)
v.get_label_by_id('111').set_fontsize(fontsize)
v.get_label_by_id('011').set_fontsize(fontsize)
v.get_label_by_id('001').set_fontsize(fontsize)
v.get_label_by_id('010').set_fontsize(fontsize)
ax2.text(-0.8, 0.5, 'Global+local', fontdict=font)
ax2.text(0.3, -0.7, 'ESTs or mRNAs', fontdict=font)
ax2.text(0.45, 0.40, 'Ensembl', fontdict=font)
ax2.text(0, -0.8, 'B', fontdict={'size':18})
plt.savefig('global_local_junctions_venn.pdf',bbox_inches='tight', pad_inches=0.1)
In [80]:
fig = plt.figure(0, figsize=(8,8))
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,3)
ax4 = fig.add_subplot(2,2,4)
fig.tight_layout()
fontsize = 14
font = {'size':16}
v = venn3([gimme_junctions, ensembl_junctions, est_junctions.union(mrna_junctions)],
set_labels=('', '', ''), ax=ax1)
v.get_label_by_id('100').set_fontsize(fontsize)
v.get_label_by_id('110').set_fontsize(fontsize)
v.get_label_by_id('101').set_fontsize(fontsize)
v.get_label_by_id('111').set_fontsize(fontsize)
v.get_label_by_id('011').set_fontsize(fontsize)
v.get_label_by_id('001').set_fontsize(fontsize)
v.get_label_by_id('010').set_fontsize(fontsize)
ax1.text(-0.8, 0.5, 'Cufflinks', fontdict=font)
ax1.text(0.3, -0.7, 'ESTs or mRNAs', fontdict=font)
ax1.text(0.45, 0.40, 'Ensembl', fontdict=font)
ax1.text(0, -0.8, 'A', fontdict={'size':18})
c = venn3_circles([gimme_junctions, ensembl_junctions, est_junctions.union(mrna_junctions)],
linestyle='solid', lw=1.5, ax=ax1, color='white')
#c[0].set_lw('2.0')
#c[0].set_ls('dashed')
ax3.set_xlim(0,10)
ax3.set_ylim(0,1.0)
x = 0.1
ax3.hlines(1.0-x,2.0,1.5, color=v.get_patch_by_id('100').get_facecolor(), **legend_ls)
ax3.hlines(0.94-x,2.0,1.5, color=v.get_patch_by_id('110').get_facecolor(), **legend_ls)
ax3.hlines(0.88-x,2.0,1.5, color=v.get_patch_by_id('101').get_facecolor(), **legend_ls)
ax3.hlines(0.82-x,2.0,1.5, color=v.get_patch_by_id('111').get_facecolor(), **legend_ls)
ax3.hlines(0.76-x,2.0,1.5, color=v.get_patch_by_id('011').get_facecolor(), **legend_ls)
ax3.hlines(0.70-x,2.0,1.5, color=v.get_patch_by_id('001').get_facecolor(), **legend_ls)
ax3.hlines(0.64-x,2.0,1.5, color=v.get_patch_by_id('010').get_facecolor(), **legend_ls)
xx=0.02
ax3.text(2.3, 1.0-x-xx, 'Cufflinks', size=16)
ax3.text(2.3, 0.94-x-xx, 'Cufflinks+Ensembl', size=16)
ax3.text(2.3, 0.88-x-xx, 'Cufflinks+ESTs', size=16)
ax3.text(2.3, 0.82-x-xx, 'Cufflinks+Ensembl+ESTs', size=16)
ax3.text(2.3, 0.76-x-xx, 'Ensembl+ESTs', size=16)
ax3.text(2.3, 0.70-x-xx, 'ESTs', size=16)
ax3.text(2.3, 0.64-x-xx, 'Ensembl', size=16)
ax3.axis('off')
v = venn3([gimme_cuff_junctions, ensembl_junctions, est_junctions.union(mrna_junctions)],
set_labels=('', '', ''), ax=ax2)
v.get_label_by_id('100').set_fontsize(fontsize)
v.get_label_by_id('110').set_fontsize(fontsize)
v.get_label_by_id('101').set_fontsize(fontsize)
v.get_label_by_id('111').set_fontsize(fontsize)
v.get_label_by_id('011').set_fontsize(fontsize)
v.get_label_by_id('001').set_fontsize(fontsize)
v.get_label_by_id('010').set_fontsize(fontsize)
c = venn3_circles([gimme_cuff_junctions, ensembl_junctions, est_junctions.union(mrna_junctions)],
linestyle='solid', lw=1.5, ax=ax2, color='white')
#c[0].set_lw(2.0)
#c[0].set_ls('dashed')
ax2.text(-0.8, 0.5, 'Cufflinks+Gimme', fontdict=font)
ax2.text(0.3, -0.7, 'ESTs or mRNAs', fontdict=font)
ax2.text(0.45, 0.4, 'Ensembl', fontdict=font)
ax2.text(0, -0.8, 'B', fontdict={'size':18})
ax4.set_xlim(0,10)
ax4.set_ylim(0,1.0)
x = 0.1
ax4.hlines(1.0-x,2.0,1.5, color=v.get_patch_by_id('100').get_facecolor(), **legend_ls)
ax4.hlines(0.94-x,2.0,1.5, color=v.get_patch_by_id('110').get_facecolor(), **legend_ls)
ax4.hlines(0.88-x,2.0,1.5, color=v.get_patch_by_id('101').get_facecolor(), **legend_ls)
ax4.hlines(0.82-x,2.0,1.5, color=v.get_patch_by_id('111').get_facecolor(), **legend_ls)
ax4.hlines(0.76-x,2.0,1.5, color=v.get_patch_by_id('011').get_facecolor(), **legend_ls)
ax4.hlines(0.70-x,2.0,1.5, color=v.get_patch_by_id('001').get_facecolor(), **legend_ls)
ax4.hlines(0.64-x,2.0,1.5, color=v.get_patch_by_id('010').get_facecolor(), **legend_ls)
xx=0.02
ax4.text(2.3, 1.0-x-xx, 'Cufflinks+Gimme', size=16)
ax4.text(2.3, 0.94-x-xx, 'Cufflinks+Gimme+Ensembl', size=16)
ax4.text(2.3, 0.88-x-xx, 'Cufflinks+Gimme+ESTs', size=16)
ax4.text(2.3, 0.82-x-xx, 'Cufflinks+Gimme+Ensembl+ESTs', size=16)
ax4.text(2.3, 0.76-x-xx, 'Ensembl+ESTs', size=16)
ax4.text(2.3, 0.70-x-xx, 'ESTs', size=16)
ax4.text(2.3, 0.64-x-xx, 'Ensembl', size=16)
ax4.axis('off')
plt.savefig('cuff_gimme_junctions_venn.pdf',bbox_inches='tight', pad_inches=0.1)
plt.savefig('cuff_gimme_junctions_venn.eps',bbox_inches='tight', pad_inches=0.1, format='eps')
In [81]:
from scipy.stats import cumfreq
from glob import glob
from collections import defaultdict
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset, inset_axes
In [82]:
def get_read_count(filename, junctions_db):
'''Read read counts and return a dictionary
containing the number of reads of each splice
junction.
'''
with open(filename, 'r') as infile:
for line in infile:
junction, reads = line.strip().split('\t')
chrom = junction.split(':')[0]
jstart, jend = junction.split(':')[1].split('-')
jstart = int(jstart) + 1 # convert the end of an exon to the start of an intron
jend = int(jend) - 1 # convert the start of an exon to the end of an intron
if jend - jstart > 50: # min intron size default value in compare_splice_junctions script
junction = '%s:%d-%d' % (chrom, jstart, jend)
junctions_db[junction] += int(reads)
In [83]:
def get_normfreqs(inputfiles, junctions_db):
for countfile in glob(inputfiles):
get_read_count(countfile, junctions_db)
sorted_counts = sorted([v for v in junctions_db.itervalues()])
cumfreqs, lowlim, binsize, extrapoints = cumfreq(sorted_counts,
max(sorted_counts))
norm_cumfreqs = cumfreqs / max(cumfreqs)
poor_junctions = [i for i in sorted_counts if i < 4]
print 'Total junctions with no or fewer than four reads = %d (%.4f%%)' % \
(len(poor_junctions), len(poor_junctions)/float(len(sorted_counts))*100)
return norm_cumfreqs
In [84]:
gimme_se_junctions_counts = defaultdict(int)
gimme_se_norm_cumfreqs = get_normfreqs('*gimme.sorted.bam.reads_counts', gimme_se_junctions_counts)
gimme_pe_junctions_counts = defaultdict(int)
gimme_pe_norm_cumfreqs = get_normfreqs('*gimme.pe.sorted.bam.reads_counts', gimme_pe_junctions_counts)
In [85]:
cuff_se_junctions_counts = defaultdict(int)
cuff_se_norm_cumfreqs = get_normfreqs('*cufflinks.sorted.bam.reads_counts', cuff_se_junctions_counts)
cuff_pe_junctions_counts = defaultdict(int)
cuff_pe_norm_cumfreqs = get_normfreqs('*cufflinks.pe.sorted.bam.reads_counts', cuff_pe_junctions_counts)
In [87]:
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(cuff_se_norm_cumfreqs, linewidth=2.0, color='blue', label='Cufflinks SE')
ax.plot(cuff_pe_norm_cumfreqs, linewidth=2.0, color='red', label='Cufflinks PE')
ax.plot(gimme_se_norm_cumfreqs, linewidth=2.0, color='green', label='Gimme SE')
ax.plot(gimme_pe_norm_cumfreqs, linewidth=2.0, color='orange', label='Gimme PE')
ax.set_xscale('log')
axins = inset_axes(ax, 2.8, 2.8, loc=1)
axins.plot(cuff_se_norm_cumfreqs, linewidth=3.0, color='blue')
axins.plot(cuff_pe_norm_cumfreqs, linewidth=3.0, color='red')
axins.plot(gimme_se_norm_cumfreqs, linewidth=3.0, color='green')
axins.plot(gimme_pe_norm_cumfreqs, linewidth=3.0, color='orange')
axins.set_xlim(0,3)
axins.set_ylim(0,0.1)
axins.set_xticks([0, 1, 2, 3])
axins.set_yticks([0,0.1])
for label in (axins.get_xticklabels() + axins.get_yticklabels()):
label.set_fontsize(13)
#axins.set_xscale('log')
#ax.hlines(0.09, 1, 3, linestyles='solid', lw=3.0, color='k')
#ax.vlines(3, 0, 0.085, linestyles='solid', lw=3.0, color='k')
#axins.hlines(cuff_pe_norm_cumfreqs[4], 0, 4, linestyles='solid', lw=1.0, color='k')
#axins.vlines(4, 0, cuff_pe_norm_cumfreqs[4], linestyles='solid', lw=1.0, color='k')
axins.set_axis_bgcolor('white')
ax.legend(loc='upper left', fontsize=16)
ax.set_xlabel("Number of spliced reads", size=18)
ax.set_ylabel("Number of splice junction", size=18)
# change the font size of x and y ticks
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
label.set_fontsize(18)
plt.savefig('cdf_splice.pdf', bbox_inches='tight')
In [88]:
# CDF of Cufflinks junctions at 3 or fewer reads
cuff_se_norm_cumfreqs[4], cuff_pe_norm_cumfreqs[3]
Out[88]:
In [89]:
# CDF of Gimme junctions at 3 or fewer reads
gimme_se_norm_cumfreqs[4], gimme_pe_norm_cumfreqs[3]
Out[89]:
In [90]:
combined_junctions_counts = {}
for j in gimme_pe_junctions_counts:
combined_junctions_counts[j] = gimme_pe_junctions_counts[j] + gimme_se_junctions_counts[j]
print 'Junctions with no spliced reads = ', len([k for k,v
in combined_junctions_counts.iteritems() if v == 0 and 'random' in k])
print 'Junctions with fewer than 4 spliced reads = ', len([k for k,v
in combined_junctions_counts.iteritems() if v < 4])
In [91]:
combined_cuff_junctions_counts = {}
for j in cuff_pe_junctions_counts:
combined_cuff_junctions_counts[j] = cuff_pe_junctions_counts[j] + cuff_se_junctions_counts[j]
print 'Junctions with no spliced reads = ', len([k for k,v
in combined_cuff_junctions_counts.iteritems() if v == 0 and 'random' in k])
print 'Junctions with fewer than 4 spliced reads = ', len([k for k,v
in combined_cuff_junctions_counts.iteritems() if v < 4])
In [92]:
len(combined_junctions_counts), len(gimme_pe_junctions_counts)
Out[92]:
In [93]:
len(combined_cuff_junctions_counts)
Out[93]:
In [94]:
print 'Junctions with no spliced reads = ', len([k for k,v
in gimme_pe_junctions_counts.iteritems() if v == 0])
print 'Junctions with no spliced reads = ', len([k for k,v
in gimme_se_junctions_counts.iteritems() if v == 0])
In [95]:
print 'Junctions with no spliced reads = ', len([k for k,v
in gimme_pe_junctions_counts.iteritems() if v == 0 and 'random' in k])
print 'Junctions with no spliced reads = ', len([k for k,v
in gimme_se_junctions_counts.iteritems() if v == 0 and 'random' in k])
In [96]:
print 'Junctions with no spliced reads = ', len([k for k,v
in gimme_pe_junctions_counts.iteritems() if v < 4])
print 'Junctions with no spliced reads = ', len([k for k,v
in gimme_se_junctions_counts.iteritems() if v < 4])
In [97]:
print 'Junctions with no spliced reads = ', len([k for k,v
in cuff_pe_junctions_counts.iteritems() if v == 0])
print 'Junctions with no spliced reads = ', len([k for k,v
in cuff_se_junctions_counts.iteritems() if v == 0])
In [98]:
print 'Junctions with no spliced reads = ', len([k for k,v
in cuff_pe_junctions_counts.iteritems() if v == 0 and 'random' in k])
print 'Junctions with no spliced reads = ', len([k for k,v
in cuff_se_junctions_counts.iteritems() if v == 0 and 'random' in k])
In [99]:
figure, axes = plt.subplots(nrows=2, sharex=True, figsize=(10,10))
bar_width = 0.35
axes[0].bar(range(4), [58.76, 55.61, 56.94, 57.37],
width=bar_width, label='Ensembl', color='#660000')
axes[0].bar([bar_width + i for i in range(4)], [81.96, 75.62, 78.93, 82.01],
width=bar_width, label='Gimme', color='#006600')
axes[0].set_ylabel('Percent mapped reads', size=16)
axes[0].legend(loc='lower right', fontsize=16)
axes[0].set_title('Single-end reads', size=16)
axes[0].set_yticklabels(range(0,100,10), fontsize=16)
axes[1].bar(range(4), [52.35, 51.65, 53.05, 52.40],
width=bar_width, label='Ensembl', color='#660000')
axes[1].bar([bar_width + i for i in range(4)], [74.40, 75.13, 74.76, 74.86],
width=bar_width, label='Gimme', color='#006600')
axes[1].set_ylabel('Percent mapped reads', size=16)
axes[1].set_title('Paired-end reads', size=16)
axes[1].set_yticklabels(range(0,100,10), fontsize=16)
axes[1].legend(loc='lower right', fontsize=16)
plt.xticks([0.25+i for i in range(4)],
['Line 6 (Ctrl)', 'Line 6 (Inf)', 'Line 7 (Ctrl)', 'Line 7 (Inf)'],
size=14)
plt.savefig('mapped-reads.pdf', bbox_inches='tight')
In [100]:
from Bio import SeqIO
from collections import defaultdict
In [101]:
genes_length = defaultdict(int)
seqs_length = {}
for rec in SeqIO.parse('all_assembly_models.bed.faa', 'fasta'):
seqid = rec.id.strip(';')
seqs_length[seqid] = len(rec)
geneid = seqid.split('.')[0]
if len(rec) > genes_length[geneid]:
genes_length[geneid] = len(rec)
cufflinks_genes_length = defaultdict(int)
cufflinks_seqs_length = {}
for rec in SeqIO.parse('cufflinks.bed.faa', 'fasta'):
seqid = rec.id.strip(';')
cufflinks_seqs_length[seqid] = len(rec)
geneid = seqid.split('.')[0]
if len(rec) > cufflinks_genes_length[geneid]:
cufflinks_genes_length[geneid] = len(rec)
asm_cuff_genes_length = defaultdict(int)
asm_cuff_seqs_length = {}
for rec in SeqIO.parse('all_assembly_cufflinks_models.bed.faa', 'fasta'):
seqid = rec.id.strip(';')
asm_cuff_seqs_length[seqid] = len(rec)
geneid = seqid.split('.')[0]
if len(rec) > asm_cuff_genes_length[geneid]:
asm_cuff_genes_length[geneid] = len(rec)
In [102]:
genes_match = defaultdict(float)
seqs_match = []
genes_length_match = {}
isoforms_length = []
for line in open('all_assembly_models.bed.faa.blastp.out'):
seqid, bits, homolog = line.split('\t')
bits = float(bits)
seqid = seqid.strip(';')
geneid = seqid.split('.')[0]
seqlen = seqs_length[seqid]
seqs_match.append(bits/seqlen)
ratio = bits/genes_length[geneid]
isoforms_length.append(seqlen)
if ratio > genes_match[geneid]:
genes_match[geneid] = ratio
genes_length_match[geneid] = genes_length[geneid]
asm_cuff_genes_match = defaultdict(float)
asm_cuff_seqs_match = []
asm_cuff_genes_length_match = {}
asm_cuff_isoforms_length = []
for line in open('all_assembly_cufflinks_models.bed.faa.blastp.out'):
seqid, bits, homolog = line.split('\t')
bits = float(bits)
seqid = seqid.strip(';')
geneid = seqid.split('.')[0]
seqlen = asm_cuff_seqs_length[seqid]
asm_cuff_seqs_match.append(bits/seqlen)
ratio = bits/asm_cuff_genes_length[geneid]
asm_cuff_isoforms_length.append(seqlen)
if ratio > asm_cuff_genes_match[geneid]:
asm_cuff_genes_match[geneid] = ratio
asm_cuff_genes_length_match[geneid] = asm_cuff_genes_length[geneid]
cufflinks_genes_match = defaultdict(float)
cufflinks_seqs_match = []
cufflinks_genes_length_match = {}
cufflinks_isoforms_length = []
for line in open('cufflinks.bed.faa.blastp.out'):
seqid, bits, homolog = line.split('\t')
bits = float(bits)
seqid = seqid.strip(';')
geneid = seqid.split('.')[0]
seqlen = cufflinks_seqs_length[seqid]
cufflinks_seqs_match.append(bits/seqlen)
cufflinks_isoforms_length.append(seqlen)
ratio = bits/cufflinks_genes_length[geneid]
if ratio > cufflinks_genes_match[geneid]:
cufflinks_genes_match[geneid] = ratio
cufflinks_genes_length_match[geneid] = cufflinks_genes_length[geneid]
In [106]:
fig = plt.figure(0, figsize=(12,8))
plt.subplot(211)
plt.boxplot([[v for v in cufflinks_genes_match.itervalues()],
[v for v in genes_match.itervalues()],
[v for v in asm_cuff_genes_match.itervalues()]
, cufflinks_seqs_match, seqs_match, asm_cuff_seqs_match])
plt.ylabel('bits/length ratio', size=15)
plt.xticks([1,2,3,4], [''] * 4)
plt.yticks(size=14)
plt.title('A', size=18)
#yscale('log')
plt.subplot(212)
plt.boxplot([[v for v in cufflinks_genes_length.itervalues()],
[v for v in genes_length.itervalues()],
[v for v in asm_cuff_genes_length.itervalues()],
cufflinks_isoforms_length, isoforms_length, asm_cuff_isoforms_length])
plt.xticks([1,2,3,4,5,6], ['Gene\nCufflinks', 'Gene\nGimme',
'Gene\nCufflinks+Gimme', 'Isoform\nCufflinks',
'Isoform\nGimme', 'Isoform\nGimme+Cufflinks'],
size=16)
plt.title('B', size=18)
plt.yticks(size=14)
plt.ylabel('size (bp)', size=15)
plt.yscale('log')
plt.savefig('mouse-homolog-boxplot.pdf',bbox_inches='tight')
In [107]:
len(cufflinks_seqs_match), len(seqs_match)
Out[107]:
In [108]:
len(cufflinks_genes_match), len(genes_match)
Out[108]:
In [109]:
mlength = defaultdict(int)
mrat = {}
for rec in SeqIO.parse('mouse.sample.faa', 'fasta'):
mlength[rec.id] = len(rec)
for line in open('mouse.sample.faa.blastp.out'):
seqid, bits, homolog = line.split('\t')
bits = float(bits)
seqlen = mlength[seqid]
mrat[seqid] = bits/seqlen
In [110]:
clength = defaultdict(int)
crat = {}
for rec in SeqIO.parse('chick.sample.faa', 'fasta'):
clength[rec.id] = len(rec)
for line in open('chick-vs-mouse.sample.faa.blastp.out'):
seqid, bits, homolog = line.split('\t')
bits = float(bits)
seqlen = clength[seqid]
crat[seqid] = bits/seqlen
In [113]:
!cat all_assembly_models.bed_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > all_assembly_models.noncanonical
In [114]:
!cat all_assembly_cufflinks_ensembl_models.bed_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > all_assembly_cufflinks_ensembl_models.noncanonical
In [115]:
!cat Gallus_gallus.WASHUC2.64.bed_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > Gallus_gallus.WASHUC2.64.noncanonical
In [116]:
!cat cufflinks-ref.bed_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > cufflinks-ref.canonical
In [117]:
!cat cufflinks.bed_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > cufflinks.canonical
In [118]:
!cat est.psl.best_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > est.psl.best.noncanonical
In [119]:
!cat mrna.psl.best_all_sp.txt | python ../protocol/canonical-splice-site.py \
../../chick.fa > mrna.psl.noncanonical
In [120]:
!cat mouse_cufflinks.bed_all_sp.txt | python ../protocol/canonical-splice-site.py \
mm9.fa > mouse_cufflinks.noncanonical
In [121]:
muschroms = !cat ../protocol/mouse.list.txt
op = open('Mus_musculus.NCBIM37.64.bed_all_sp.flt.txt', 'w') # remove some chromosomes not in Cufflinks
for line in open('Mus_musculus.NCBIM37.64.bed_all_sp.txt'):
chro = line.split(':')[0]
if chro in muschroms:
print >> op, line.strip('\n')
op.close()
!cat Mus_musculus.NCBIM37.64.bed_all_sp.flt.txt | python ../protocol/canonical-splice-site.py \
mm9.fa > mouse_ensembl_cufflinks.noncanonical
In [122]:
gimme_noncano = !cat all_assembly_cufflinks_ensembl_models.noncanonical
gimme_noncano = set(gimme_noncano)
In [123]:
ensbl_noncano = !cat Gallus_gallus.WASHUC2.64.noncanonical
ensbl_noncano = set(ensbl_noncano)
In [124]:
est_noncano = !cat est.psl.best.noncanonical
mrna_noncano = !cat mrna.psl.noncanonical
est_noncano = set(est_noncano)
est_noncano.update(set(mrna_noncano))
In [125]:
cuff_noncano = !cat cufflinks-ref.canonical
cuff_noncano = set(cuff_noncano)
In [126]:
# non-canonical splice site from Gimme models supported by Ensembl
print len(gimme_noncano.intersection(ensbl_noncano))
print len(gimme_noncano.intersection(ensbl_noncano))/float(len(gimme_noncano))*100
In [127]:
# non-canonical splice site from Gimme models supported by ESTs or mRNAs.
print len(gimme_noncano.intersection(est_noncano)), 'sites',
print '%.2f%%' % (len(gimme_noncano.intersection(est_noncano))/float(len(gimme_noncano))*100)
In [128]:
# non-canonical splice site from Cufflinks-ref models supported by ESTs or mRNAs.
print len(cuff_noncano.intersection(est_noncano)), 'sites',
print '%.2f%%' % (len(cuff_noncano.intersection(est_noncano))/float(len(cuff_noncano))*100)
In [129]:
# non-canonical splice site from Cufflinks-ref models supported by ESTs or mRNAs.
print len(cuff_noncano.intersection(ensbl_noncano)), 'sites',
print '%.2f%%' % (len(cuff_noncano.intersection(ensbl_noncano))/float(len(cuff_noncano))*100)
In [130]:
tmp = !cut -f 1 all_assembly_cufflinks_ensembl_models.bed_all_sp.txt
gimme_cano = set(tmp)
In [131]:
tmp = !cut -f 1 est.psl.best_all_sp.txt
est_cano = set(tmp)
tmp = !cut -f 1 mrna.psl.best_all_sp.txt
est_cano.update(set(tmp))
In [132]:
# canonical splice site from Gimme models supported by ESTs or mRNAs.
print len(gimme_cano.intersection(est_cano)), 'sites',
print '%.2f%%' % (len(gimme_cano.intersection(est_cano))/float(len(gimme_cano))*100)
In [134]:
from collections import defaultdict
chromcounts = defaultdict(int)
for line in open('all_assembly_models.noncanonical'):
chrom = line.split(':')[0]
chromcounts[chrom] += 1
chromlist = sorted(chromcounts.keys())
plt.bar(range(len(chromlist)), [chromcounts[c] for c in chromlist])
plt.ylabel('Non-Canonical Splice Site', size=14)
plt.xticks(range(len(chromlist)), chromlist, rotation=90, size=14)
plt.yticks(size=13)
plt.savefig('noncanonical.pdf')
In [135]:
chromcounts['chrUn_random']
Out[135]:
In [141]:
import numpy as np
from numpy import arange
In [138]:
err6u = np.loadtxt('line6u.profile', delimiter=',', unpack=True)
err6i = np.loadtxt('line6i.profile', delimiter=',', unpack=True)
err7u = np.loadtxt('line7u.profile', delimiter=',', unpack=True)
err7i = np.loadtxt('line7i.profile', delimiter=',', unpack=True)
In [142]:
plt.subplot(221)
plt.plot(arange(1,76), err6u[:,0], 'blue', label='A')
plt.plot(arange(1,76), err6u[:,1], 'red', label='C')
plt.plot(arange(1,76), err6u[:,2], 'green', label='G')
plt.plot(arange(1,76), err6u[:,3], 'orange', label='T')
plt.legend(loc='upper left')
plt.ylabel('Mismatch', size=14)
plt.title('Line 6 Control', size=14)
plt.xticks(size=13)
plt.yticks(size=13)
plt.subplot(222)
plt.plot(arange(1,76), err6i[:,0], 'blue', label='A')
plt.plot(arange(1,76), err6i[:,1], 'red', label='C')
plt.plot(arange(1,76), err6i[:,2], 'green', label='G')
plt.plot(arange(1,76), err6i[:,3], 'orange', label='T')
plt.legend(loc='upper left')
plt.title('Line 6 Infected', size=14)
plt.xticks(size=13)
plt.yticks(size=13)
plt.subplot(223)
plt.plot(arange(1,76), err7u[:,0], 'blue', label='A')
plt.plot(arange(1,76), err7u[:,1], 'red', label='C')
plt.plot(arange(1,76), err7u[:,2], 'green', label='G')
plt.plot(arange(1,76), err7u[:,3], 'orange', label='T')
plt.legend(loc='upper left')
plt.ylabel('Mismatch', size=14)
plt.xlabel('Nucleotide Position', size=14)
plt.title('Line 7 Control', size=14)
plt.xticks(size=13)
plt.yticks(size=13)
plt.subplot(224)
plt.plot(arange(1,76), err7i[:,0], 'blue', label='A')
plt.plot(arange(1,76), err7i[:,1], 'red', label='C')
plt.plot(arange(1,76), err7i[:,2], 'green', label='G')
plt.plot(arange(1,76), err7i[:,3], 'orange', label='T')
plt.legend(loc='upper left')
plt.xlabel('Nucleotide Position', size=14)
plt.title('Line 7 Infected', size=14)
plt.xticks(size=13)
plt.yticks(size=13)
plt.savefig('error_profile.pdf')
In [ ]: