In [36]:
%matplotlib inline
import os
from matplotlib import pyplot as plt
import pandas as pd
import screed
import numpy as np
import seaborn
from matplotlib_venn import venn3, venn3_circles
In [2]:
v = venn3((1,1,1,1,1,1,1), set_labels=('Ref', 'PacBio', 'RNA'))
v.get_label_by_id('100').set_text('Only in ref')
v.get_label_by_id('010').set_text('Only in PacBio')
v.get_label_by_id('001').set_text('Only in RNA')
v.get_label_by_id('011').set_text('*')
v.get_label_by_id('101').set_text('A')
v.get_label_by_id('110').set_text('B')
v.get_label_by_id('111').set_text('C')
Blat ref/rna $= A + C$
Blat PacBio/rna $= C + *$
BLASR ref/PacBio $= B + C$
$C = (A + C) \cap (C + *)$
$* = $ Blat PacBio/rna $ - C$
$A = $ Blat ref/rna $-C$
In [166]:
! cd .. && make outputs/reference/galGal4.fa_screed
! cd .. && make outputs/pacbio/galGal4.unmapped_reads
! cd .. && make workdirs/blat/pacbio_minlen200.h5
! cd .. && make outputs/rna/pacbio/galGal4/summary.csv
In [167]:
def venn_diagram(only_rna, only_rna_seq, only_rna_ref, intersection, ref="galGal4", seq="PacBio"):
v = venn3((1,1,1,1,1,1,1), set_labels=(ref, seq, 'RNA'))
v.get_label_by_id('100').set_text('') # 'Only in ref', len(only_ref)
v.get_label_by_id('010').set_text('') # 'Only in seq', len(only_in_seq)
v.get_label_by_id('001').set_text('D\n%.1fK' % (float(only_rna) / 1000)) # 121.3K
v.get_label_by_id('011').set_text('C\n%.1fK' % (float(only_rna_seq) / 1000)) # 14.4K
v.get_label_by_id('101').set_text('A\n%.1fK' % (float(only_rna_ref) / 1000)) # 35.2K
v.get_label_by_id('110').set_text('')
v.get_label_by_id('111').set_text('B\n%.1fK' % (float(intersection) / 1000)) # 248.2K
In [168]:
summary = pd.Series.from_csv("../outputs/rna/pacbio/galGal4/summary.csv")
venn_diagram(*[summary[s] for s in ('Only in RNA', 'RNA/Seq', 'RNA/Ref', 'Intersection')],
ref="galGal4", seq='PacBio')