In [1]:
%run ~/relmapping/annot/notebooks/annot__init__.ipynb
# Root directory of the annotation build used throughout this notebook.
annot_ = 'annot_ce11'


def mp(fp, annot_=annot_):
    """Return the path of `fp` inside the `canonical_geneset` directory.

    Parameters
    ----------
    fp : str
        File name (or relative path) to place under `canonical_geneset`.
    annot_ : str, optional
        Annotation root directory; defaults to the module-level `annot_`
        as it was when this function was defined.

    Returns
    -------
    str
        `<annot_>/canonical_geneset/<fp>`, joined with `os.path.join`.
    """
    geneset_dir = os.path.join(annot_, 'canonical_geneset')
    return os.path.join(geneset_dir, fp)
In [2]:
# Load the WS260 (ce11) transcript annotation into a DataFrame.
# The path is built with mp() rather than spelled out, so the annotation
# directory is defined in a single place (same convention as the output
# paths written elsewhere in this notebook).
fp_ = mp('WS260_ce11.transcripts.gtf.gz')
# NOTE(review): parse_attr / coords_adj are options of the project helper
# yp.read_wbgtf; presumably they expand the GTF attribute column and adjust
# coordinates -- confirm against the helper's definition.
df_ = yp.read_wbgtf(fp_, parse_attr=True, coords_adj=True)
df_.head()
Out[2]:
In [3]:
# .bed-file for masking nuisance RNAs in ce11 tracks
l_ = ['tRNA', 'snoRNA', 'miRNA', 'snRNA', 'rRNA']
c_ = ['chrom', 'start', 'end', 'gene_id', 'score', 'strand', 'transcript_biotype']
q_ = '(feature == "transcript") & (transcript_biotype in @l_)'
fp_ = mp('WS260_ce11.transcripts.non_coding.bed')
df_.query(q_)[c_].reset_index(drop=True).to_csv(fp_, **yp.TO_GTF_KWARGS)
!wc -l {fp_}