In [40]:
# Initialise
%run ~/relmapping/annot/notebooks/__init__.ipynb


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
os.getcwd(): /mnt/beegfs/scratch_copy/ahringer/jj374/lab/relmapping

In [39]:
# Load new annotation & test known corner cases
# The path is relative to the current working directory (the init cell prints
# os.getcwd(); presumably the relmapping checkout -- confirm before running elsewhere).
# Previous input, kept for reference:
#fp_regl = 'annot/S2_regulatory_annotation/S2_regulatory_annotation.tsv'
fp_regl = 'annot/Fig2D2_regulatory_annotation_Apr27/Fig2D2_regulatory_annotation_Apr27.tsv'
# df_regl is a notebook-level name: check_() below and the diff cells further down read it.
df_regl = pd.read_csv(fp_regl, sep='\t')
print('%d regions loaded' % (len(df_regl),))

def check_(igvstr, **kwargs):
    """Assert that the region covering a locus carries the expected annotation values.

    The midpoint of `igvstr` is looked up in the notebook-level `df_regl`;
    exactly one region on the same chromosome must satisfy
    start <= midpoint < end. For every keyword argument one tab-separated line
    (locus, column, expected, observed) is printed, then the two values are
    asserted to be equal.

    Args:
        igvstr: Locus string passed to `yp.parse_igvstr`, e.g. 'chrIV:6117863-6118043'.
        **kwargs: Column name in `df_regl` -> value expected for the matched region.

    Raises:
        AssertionError: If the midpoint is not covered by exactly one region, or
            if an observed value differs from the expected one. The message names
            the locus (and column) so a failing check can be identified directly.
    """
    (chrom, start, end) = yp.parse_igvstr(igvstr)
    pos = int((start + end) / 2)
    df_ = df_regl.query('(chrom == "%s") & (start <= %d) & (%d < end)' % (chrom, pos, pos))
    assert len(df_) == 1, \
        '%s: expected exactly 1 region covering %s:%d, found %d' % (igvstr, chrom, pos, len(df_))
    for (k, v) in kwargs.items():
        # Single matched row, so the first list element is the value for column k.
        observed = df_[k].tolist()[0]
        print('\t'.join([igvstr, '%25s' % (k,), '%25s' % (v,), '%25s' % (observed,)]))
        assert observed == v, '%s: %s is %r, expected %r' % (igvstr, k, observed, v)

# Regression checks on hand-curated loci: each check_() call asserts the annotation
# of the region covering the midpoint of the given locus. Lines starting with "#>"
# appear to be quoted reviewer remarks, followed by how each was addressed.
#> rfc-4 (black element - promoter inside masked??)
#> zen-4 (black element - should be promoter?) snoRNA, I guess could mask promoter
#Rare but compelling cases of bidirectional promoters where one direction was protein-coding, and the other was an annotated non-coding RNA. Addressed by relaxing the non-coding RNA annotation. They're now treated more similarly to protein-coding annotations, so a site can be annotated as protein_coding on one strand, and snoRNA on the other strand. (Previously, any site that overlapped a non-coding RNA was excluded from any other annotation...)
#> attf-2 (black element maybe promoter?)
#Now annotated as a promoter for F09G2.2 (protein-coding gene, forward strand), and F09G2.14 (snoRNA, reverse strand). Could also be a promoter for attf-2, but can't think of much one could do about this given current fragmented long cap data.
check_('chrIV:6117863-6118043',
       annot_fwd='coding_promoter', promoter_locus_id_fwd='zen-4',
       annot_rev='non-coding_RNA', promoter_locus_id_rev='M03D4.78')

## "Dual" coding/non-coding annotations: coding_promoter >> non-coding_RNA
check_('chrIII:6971981-6972101', 
       annot_fwd='coding_promoter', promoter_locus_id_fwd='rfc-4',
       annot_rev='coding_promoter', promoter_locus_id_rev='eef-1A.1') # alternative: mir-5549
check_('chrV:7196589-7196718',
       annot_fwd='coding_promoter', promoter_locus_id_fwd='F09G2.2',
       annot_rev='coding_promoter', promoter_locus_id_rev='attf-2') # alternative: F09G2.14

check_('chrII:13596635-13596738', annot_rev='coding_promoter', promoter_locus_id_rev='fbxc-23')

#> fbxc-15
#Fixed by tweaking the long cap jump test.
check_('chrII:979338-979455', annot_fwd='coding_promoter', promoter_locus_id_fwd='fbxc-15')

#> sri-5 (promoter annotated to rsr-1 only)
#> C25F9.6 (promoter annotated to C25F9.6 only) - looks like there is jump in txn?
#> F08F3.8 (promoter annotated to acl-6 only) - looks like there is longcap signal
#> rsd-2 (maybe one of the yellow elements promoter?) Looks like there is txn from furthest right yellow element
#Weak outron signal; now captured by requiring 1 long cap tag in each replicate, and at least 3 tags total (previously at least 2 tags in both replicates).
check_('chrI:11182502-11182624', annot_rev='coding_promoter', promoter_locus_id_rev='rsr-1')
check_('chrV:19405507-19405707', 
       annot_fwd='coding_promoter', promoter_locus_id_fwd='C25F9.10',
       annot_rev='coding_promoter', promoter_locus_id_rev='C25F9.6')
check_('chrV:5433170-5433353', annot_fwd='coding_promoter', promoter_locus_id_fwd='acl-6')

check_('chrIV:13541820-13541934', annot_rev='coding_promoter', promoter_locus_id_rev='rsd-2')

# Sites downstream within a long exon that should not be called promoters
check_('chrII:7150014-7150236', annot_fwd='no_transcription', annot_rev='transcription_initiation')
check_('chrI:183422-183568', annot_fwd='transcription_initiation', associated_locus_id='atm-1')

# "Long distance" ncRNA annotations (should not happen)
check_('chrII:6891890-6892040', annot_fwd='coding_promoter', annot_rev='coding_promoter')

# addressed via closest_exon_flank ~< 250 (ncRNA downstream of site; exon1 from upstream gets mis-assigned)
check_('chrI:111191-111378', annot_fwd='non-coding_RNA', promoter_locus_id_fwd='F53G12.12')

# Non-coding RNAs within outrons should not 'block' upstream promoter annotation
check_('chrI:13324560-13324714', annot_fwd='coding_promoter', promoter_locus_id_fwd='gcy-35') # Reverse strand shows weak-gapped-but-compelling long cap?
check_('chrI:13372998-13373215', annot_rev='coding_promoter', promoter_locus_id_rev='sptf-3')
# NOTE(review): the next three checks repeat assertions already made above
# (rfc-4 / eef-1A.1 in the "Dual" section, sptf-3 on the previous line).
check_('chrIII:6971981-6972101', annot_fwd='coding_promoter', promoter_locus_id_fwd='rfc-4')
check_('chrIII:6971981-6972101', annot_rev='coding_promoter', promoter_locus_id_rev='eef-1A.1')
check_('chrI:13372998-13373215', annot_rev='coding_promoter', promoter_locus_id_rev='sptf-3')

# Long cap jump -- weak regions
check_('chrI:14316482-14316632', annot_fwd='pseudogene_promoter', promoter_locus_id_fwd='F49B2.4') # Also, reverse strand shows weak-gapped-but-compelling long cap (however no short cap!)?

# Short cap 100..125bp from peak accessibility...
#check_('chrI:6341909-6342059', annot_rev='unknown_promoter')
check_('chrII:14468955-14469118', annot_fwd='unknown_promoter')
# Short cap >125bp from peak accessibility...
#check_('chrII:14468955-14469118', annot_rev='unknown_promoter')

# Promoters initiating transcription within the UTR, downstream of the annotated TSS
check_('chrI:6011491-6011614', annot_rev='coding_promoter', promoter_locus_id_rev='rnf-1')
check_('chrI:7643236-7643386', annot_rev='coding_promoter', promoter_locus_id_rev='ncbp-2')
check_('chrII:2320231-2320381', annot_fwd='coding_promoter', promoter_locus_id_fwd='ZK1240.3')
check_('chrII:6775038-6775173', annot_rev='coding_promoter', promoter_locus_id_rev='agr-1')
check_('chrII:2296175-2296310', annot_rev='coding_promoter', promoter_locus_id_rev='fbxc-25')

# Various weak-ish/corner cases where a clear unique promoter candidate exists
check_('chrI:5509301-5509451', annot_rev='coding_promoter', promoter_locus_id_rev='zipt-11') # upstream long cap overlaps upstream gene 3' end
check_('chrX:14706659-14706809', annot_rev='transcription_initiation') # should not be an unknown_promoter, as is proximal to first exon
check_('chrIII:1163615-1163765', annot_fwd='no_transcription', annot_rev='no_transcription') # Should not be a promoter of Y119D3B.13
check_('chrX:14524974-14525124', annot_rev='coding_promoter', promoter_locus_id_rev='nspc-17')
check_('chrV:5433187-5433337', annot_rev='coding_promoter', promoter_locus_id_rev='F08F3.8') # distal; site has no long cap (but has continuous txn)
check_('chrI:13376619-13376731', annot_fwd='coding_promoter', promoter_locus_id_fwd='C01A2.9') # should be detected in d13_fwd -- padj 0.32, missed

# No short cap
check_('chrI:11182502-11182624', annot_fwd='coding_promoter', promoter_locus_id_fwd='sri-5') # no short cap
check_('chrII:13596635-13596738', annot_fwd='coding_promoter', promoter_locus_id_fwd='csc-1') # "distal"; no scap

# txn init within coding region
#check_('chrII:2772185-2772335', annot_rev='coding_promoter', promoter_locus_id_rev='sdz-10')
#check_('chrII:1818315-1818462', annot_rev='coding_promoter', promoter_locus_id_rev='fbxb-10') # short cap within 1st exon
#check_('chrII:14563893-14564084', annot_rev='pseudogene_promoter', promoter_locus_id_rev='C01G12.12') # pseudogene promoters without short cap!

# afd-1 locus -- two sites with long cap gaps, should not be called promoters (but not necessarily unknown_promoter either)
check_('chrI:2167906-2168056', annot_fwd='transcription_initiation')
check_('chrI:2179963-2180113', annot_fwd='transcription_initiation')

# Y37E3.11 locus -- too far from the annotated TSS to be associated with that UTR
check_('chrI:2084937-2085087', annot_fwd='non-coding_RNA', promoter_locus_id_fwd='Y37E3.20')

# Weak promoter with several non-first alternative isoforms overlapping the first exon
check_('chrI:2084303-2084453', annot_fwd='coding_promoter', promoter_locus_id_fwd='Y37E3.11')

check_('chrI:5485932-5486082', annot_rev='coding_promoter', promoter_locus_id_rev='rpl-19')
check_('chrI:5485932-5486082', annot_fwd='unknown_promoter')

# Only apply the low-confidence promoter rule to intergenic sites
check_('chrI:2190535-2190685', annot_fwd='transcription_initiation') # afd-1  (in intron)
check_('chrI:4202291-4202441', annot_fwd='transcription_initiation') # C18E3.4


42245 regions loaded
chrIV:6117863-6118043	                annot_fwd	          coding_promoter	          coding_promoter
chrIV:6117863-6118043	    promoter_locus_id_fwd	                    zen-4	                    zen-4
chrIV:6117863-6118043	                annot_rev	           non-coding_RNA	           non-coding_RNA
chrIV:6117863-6118043	    promoter_locus_id_rev	                 M03D4.78	                 M03D4.78
chrIII:6971981-6972101	                annot_fwd	          coding_promoter	          coding_promoter
chrIII:6971981-6972101	    promoter_locus_id_fwd	                    rfc-4	                    rfc-4
chrIII:6971981-6972101	                annot_rev	          coding_promoter	          coding_promoter
chrIII:6971981-6972101	    promoter_locus_id_rev	                 eef-1A.1	                 eef-1A.1
chrV:7196589-7196718	                annot_fwd	          coding_promoter	          coding_promoter
chrV:7196589-7196718	    promoter_locus_id_fwd	                  F09G2.2	                  F09G2.2
chrV:7196589-7196718	                annot_rev	          coding_promoter	          coding_promoter
chrV:7196589-7196718	    promoter_locus_id_rev	                   attf-2	                   attf-2
chrII:13596635-13596738	                annot_rev	          coding_promoter	          coding_promoter
chrII:13596635-13596738	    promoter_locus_id_rev	                  fbxc-23	                  fbxc-23
chrII:979338-979455	                annot_fwd	          coding_promoter	          coding_promoter
chrII:979338-979455	    promoter_locus_id_fwd	                  fbxc-15	                  fbxc-15
chrI:11182502-11182624	                annot_rev	          coding_promoter	          coding_promoter
chrI:11182502-11182624	    promoter_locus_id_rev	                    rsr-1	                    rsr-1
chrV:19405507-19405707	                annot_fwd	          coding_promoter	          coding_promoter
chrV:19405507-19405707	    promoter_locus_id_fwd	                 C25F9.10	                 C25F9.10
chrV:19405507-19405707	                annot_rev	          coding_promoter	          coding_promoter
chrV:19405507-19405707	    promoter_locus_id_rev	                  C25F9.6	                  C25F9.6
chrV:5433170-5433353	                annot_fwd	          coding_promoter	          coding_promoter
chrV:5433170-5433353	    promoter_locus_id_fwd	                    acl-6	                    acl-6
chrIV:13541820-13541934	                annot_rev	          coding_promoter	          coding_promoter
chrIV:13541820-13541934	    promoter_locus_id_rev	                    rsd-2	                    rsd-2
chrII:7150014-7150236	                annot_fwd	         no_transcription	         no_transcription
chrII:7150014-7150236	                annot_rev	 transcription_initiation	 transcription_initiation
chrI:183422-183568	                annot_fwd	 transcription_initiation	 transcription_initiation
chrI:183422-183568	      associated_locus_id	                    atm-1	                    atm-1
chrII:6891890-6892040	                annot_fwd	          coding_promoter	          coding_promoter
chrII:6891890-6892040	                annot_rev	          coding_promoter	          coding_promoter
chrI:111191-111378	                annot_fwd	           non-coding_RNA	           non-coding_RNA
chrI:111191-111378	    promoter_locus_id_fwd	                F53G12.12	                F53G12.12
chrI:13324560-13324714	                annot_fwd	          coding_promoter	          coding_promoter
chrI:13324560-13324714	    promoter_locus_id_fwd	                   gcy-35	                   gcy-35
chrI:13372998-13373215	                annot_rev	          coding_promoter	          coding_promoter
chrI:13372998-13373215	    promoter_locus_id_rev	                   sptf-3	                   sptf-3
chrIII:6971981-6972101	                annot_fwd	          coding_promoter	          coding_promoter
chrIII:6971981-6972101	    promoter_locus_id_fwd	                    rfc-4	                    rfc-4
chrIII:6971981-6972101	                annot_rev	          coding_promoter	          coding_promoter
chrIII:6971981-6972101	    promoter_locus_id_rev	                 eef-1A.1	                 eef-1A.1
chrI:13372998-13373215	                annot_rev	          coding_promoter	          coding_promoter
chrI:13372998-13373215	    promoter_locus_id_rev	                   sptf-3	                   sptf-3
chrI:14316482-14316632	                annot_fwd	      pseudogene_promoter	      pseudogene_promoter
chrI:14316482-14316632	    promoter_locus_id_fwd	                  F49B2.4	                  F49B2.4
chrII:14468955-14469118	                annot_fwd	         unknown_promoter	         unknown_promoter
chrI:6011491-6011614	                annot_rev	          coding_promoter	          coding_promoter
chrI:6011491-6011614	    promoter_locus_id_rev	                    rnf-1	                    rnf-1
chrI:7643236-7643386	                annot_rev	          coding_promoter	          coding_promoter
chrI:7643236-7643386	    promoter_locus_id_rev	                   ncbp-2	                   ncbp-2
chrII:2320231-2320381	                annot_fwd	          coding_promoter	          coding_promoter
chrII:2320231-2320381	    promoter_locus_id_fwd	                 ZK1240.3	                 ZK1240.3
chrII:6775038-6775173	                annot_rev	          coding_promoter	          coding_promoter
chrII:6775038-6775173	    promoter_locus_id_rev	                    agr-1	                    agr-1
chrII:2296175-2296310	                annot_rev	          coding_promoter	          coding_promoter
chrII:2296175-2296310	    promoter_locus_id_rev	                  fbxc-25	                  fbxc-25
chrI:5509301-5509451	                annot_rev	          coding_promoter	          coding_promoter
chrI:5509301-5509451	    promoter_locus_id_rev	                  zipt-11	                  zipt-11
chrX:14706659-14706809	                annot_rev	 transcription_initiation	 transcription_initiation
chrIII:1163615-1163765	                annot_fwd	         no_transcription	         no_transcription
chrIII:1163615-1163765	                annot_rev	         no_transcription	         no_transcription
chrX:14524974-14525124	                annot_rev	          coding_promoter	          coding_promoter
chrX:14524974-14525124	    promoter_locus_id_rev	                  nspc-17	                  nspc-17
chrV:5433187-5433337	                annot_rev	          coding_promoter	          coding_promoter
chrV:5433187-5433337	    promoter_locus_id_rev	                  F08F3.8	                  F08F3.8
chrI:13376619-13376731	                annot_fwd	          coding_promoter	          coding_promoter
chrI:13376619-13376731	    promoter_locus_id_fwd	                  C01A2.9	                  C01A2.9
chrI:11182502-11182624	                annot_fwd	          coding_promoter	          coding_promoter
chrI:11182502-11182624	    promoter_locus_id_fwd	                    sri-5	                    sri-5
chrII:13596635-13596738	                annot_fwd	          coding_promoter	          coding_promoter
chrII:13596635-13596738	    promoter_locus_id_fwd	                    csc-1	                    csc-1
chrI:2167906-2168056	                annot_fwd	 transcription_initiation	 transcription_initiation
chrI:2179963-2180113	                annot_fwd	 transcription_initiation	 transcription_initiation
chrI:2084937-2085087	                annot_fwd	           non-coding_RNA	           non-coding_RNA
chrI:2084937-2085087	    promoter_locus_id_fwd	                 Y37E3.20	                 Y37E3.20
chrI:2084303-2084453	                annot_fwd	          coding_promoter	          coding_promoter
chrI:2084303-2084453	    promoter_locus_id_fwd	                 Y37E3.11	                 Y37E3.11
chrI:5485932-5486082	                annot_rev	          coding_promoter	          coding_promoter
chrI:5485932-5486082	    promoter_locus_id_rev	                   rpl-19	                   rpl-19
chrI:5485932-5486082	                annot_fwd	         unknown_promoter	         unknown_promoter
chrI:2190535-2190685	                annot_fwd	 transcription_initiation	 transcription_initiation
chrI:4202291-4202441	                annot_fwd	 transcription_initiation	 transcription_initiation

In [ ]:
# Differences to previous github version
# Three alternative ways of obtaining the reference annotation are kept below;
# only the last one (a saved 6Dec17 snapshot) is active.
#fp_head_bed = 'annot/S2_regulatory_annotation/S2_regulatory_annotation.bed_HEAD.bed'
#fp_head_tsv = 'annot/S2_regulatory_annotation/S2_regulatory_annotation.tsv_HEAD.tsv'
#!git show HEAD:{fp_regl} > {fp_head_tsv}
#!git show HEAD:annot/S2_regulatory_annotation/S2_regulatory_annotation.bed > {fp_head_bed}

# Diff against a specific earlier version
#fp_head_bed = 'annot/S2_regulatory_annotation/S2_regulatory_annotation.bed_1030.bed'
#fp_head_tsv = 'annot/S2_regulatory_annotation/S2_regulatory_annotation.tsv_1030.tsv'
#!git show 2040a6:annot/S2_regulatory_annotation/S2_regulatory_annotation.bed > {fp_head_bed}
#!git show 2040a6:annot/S2_regulatory_annotation/S2_regulatory_annotation.tsv > {fp_head_tsv}

fp_head_bed = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17.bed'
fp_head_tsv = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17.tsv'

df_head = pd.read_csv(fp_head_tsv, sep='\t')
# Row-wise mask: True where either strand's annotation differs between the
# reference (df_head) and the annotation under test (df_regl, loaded above).
# The comparison is element-wise by row label, so it presumably relies on both
# tables listing the same regions in the same order -- TODO confirm.
# NOTE(review): in pandas NaN != NaN is True, so rows with a missing annotation
# in both tables would be counted as changed -- verify the columns have no NaN.
m_diff_ = (df_head['annot_fwd'] != df_regl['annot_fwd']) | (df_head['annot_rev'] != df_regl['annot_rev'])# \
# Disabled extension of the mask to gene-assignment columns:
#| ((df_head['promoter_gene_id_fwd'] != df_regl['promoter_gene_id_fwd'])) \
#| ((df_head['promoter_gene_id_rev'] != df_regl['promoter_gene_id_rev'])) \
#| ((df_head['associated_gene_id'] != df_regl['associated_gene_id']))
# NOTE(review): the message says "last commit", but the active reference is the
# 6Dec17 snapshot file, not `git show HEAD` -- confirm the wording is still right.
print('%d regions annotated differently compared to last commit' % (sum(m_diff_)))

In [ ]:
# Randomly sample altered regions
# Build a side-by-side table: coordinates from df_regl plus the label and the
# per-strand annotations of the reference (suffix _head) and the annotation
# under test (suffix _regl). Column assignment aligns on the row index.
df_diff = df_regl[['chrom', 'start', 'end']].copy()
df_diff['label_head'] = df_head['label']
df_diff['label_regl'] = df_regl['label']
df_diff['annot_rev_head'] = df_head['annot_rev']
df_diff['annot_rev_regl'] = df_regl['annot_rev']
df_diff['annot_fwd_head'] = df_head['annot_fwd']
df_diff['annot_fwd_regl'] = df_regl['annot_fwd']
# Restrict to the rows flagged by m_diff_ (computed in the previous cell);
# yp.df_sample presumably draws 50 of them for display -- see yarp for details.
yp.df_sample(df_diff.loc[m_diff_], 50)

In [ ]:
# Write a .bed-file of all the regions that changed
# Output is headerless, tab-separated chrom/start/end for every row flagged in
# m_diff_; df_diff and m_diff_ come from the two preceding cells.
fp_diff = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_diff.bed'
df_diff[m_diff_][['chrom', 'start', 'end']].to_csv(fp_diff, index=False, header=False, sep='\t')
# Line count of the written file, as a quick check on the number of changed regions.
!wc -l {fp_diff}

Possible additional improvements...


In [ ]:
# exon2 overlap rule is too conservative & masks even when txn clearly does not originate from exon
# => test for jump/incr at exon boundary & discard from annotation only if it tests positive
#check_annot_summary('chrIV:13588263-13588441', 'Y45F10B.12') # trans-spliced promoter in L4?

In [ ]:
# extra long outrons???
# snt-2 has a distal promoter spanning 40kb and multiple genes (chrIII:787,433-829,218)
# => cut-off at e.g. 10kb?
#check_annot_summary('chrIII:828052-828187', '??')
#chrIII:4369629-4369711

In [ ]:
#check_annot_summary('chrII:11371902-11372117', '(C33B4.2)') # short cap 100bp downstream of annotated coding region
#check_annot_summary('chrI:5485952-5486062', 'rpl-19') # promoter assigned to un-translated tiny exon; scap not align
#check_annot_summary('chrII:11347905-11348076', 'B0491.6 / B0491.1') # long UTR; short cap precedes TSS
#check_annot_summary('chrII:1818315-1818462', 'fbxb-10') # true promoter; scap ~60bp chrII:1821387-1821512
#check_annot_summary('chrIII:5876290-5876433', 'aldo-2')

In [ ]:
#> lir-2 (green element may be promoter?) Looks like a promoter, I think
#check_annot_summary('chrII:7667887-7668072', 'lir-2') # Tricky region, as short cap does not align with 5'

In [ ]:
# Load two dated snapshots of the regulatory annotation for comparison:
# A = 6Dec17, B = 20Apr18. read_gffbed is not defined in this notebook section
# (presumably provided by the init cell); the cells below use the resulting
# frames' chrom/start/end and annot_* columns.
fp_reglA = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17.bed'
fp_reglB = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_20Apr18.bed'
df_reglA = read_gffbed(fp_reglA)
df_reglB = read_gffbed(fp_reglB)

In [ ]:
# Regions that are coding promoters in version A (6Dec17) but not in version B
# (20Apr18), written per strand as headerless tab-separated chrom/start/end.
# The masks compare df_reglA and df_reglB row by row, so both are presumably
# expected to list the same regions in the same order -- TODO confirm.
fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_20Apr18_lost_promoters_fwd.bed'
df_reglA[(df_reglA['annot_fwd'] == 'coding_promoter') & (df_reglB['annot_fwd'] != 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

# Same for the reverse strand; fp_ is reused for the second output path.
fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_20Apr18_lost_promoters_rev.bed'
df_reglA[(df_reglA['annot_rev'] == 'coding_promoter') & (df_reglB['annot_rev'] != 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

In [ ]:
# Load the per-strand "lowconf" metric tracks; the Venn cells below filter them
# on their annot_lowconf column.
# NOTE(review): the variable names tie these files to version B (20Apr18), but
# the paths carry no date -- confirm they belong to that version.
fp_reglB_lowconf_fwd = 'annot/S2_regulatory_annotation/metrics/regulatory_annotation_lowconf_fwd.bed'
df_reglB_lowconf_fwd = read_gffbed(fp_reglB_lowconf_fwd)
fp_reglB_lowconf_rev = 'annot/S2_regulatory_annotation/metrics/regulatory_annotation_lowconf_rev.bed'
df_reglB_lowconf_rev = read_gffbed(fp_reglB_lowconf_rev)

In [ ]:
# Forward strand: compare version-A regions annotated as coding or pseudogene
# promoters (set a) with the regions flagged low-confidence in the lowconf
# track (set b).
df_a_ = df_reglA.query('(annot_fwd == "coding_promoter") | (annot_fwd == "pseudogene_promoter")')[['chrom', 'start', 'end']]
# NOTE(review): the filter compares against the string "True"; this presumably
# relies on read_gffbed leaving annot_lowconf as text -- confirm the dtype.
df_b_ = df_reglB_lowconf_fwd.query('annot_lowconf == "True"')[['chrom', 'start', 'end']]

# yp.GenomicVenn2 receives both sets as BedTool objects plus display labels;
# its overlap semantics are defined in yarp and not visible here.
gv_f = yp.GenomicVenn2(
    BedTool.from_dataframe(df_a_),
    BedTool.from_dataframe(df_b_),
    label_a='Dec fwd promoters',
    label_b='Apr fwd low-confidence promoters',
)
gv_f.plot()

In [ ]:
# Display the df_b_only attribute of the forward-strand Venn object; judging by
# the name, these are regions found only in set b (low-confidence promoters) --
# confirm against yp.GenomicVenn2.
gv_f.df_b_only

In [ ]:
# Reverse strand counterpart of the forward-strand Venn cell: version-A coding or
# pseudogene promoters (set a) versus regions flagged low-confidence (set b).
# df_a_ / df_b_ are reused and overwrite the forward-strand selections.
df_a_ = df_reglA.query('(annot_rev == "coding_promoter") | (annot_rev == "pseudogene_promoter")')[['chrom', 'start', 'end']]
# NOTE(review): compares against the string "True" -- confirm annot_lowconf is
# stored as text rather than as a boolean.
df_b_ = df_reglB_lowconf_rev.query('annot_lowconf == "True"')[['chrom', 'start', 'end']]

gv_r = yp.GenomicVenn2(
    BedTool.from_dataframe(df_a_),
    BedTool.from_dataframe(df_b_),
    label_a='Dec rev promoters',
    label_b='Apr rev low-confidence promoters',
)
gv_r.plot()

In [ ]:
# Load five dated snapshots of the regulatory annotation for the pairwise
# comparison below: A = 6Dec17, B = 20Apr18, C = 22Apr18, D = 23Apr18,
# E = 23A_Apr18. This rebinds df_reglA / df_reglB from the earlier cell to the
# same two files.
fp_reglA = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17.bed'
fp_reglB = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_20Apr18.bed'
fp_reglC = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_22Apr18.bed'
fp_reglD = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_23Apr18.bed'
fp_reglE = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_23A_Apr18.bed'
df_reglA = read_gffbed(fp_reglA)
df_reglB = read_gffbed(fp_reglB)
df_reglC = read_gffbed(fp_reglC)
df_reglD = read_gffbed(fp_reglD)
df_reglE = read_gffbed(fp_reglE)

In [ ]:
def overlap_(df_A, df_B):
    """Count region/strand pairs annotated "coding_promoter" in both tables.

    Rows are compared position by position via their shared index, separately
    for the 'annot_fwd' and 'annot_rev' columns; the two per-strand counts are
    added together.

    Args:
        df_A: Annotation table with 'annot_fwd' and 'annot_rev' columns.
        df_B: Second annotation table with the same columns and row index.

    Returns:
        Total number of (row, strand) combinations that are "coding_promoter"
        in df_A as well as in df_B.
    """
    n_shared = 0
    for strand_col in ('annot_fwd', 'annot_rev'):
        in_both = (df_A[strand_col] == "coding_promoter") & (df_B[strand_col] == "coding_promoter")
        n_shared = n_shared + sum(in_both)
    return n_shared

# Pairwise coding-promoter agreement between the five annotation versions.
# One output line per version: its label, overlap_() against each version in
# A..E order, then the number of rows whose 'annot' column is "coding_promoter".
versions_ = (
    ('6Dec', df_reglA),
    ('20Apr18', df_reglB),
    ('22Apr18', df_reglC),
    ('23Apr18', df_reglD),
    ('23A_Apr18', df_reglE),
)
for (label_, df_row_) in versions_:
    fields_ = [overlap_(df_row_, df_col_) for (_, df_col_) in versions_]
    fields_.append(len(df_row_.query('annot=="coding_promoter"')))
    print(label_, *fields_)

In [ ]:
# Show 10 random regions that are forward-strand coding promoters in version D
# (23Apr18) but not in version E (23A_Apr18).
# NOTE(review): .sample(10) is unseeded, so the rows shown differ between runs,
# and pandas raises ValueError when fewer than 10 rows match.
df_reglD[(df_reglD['annot_fwd'] == 'coding_promoter') & \
         (df_reglE['annot_fwd'] != 'coding_promoter')][['chrom', 'start', 'end']].sample(10)

In [ ]:
# Show 10 random regions that are reverse-strand coding promoters in version D
# (23Apr18) but not in version E (23A_Apr18).
# NOTE(review): .sample(10) is unseeded, so the rows shown differ between runs,
# and pandas raises ValueError when fewer than 10 rows match.
df_reglD[(df_reglD['annot_rev'] == 'coding_promoter') & \
         (df_reglE['annot_rev'] != 'coding_promoter')][['chrom', 'start', 'end']].sample(10)

In [28]:
def diff_coding_promoter_lost(verA, verB):
    fp_reglA = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s.bed' % (verA,)
    fp_reglB = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s.bed' % (verB,)
    df_reglA = read_gffbed(fp_reglA)
    df_reglB = read_gffbed(fp_reglB)
    m_AtoB_fwd = (df_reglA['annot_fwd'] == 'coding_promoter') & (df_reglB['annot_fwd'] != 'coding_promoter')
    m_AtoB_rev = (df_reglA['annot_rev'] == 'coding_promoter') & (df_reglB['annot_rev'] != 'coding_promoter')
    #print(sum(m_AtoB_fwd), sum(m_AtoB_rev))

    fp_fwd = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s_to_%s_coding_promoter_lost_fwd.bed' % (verA, verB)
    fp_rev = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s_to_%s_coding_promoter_lost_rev.bed' % (verA, verB)
    df_reglA[m_AtoB_fwd][['chrom', 'start', 'end']].to_csv(fp_fwd, index=False, header=False, sep='\t')
    df_reglB[m_AtoB_rev][['chrom', 'start', 'end']].to_csv(fp_rev, index=False, header=False, sep='\t')
    !wc -l {fp_fwd}
    !wc -l {fp_rev}

def diff_coding_promoter_gained(verA, verB):
    fp_reglA = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s.bed' % (verA,)
    fp_reglB = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s.bed' % (verB,)
    df_reglA = read_gffbed(fp_reglA)
    df_reglB = read_gffbed(fp_reglB)
    m_AtoB_fwd = (df_reglA['annot_fwd'] != 'coding_promoter') & (df_reglB['annot_fwd'] == 'coding_promoter')
    m_AtoB_rev = (df_reglA['annot_rev'] != 'coding_promoter') & (df_reglB['annot_rev'] == 'coding_promoter')
    print(sum(m_AtoB_fwd), sum(m_AtoB_rev))

    fp_fwd = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s_to_%s_coding_promoter_gained_fwd.bed' % (verA, verB)
    fp_rev = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_%s_to_%s_coding_promoter_gained_rev.bed' % (verA, verB)
    df_reglA[m_AtoB_fwd][['chrom', 'start', 'end']].to_csv(fp_fwd, index=False, header=False, sep='\t')
    df_reglB[m_AtoB_rev][['chrom', 'start', 'end']].to_csv(fp_rev, index=False, header=False, sep='\t')
    !wc -l {fp_fwd}
    !wc -l {fp_rev}

# Version-to-version comparisons. Earlier pairs are kept commented out as a
# record of what was compared; only the two most recent steps are active.
#diff_coding_promoter_lost('23Apr18', '23A_Apr18')
#diff_coding_promoter_gained('23Apr18', '23A_Apr18')
#diff_coding_promoter_lost('Apr24a', 'Apr24b')
#diff_coding_promoter_gained('Apr24a', 'Apr24b')
#diff_coding_promoter_lost('Apr24b', 'Apr24c')
#diff_coding_promoter_gained('Apr24b', 'Apr24c')
#diff_coding_promoter_lost('Apr24c', 'TMP')
#diff_coding_promoter_gained('Apr24c', 'TMP')
# Active: Apr24c -> Apr25a, then Apr25a -> Apr25b (lost and gained for each step).
diff_coding_promoter_lost('Apr24c', 'Apr25a')
diff_coding_promoter_gained('Apr24c', 'Apr25a')
diff_coding_promoter_lost('Apr25a', 'Apr25b')
diff_coding_promoter_gained('Apr25a', 'Apr25b')


/mnt/home1/ahringer/jj374/relmapping/scripts/yarp/yarp.py:400: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  df_name = df_name.convert_objects(convert_numeric=True)
187 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr24c_to_Apr25a_coding_promoter_lost_fwd.bed
179 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr24c_to_Apr25a_coding_promoter_lost_rev.bed
21 21
21 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr24c_to_Apr25a_coding_promoter_gained_fwd.bed
21 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr24c_to_Apr25a_coding_promoter_gained_rev.bed
6 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr25a_to_Apr25b_coding_promoter_lost_fwd.bed
9 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr25a_to_Apr25b_coding_promoter_lost_rev.bed
79 72
79 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr25a_to_Apr25b_coding_promoter_gained_fwd.bed
72 annot/S2_regulatory_annotation/S2_regulatory_annotation_Apr25a_to_Apr25b_coding_promoter_gained_rev.bed

In [ ]:
fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_22Apr18_lost_promoters_fwd.bed'
df_reglA[(df_reglA['annot_fwd'] == 'coding_promoter') & (df_reglC['annot_fwd'] != 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_22Apr18_lost_promoters_rev.bed'
df_reglA[(df_reglA['annot_rev'] == 'coding_promoter') & (df_reglC['annot_rev'] != 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_22Apr18_gained_promoters_fwd.bed'
df_reglA[(df_reglA['annot_fwd'] != 'coding_promoter') & (df_reglC['annot_fwd'] == 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_22Apr18_gained_promoters_rev.bed'
df_reglA[(df_reglA['annot_rev'] != 'coding_promoter') & (df_reglC['annot_rev'] == 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

In [ ]:
fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_23Apr18_lost_promoters_fwd.bed'
df_reglA[(df_reglA['annot_fwd'] == 'coding_promoter') & (df_reglC['annot_fwd'] != 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_23Apr18_lost_promoters_rev.bed'
df_reglA[(df_reglA['annot_rev'] == 'coding_promoter') & (df_reglC['annot_rev'] != 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_23Apr18_gained_promoters_fwd.bed'
df_reglA[(df_reglA['annot_fwd'] != 'coding_promoter') & (df_reglC['annot_fwd'] == 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_6Dec17_to_23Apr18_gained_promoters_rev.bed'
df_reglA[(df_reglA['annot_rev'] != 'coding_promoter') & (df_reglC['annot_rev'] == 'coding_promoter')]\
    [['chrom', 'start', 'end']].to_csv(fp_, index=False, header=False, sep='\t')
!wc -l {fp_}

In [7]:
# Spot-check: show ten randomly chosen regions from the TMP annotation whose detailed
# forward-strand call is "unknown_promoter_jump_incr" (unseeded, so rows vary per run).
fp_ = 'annot/S2_regulatory_annotation/S2_regulatory_annotation_TMP.bed'
df_tmp_ = read_gffbed(fp_)
df_tmp_[df_tmp_['annot_detailed_fwd'] == 'unknown_promoter_jump_incr'].sample(10)


/mnt/home1/ahringer/jj374/relmapping/scripts/yarp/yarp.py:400: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  df_name = df_name.convert_objects(convert_numeric=True)
Out[7]:
chrom start end score strand thickStart thickEnd itemRgb Name annot annot_detailed_fwd annot_detailed_rev annot_fwd annot_rev
13518 chrIII 1267017 1267168 0 + 1267017 1267168 NaN unknown_promoter unknown_promoter_jump_incr no_transcription unknown_promoter no_transcription
21201 chrIV 5961399 5961550 0 + 5961399 5961550 NaN (unc-44) unknown_promoter unknown_promoter_jump_incr no_transcription unknown_promoter no_transcription
33858 chrX 1541202 1541353 0 + 1541202 1541353 NaN (K06A9.3) unknown_promoter unknown_promoter_jump_incr transcription_initiation unknown_promoter transcription_initiation
32234 chrV 17714437 17714588 0 - 17714437 17714588 NaN twk-33 coding_promoter unknown_promoter_jump_incr coding_promoter_jump unknown_promoter coding_promoter
11464 chrII 11485248 11485399 0 - 11485248 11485399 NaN twk-5 coding_promoter unknown_promoter_jump_incr coding_promoter_jump unknown_promoter coding_promoter
22434 chrIV 8937884 8938035 0 + 8937884 8938035 NaN unknown_promoter unknown_promoter_jump_incr no_transcription unknown_promoter no_transcription
25021 chrIV 15609918 15610069 0 . 15609918 15610069 NaN unknown_promoter unknown_promoter_jump_incr unknown_promoter_jump_scap unknown_promoter unknown_promoter
27873 chrV 6741304 6741455 0 + 6741304 6741455 NaN (T25F10.3) unknown_promoter unknown_promoter_jump_incr no_transcription unknown_promoter no_transcription
36482 chrX 6536751 6536902 0 - 6536751 6536902 NaN ilys-5 coding_promoter unknown_promoter_jump_incr coding_promoter_jump unknown_promoter coding_promoter
3368 chrI 7935017 7935168 0 + 7935017 7935168 NaN (T22C1.1,nmy-2) unknown_promoter unknown_promoter_jump_incr transcription_initiation unknown_promoter transcription_initiation

In [43]:
# Load the two annotation tables to be compared (tab-separated, Apr27 first, then May11).
(df_Apr27, df_May11) = (
    pd.read_csv(fp_tsv, sep='\t')
    for fp_tsv in (
        'annot_Apr27/Fig2D2_regulatory_annotation_Apr27.tsv',
        'annot_May11/S2_regulatory_annotation_May11.tsv',
    )
)

In [47]:
# Rows (shown with their Apr27 values) whose reverse-strand annotation differs in May11.
m_rev_changed_ = df_Apr27['annot_rev'] != df_May11['annot_rev']
df_Apr27[m_rev_changed_]


Out[47]:
chrom start end annot annot_fwd annot_rev promoter_gene_id_fwd promoter_locus_id_fwd promoter_gene_biotype_fwd promoter_gene_id_rev ... lcap_glp1_d13_rev_baseMean lcap_glp1_d13_rev_log2FoldChange lcap_glp1_d13_rev_lfcSE lcap_glp1_d13_rev_stat lcap_glp1_d13_rev_pvalue lcap_glp1_d13_rev_padj lcap_glp1_d13_rev_passed_jump lcap_glp1_d13_rev_passed_incr lcap_glp1_d13_rev_passed lcap_glp1_d13_rev_summary
641 chrI 1693242 1693393 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 4.795765 0.104583 1.577261 0.066307 0.473567 0.858442 False False False False / 0.10 / 0.858
3719 chrI 8866829 8866980 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 2.391290 0.352516 2.065759 0.170647 0.432251 0.824217 False False False False / 0.35 / 0.824
4178 chrI 9751340 9751491 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 8.911809 -0.797888 1.182489 0.000000 0.750084 1.000000 False False False False / -0.80 / 1
4403 chrI 10103547 10103698 non-coding_RNA non-coding_RNA transcription_initiation WBGene00219788 B0379.13 snoRNA NaN ... 17.066740 0.262403 0.913074 0.287384 0.386909 0.784215 False False False False / 0.26 / 0.784
4661 chrI 10565172 10565323 coding_promoter coding_promoter transcription_initiation WBGene00003926 pas-5 protein_coding NaN ... 0.758194 -0.722521 2.662982 0.000000 0.606928 NaN False False False False / -0.72 / nan
4666 chrI 10569181 10569332 non-coding_RNA non-coding_RNA transcription_initiation WBGene00045138 F25H2.14 snoRNA NaN ... 0.508923 -2.342994 2.722519 0.000000 0.805270 NaN False False False False / -2.34 / nan
5241 chrI 12288151 12288302 putative_enhancer no_transcription transcription_initiation NaN NaN NaN NaN ... 0.914426 3.091751 2.681217 1.153115 0.124432 NaN False True True True / 3.09 / nan
5670 chrI 13324888 13325039 coding_promoter coding_promoter transcription_initiation WBGene00001555 gcy-35 protein_coding NaN ... 0.254461 -1.903904 2.714572 0.000000 0.758462 NaN False False False False / -1.90 / nan
8617 chrII 5912257 5912408 coding_promoter coding_promoter unknown_promoter WBGene00019127 cgt-3 protein_coding NaN ... 3.011869 1.949587 1.922013 1.014346 0.155209 0.451397 False False False False / 1.95 / 0.451
9522 chrII 7677364 7677515 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 0.688526 2.459077 2.722450 0.903259 0.183194 NaN False False False False / 2.46 / nan
9774 chrII 8130619 8130770 non-coding_RNA non-coding_RNA transcription_initiation WBGene00219852 C41C4.16 snoRNA NaN ... 0.935771 0.961622 2.531971 0.379792 0.352050 NaN False False False False / 0.96 / nan
9775 chrII 8130869 8131020 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 2.086922 2.233311 2.197527 1.016284 0.154747 NaN False False False False / 2.23 / nan
9778 chrII 8132922 8133073 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 0.528686 -2.361539 2.722619 0.000000 0.807133 NaN False False False False / -2.36 / nan
11307 chrII 11167182 11167333 unknown_promoter no_transcription unknown_promoter NaN NaN NaN NaN ... 4.317366 5.432245 2.329498 2.331938 0.009852 0.053128 True True True True / 5.43 / 0.0531
11347 chrII 11224024 11224175 non-coding_RNA non-coding_RNA transcription_initiation WBGene00014506 T06D8.11 snoRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
11544 chrII 11620119 11620270 unknown_promoter unknown_promoter transcription_initiation NaN NaN NaN NaN ... 11.196070 -0.058758 1.080641 0.000000 0.521681 0.891795 False False False False / -0.06 / 0.892
11754 chrII 11959213 11959364 non-coding_RNA non-coding_RNA transcription_initiation WBGene00014539 W03C9.t1 tRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
12473 chrII 13946799 13946950 non-coding_RNA non-coding_RNA transcription_initiation WBGene00220125 W07G1.23 snoRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
13444 chrIII 1122345 1122496 non-coding_RNA non-coding_RNA transcription_initiation WBGene00023189 Y71D11A.7 snoRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
14562 chrIII 3953227 3953378 non-coding_RNA non-coding_RNA transcription_initiation WBGene00219936 F37A8.7 snoRNA NaN ... 2.743038 -1.358002 1.961799 0.000000 0.755601 1.000000 False False False False / -1.36 / 1
16115 chrIII 7142140 7142291 non-coding_RNA non-coding_RNA transcription_initiation WBGene00004850 sls-2.18 snRNA NaN ... 0.681310 2.656068 2.714726 0.978393 0.163940 NaN False True True True / 2.66 / nan
17429 chrIII 9861544 9861695 coding_promoter no_transcription coding_promoter NaN NaN NaN WBGene00010778 ... 5.962369 3.914527 1.712874 2.285356 0.011146 0.058545 True False True True / 3.91 / 0.0585
17962 chrIII 11090823 11090974 non-coding_RNA non-coding_RNA transcription_initiation WBGene00004847 sls-2.15 snRNA NaN ... 0.910818 3.099206 2.679973 1.156432 0.123752 NaN False True True True / 3.10 / nan
18060 chrIII 11353461 11353612 non-coding_RNA non-coding_RNA transcription_initiation WBGene00014536 T28D6.t2 tRNA NaN ... 0.451801 2.208452 2.721528 0.811475 0.208547 NaN False False False False / 2.21 / nan
18103 chrIII 11454261 11454412 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 0.681310 2.656068 2.714726 0.978393 0.163940 NaN False True True True / 2.66 / nan
20573 chrIV 4347567 4347718 coding_promoter coding_promoter transcription_initiation WBGene00020974 W03B1.5 protein_coding NaN ... 0.480362 -0.094701 2.712973 0.000000 0.513923 NaN False False False False / -0.09 / nan
21392 chrIV 6460680 6460831 non-coding_RNA non-coding_RNA transcription_initiation WBGene00045205 smy-7 snRNA NaN ... 0.264343 -1.926593 2.714843 0.000000 0.761040 NaN False False False False / -1.93 / nan
21508 chrIV 6792917 6793068 coding_promoter coding_promoter transcription_initiation WBGene00015937 C17H12.12 protein_coding NaN ... 0.508923 -2.342994 2.722519 0.000000 0.805270 NaN False False False False / -2.34 / nan
21698 chrIV 7316658 7316809 non-coding_RNA non-coding_RNA transcription_initiation WBGene00194997 F42C5.12 snRNA NaN ... 48.760770 -4.270618 0.736731 0.000000 1.000000 1.000000 False False False False / -4.27 / 1
22151 chrIV 8252977 8253128 non-coding_RNA non-coding_RNA transcription_initiation WBGene00023083 H09I01.2 snoRNA NaN ... 0.716144 0.460523 2.675516 0.172125 0.431670 NaN False False False False / 0.46 / nan
22752 chrIV 9725626 9725777 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 0.229509 1.734175 2.715167 0.638699 0.261509 NaN False False False False / 1.73 / nan
24490 chrIV 13600744 13600895 non-coding_RNA non-coding_RNA transcription_initiation WBGene00045203 smy-8 snRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
24491 chrIV 13601531 13601682 coding_promoter coding_promoter transcription_initiation WBGene00194917 Y45F10B.59 protein_coding NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
24567 chrIV 13870141 13870292 non-coding_RNA non-coding_RNA transcription_initiation WBGene00009206 B0513.t1 tRNA NaN ... 49.857390 -1.521411 0.640646 0.000000 0.991221 1.000000 False False False False / -1.52 / 1
24834 chrIV 14780760 14780911 coding_promoter coding_promoter transcription_initiation WBGene00013306 Y57G11C.8 protein_coding NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
24948 chrIV 15234365 15234516 non-coding_RNA non-coding_RNA transcription_initiation WBGene00220205 Y73F8A.1175 snoRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
25812 chrV 858989 859140 non-coding_RNA non-coding_RNA transcription_initiation WBGene00235188 K09C6.12 snRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
26774 chrV 4310554 4310705 non-coding_RNA non-coding_RNA transcription_initiation WBGene00023207 ZC132.t1 tRNA NaN ... 11.900880 0.618486 1.085621 0.569708 0.284438 0.669479 False False False False / 0.62 / 0.669
27179 chrV 5330143 5330294 coding_promoter no_transcription coding_promoter NaN NaN NaN WBGene00015684 ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
28414 chrV 8348994 8349145 non-coding_RNA non-coding_RNA transcription_initiation WBGene00044957 C08D8.3 snoRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
30402 chrV 12491180 12491331 non-coding_RNA non-coding_RNA transcription_initiation WBGene00014541 W04D2.7 snoRNA NaN ... 0.793028 -2.748084 2.715537 0.000000 0.844227 NaN False False False False / -2.75 / nan
31905 chrV 16225668 16225819 putative_enhancer no_transcription transcription_initiation NaN NaN NaN NaN ... 22.020950 -0.732089 0.927309 0.000000 0.785083 1.000000 False False False False / -0.73 / 1
31906 chrV 16225813 16225964 coding_promoter no_transcription coding_promoter NaN NaN NaN WBGene00008680 ... 11.508410 3.163502 1.194353 2.648716 0.004040 0.026140 True False True True / 3.16 / 0.0261
32124 chrV 17117991 17118142 non-coding_RNA non-coding_RNA transcription_initiation WBGene00077488 sls-1.12 snRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
32125 chrV 17118854 17119005 putative_enhancer no_transcription transcription_initiation NaN NaN NaN NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan
35929 chrX 5466173 5466324 putative_enhancer transcription_initiation transcription_initiation NaN NaN NaN NaN ... 3.397812 0.187769 1.733510 0.108317 0.456872 0.842641 False False False False / 0.19 / 0.843
40516 chrX 14476715 14476866 coding_promoter coding_promoter transcription_initiation WBGene00044147 M163.11 protein_coding NaN ... 0.716144 0.460523 2.675516 0.172125 0.431670 NaN False False False False / 0.46 / nan
40519 chrX 14478276 14478427 non-coding_RNA non-coding_RNA transcription_initiation WBGene00045158 M163.14 snoRNA NaN ... 0.000000 NaN NaN NaN NaN NaN False False False False / nan / nan

48 rows × 338 columns


In [ ]: