In [1]:
from collections import defaultdict
import warnings
import logging
import gffutils
import pybedtools
import pandas as pd
import copy
import re
from gffutils.pybedtools_integration import tsses

logging.basicConfig(level=logging.INFO)

In [6]:
gtf = '/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae_BY4741/annotation/BY4741_Toronto_2012.gff'
gtf_db = '/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae_BY4741/annotation/BY4741_Toronto_2012.gff.db'
prefix = '/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae_BY4741/annotation/BY4741_Toronto_2012.gffutils'
chrsizes = '/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae_BY4741/fasta/BY4741_Toronto_2012.chrom.sizes'

In [30]:
db = gffutils.create_db(gtf, dbfn=gtf_db, merge_strategy='merge',
                        force=True)

In [102]:
def create_gene_dict(db):
    '''
    Store each feature line db.all_features() as a dict of dicts
    '''
    gene_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    for line_no, feature in enumerate(db.all_features()):
        feature_type = feature.featuretype
        if feature_type == 'exon':
            print feature
        if feature_type == 'contig':
            continue        
        elif feature_type == 'gene':
            gene_ids = feature.attributes['ID']
            if len(gene_ids)!=1:
                logging.warning('Found multiple gene_ids on line {} in gtf'.format(line_no))
                break
            else:
                gene_id = gene_ids[0]
                feature.attributes['gene_id'] = gene_id
                gene_dict[gene_id]['gene'] = feature                
        elif feature_type == 'mRNA':
            gene_id = feature.attributes['Parent']
            transcript_id = feature.attributes['ID']
            assert len(gene_id) == 1
            assert len(transcript_id) == 1
            feature.attributes['gene_id'] = gene_id
            gene_dict[gene_id[0]][transcript_id[0]][feature_type].append(feature)
        else:
            try:
                transcript_id = feature.attributes['Parent']
            except:
                pass
                #print feature
            for gene_id in gene_dict.keys():
                assert len(transcript_id) == 1
                ## Insert only at place where the gene has evidence of this mRNA:
                if transcript_id[0] in gene_dict[gene_id].keys():
                    feature.attributes['gene_id'] = gene_id
                    gene_dict[gene_id][transcript_id[0]][feature_type].append(feature)            
    return gene_dict

for x in db.featuretypes():
    print x


ARS
CDS
LTR_retrotransposon
centromere
centromere_DNA_Element_I
centromere_DNA_Element_II
centromere_DNA_Element_III
contig
exon
external_transcribed_spacer_region
five_prime_UTR_intron
gene
internal_transcribed_spacer_region
intron
long_terminal_repeat
mRNA
ncRNA
plus_1_translational_frameshift
pseudogene
rRNA
snRNA
snoRNA
tRNA
telomere
transposable_element_gene

In [103]:
db = gffutils.FeatureDB(gtf_db, keep_order=True)
gene_dict = create_gene_dict(db)


chr07	liftOver	exon	609584	609696	.	+	.	Name=snR48_noncoding_exon;Parent=snR48_BY4741
chr15	liftOver	exon	259489	259578	.	+	.	Name=snR50_noncoding_exon;Parent=snR50_BY4741
chr16	liftOver	exon	718700	718806	.	-	.	Name=snR51_noncoding_exon;Parent=snR51_BY4741
chr05	liftOver	exon	430032	430123	.	-	.	Name=snR52_noncoding_exon;Parent=snR52_BY4741
chr05	liftOver	exon	61699	61789	.	+	.	Name=snR53_noncoding_exon;Parent=snR53_BY4741
chr13	liftOver	exon	163532	163617	.	-	.	Name=snR54_noncoding_exon;Parent=snR54_BY4741
chr12	liftOver	exon	792298	792395	.	-	.	Name=snR55_noncoding_exon;Parent=snR55_BY4741
chr02	liftOver	exon	88190	88277	.	+	.	Name=snR56_noncoding_exon;Parent=snR56_BY4741
chr12	liftOver	exon	792538	792625	.	-	.	Name=snR57_noncoding_exon;Parent=snR57_BY4741
chr15	liftOver	exon	136088	136183	.	-	.	Name=snR58_noncoding_exon;Parent=snR58_BY4741
chr16	liftOver	exon	173827	173904	.	+	.	Name=snR59_noncoding_exon;Parent=snR59_BY4741
chr10	liftOver	exon	349130	349233	.	-	.	Name=snR60_noncoding_exon;Parent=snR60_BY4741
chr12	liftOver	exon	792087	792176	.	-	.	Name=snR61_noncoding_exon;Parent=snR61_BY4741
chr15	liftOver	exon	409765	409864	.	-	.	Name=snR62_noncoding_exon;Parent=snR62_BY4741
chr04	liftOver	exon	323217	323471	.	-	.	Name=snR63_noncoding_exon;Parent=snR63_BY4741
chr11	liftOver	exon	38811	38911	.	+	.	Name=snR64_noncoding_exon;Parent=snR64_BY4741
chr03	liftOver	exon	166604	166703	.	+	.	Name=snR65_noncoding_exon;Parent=snR65_BY4741
chr14	liftOver	exon	586088	586173	.	+	.	Name=snR66_noncoding_exon;Parent=snR66_BY4741
chr05	liftOver	exon	61352	61433	.	+	.	Name=snR67_noncoding_exon;Parent=snR67_BY4741
chr09	liftOver	exon	97112	97247	.	+	.	Name=snR68_noncoding_exon;Parent=snR68_BY4741
chr11	liftOver	exon	364779	364879	.	+	.	Name=snR69_noncoding_exon;Parent=snR69_BY4741
chr16	liftOver	exon	718887	719050	.	-	.	Name=snR70_noncoding_exon;Parent=snR70_BY4741
chr08	liftOver	exon	411226	411315	.	+	.	Name=snR71_noncoding_exon;Parent=snR71_BY4741
chr02	liftOver	exon	680689	681863	.	-	.	Name=LSR1_noncoding_exon;Parent=LSR1_BY4741
chr12	liftOver	exon	459675	459795	.	+	.	Name=RDN5-1_noncoding_exon;Parent=RDN5-1_BY4741
chr12	liftOver	exon	468812	468930	.	+	.	Name=RDN5-2_noncoding_exon;Parent=RDN5-2_BY4741
chr12	liftOver	exon	472464	472582	.	+	.	Name=RDN5-3_noncoding_exon;Parent=RDN5-3_BY4741
chr12	liftOver	exon	455932	457731	.	-	.	Name=RDN18-1_noncoding_exon;Parent=RDN18-1_BY4741
chr12	liftOver	exon	465069	466868	.	-	.	Name=RDN18-2_noncoding_exon;Parent=RDN18-2_BY4741
chr12	liftOver	exon	451785	455180	.	-	.	Name=RDN25-1_noncoding_exon;Parent=RDN25-1_BY4741
chr12	liftOver	exon	460922	464317	.	-	.	Name=RDN25-2_noncoding_exon;Parent=RDN25-2_BY4741
chr12	liftOver	exon	455932	457731	.	-	.	Name=RDN37-1_noncoding_exon;Parent=RDN37-1_BY4741
chr12	liftOver	exon	455413	455570	.	-	.	Name=RDN37-1_noncoding_exon;Parent=RDN37-1_BY4741
chr12	liftOver	exon	451785	455180	.	-	.	Name=RDN37-1_noncoding_exon;Parent=RDN37-1_BY4741
chr12	liftOver	exon	465069	466868	.	-	.	Name=RDN37-2_noncoding_exon;Parent=RDN37-2_BY4741
chr12	liftOver	exon	464550	464707	.	-	.	Name=RDN37-2_noncoding_exon;Parent=RDN37-2_BY4741
chr12	liftOver	exon	460922	464317	.	-	.	Name=RDN37-2_noncoding_exon;Parent=RDN37-2_BY4741
chr12	liftOver	exon	455413	455570	.	-	.	Name=RDN58-1_noncoding_exon;Parent=RDN58-1_BY4741
chr12	liftOver	exon	464550	464707	.	-	.	Name=RDN58-2_noncoding_exon;Parent=RDN58-2_BY4741
chr05	liftOver	exon	116570	116938	.	-	.	Name=RPR1_noncoding_exon;Parent=RPR1_BY4741
chr05	liftOver	exon	440890	441411	.	+	.	Name=SCR1_noncoding_exon;Parent=SCR1_BY4741
chr10	liftOver	exon	663749	663942	.	+	.	Name=snR3_noncoding_exon;Parent=snR3_BY4741
chr05	liftOver	exon	423601	423786	.	+	.	Name=snR4_noncoding_exon;Parent=snR4_BY4741
chr12	liftOver	exon	366234	366345	.	+	.	Name=snR6_noncoding_exon;Parent=snR6_BY4741
chr07	liftOver	exon	939459	939672	.	-	.	Name=snR7-L_noncoding_exon;Parent=snR7-L_BY4741
chr07	liftOver	exon	939494	939672	.	-	.	Name=snR7-S_noncoding_exon;Parent=snR7-S_BY4741
chr15	liftOver	exon	832142	832331	.	+	.	Name=snR8_noncoding_exon;Parent=snR8_BY4741
chr04	liftOver	exon	1402919	1403042	.	+	.	Name=snR13_noncoding_exon;Parent=snR13_BY4741
chr05	liftOver	exon	166330	166489	.	-	.	Name=snR14_noncoding_exon;Parent=snR14_BY4741
chr12	liftOver	exon	896781	896983	.	+	.	Name=snR34_noncoding_exon;Parent=snR34_BY4741
chr10	liftOver	exon	228094	228479	.	-	.	Name=snR37_noncoding_exon;Parent=snR37_BY4741
chr11	liftOver	exon	559019	559369	.	-	.	Name=snR42_noncoding_exon;Parent=snR42_BY4741
chr03	liftOver	exon	99831	100039	.	-	.	Name=snR43_noncoding_exon;Parent=snR43_BY4741
chr12	liftOver	exon	854311	854521	.	+	.	Name=snR44_noncoding_exon;Parent=snR44_BY4741
chr16	liftOver	exon	821732	821903	.	+	.	Name=snR45_noncoding_exon;Parent=snR45_BY4741
chr07	liftOver	exon	545370	545566	.	+	.	Name=snR46_noncoding_exon;Parent=snR46_BY4741
chr04	liftOver	exon	541602	541700	.	-	.	Name=snR47_noncoding_exon;Parent=snR47_BY4741
chr10	liftOver	exon	139566	139691	.	-	.	Name=snR128_noncoding_exon;Parent=snR128_BY4741
chr10	liftOver	exon	139761	139950	.	-	.	Name=snR190_noncoding_exon;Parent=snR190_BY4741
chr04	liftOver	exon	410379	410451	.	+	.	Name=tA(AGC)D_noncoding_exon;Parent=tA(AGC)D_BY4741
chr06	liftOver	exon	204924	204996	.	-	.	Name=tA(AGC)F_noncoding_exon;Parent=tA(AGC)F_BY4741
chr07	liftOver	exon	774349	774421	.	+	.	Name=tA(AGC)G_noncoding_exon;Parent=tA(AGC)G_BY4741
chr08	liftOver	exon	146240	146312	.	-	.	Name=tA(AGC)H_noncoding_exon;Parent=tA(AGC)H_BY4741
chr10	liftOver	exon	197313	197385	.	-	.	Name=tA(AGC)J_noncoding_exon;Parent=tA(AGC)J_BY4741
chr11	liftOver	exon	219898	219970	.	+	.	Name=tA(AGC)K1_noncoding_exon;Parent=tA(AGC)K1_BY4741
chr11	liftOver	exon	517991	518063	.	+	.	Name=tA(AGC)K2_noncoding_exon;Parent=tA(AGC)K2_BY4741
chr12	liftOver	exon	656933	657005	.	-	.	Name=tA(AGC)L_noncoding_exon;Parent=tA(AGC)L_BY4741
chr13	liftOver	exon	321144	321216	.	-	.	Name=tA(AGC)M1_noncoding_exon;Parent=tA(AGC)M1_BY4741
chr13	liftOver	exon	768366	768438	.	-	.	Name=tA(AGC)M2_noncoding_exon;Parent=tA(AGC)M2_BY4741
chr16	liftOver	exon	856902	856974	.	+	.	Name=tA(AGC)P_noncoding_exon;Parent=tA(AGC)P_BY4741
chr01	liftOver	exon	166269	166341	.	+	.	Name=tA(UGC)A_noncoding_exon;Parent=tA(UGC)A_BY4741
chr05	liftOver	exon	310926	310998	.	-	.	Name=tA(UGC)E_noncoding_exon;Parent=tA(UGC)E_BY4741
chr07	liftOver	exon	794417	794489	.	+	.	Name=tA(UGC)G_noncoding_exon;Parent=tA(UGC)G_BY4741
chr12	liftOver	exon	214882	214954	.	-	.	Name=tA(UGC)L_noncoding_exon;Parent=tA(UGC)L_BY4741
chr15	liftOver	exon	853997	854069	.	-	.	Name=tA(UGC)O_noncoding_exon;Parent=tA(UGC)O_BY4741
chr02	liftOver	exon	643008	643079	.	-	.	Name=tC(GCA)B_noncoding_exon;Parent=tC(GCA)B_BY4741
chr07	liftOver	exon	707108	707179	.	-	.	Name=tC(GCA)G_noncoding_exon;Parent=tC(GCA)G_BY4741
chr16	liftOver	exon	435893	435964	.	-	.	Name=tC(GCA)P1_noncoding_exon;Parent=tC(GCA)P1_BY4741
chr16	liftOver	exon	775765	775836	.	-	.	Name=tC(GCA)P2_noncoding_exon;Parent=tC(GCA)P2_BY4741
chr02	liftOver	exon	405961	406032	.	+	.	Name=tD(GUC)B_noncoding_exon;Parent=tD(GUC)B_BY4741
chr04	liftOver	exon	568964	569035	.	+	.	Name=tD(GUC)D_noncoding_exon;Parent=tD(GUC)D_BY4741
chr07	liftOver	exon	531610	531681	.	+	.	Name=tD(GUC)G1_noncoding_exon;Parent=tD(GUC)G1_BY4741
chr07	liftOver	exon	544577	544648	.	-	.	Name=tD(GUC)G2_noncoding_exon;Parent=tD(GUC)G2_BY4741
chr09	liftOver	exon	324306	324377	.	-	.	Name=tD(GUC)I1_noncoding_exon;Parent=tD(GUC)I1_BY4741
chr09	liftOver	exon	336352	336423	.	-	.	Name=tD(GUC)I2_noncoding_exon;Parent=tD(GUC)I2_BY4741
chr10	liftOver	exon	204735	204806	.	+	.	Name=tD(GUC)J1_noncoding_exon;Parent=tD(GUC)J1_BY4741
chr10	liftOver	exon	355456	355527	.	+	.	Name=tD(GUC)J2_noncoding_exon;Parent=tD(GUC)J2_BY4741
chr10	liftOver	exon	374424	374495	.	-	.	Name=tD(GUC)J3_noncoding_exon;Parent=tD(GUC)J3_BY4741
chr10	liftOver	exon	541508	541579	.	+	.	Name=tD(GUC)J4_noncoding_exon;Parent=tD(GUC)J4_BY4741
chr11	liftOver	exon	513335	513406	.	-	.	Name=tD(GUC)K_noncoding_exon;Parent=tD(GUC)K_BY4741
chr12	liftOver	exon	427131	427202	.	+	.	Name=tD(GUC)L1_noncoding_exon;Parent=tD(GUC)L1_BY4741
chr12	liftOver	exon	791519	791590	.	+	.	Name=tD(GUC)L2_noncoding_exon;Parent=tD(GUC)L2_BY4741
chr13	liftOver	exon	463551	463622	.	+	.	Name=tD(GUC)M_noncoding_exon;Parent=tD(GUC)M_BY4741
chr14	liftOver	exon	519097	519167	.	-	.	Name=tD(GUC)N_noncoding_exon;Parent=tD(GUC)N_BY4741
chr15	liftOver	exon	571958	572029	.	+	.	Name=tD(GUC)O_noncoding_exon;Parent=tD(GUC)O_BY4741
chr04	liftOver	exon	1017207	1017278	.	-	.	Name=tE(CUC)D_noncoding_exon;Parent=tE(CUC)D_BY4741
chr09	liftOver	exon	197596	197667	.	+	.	Name=tE(CUC)I_noncoding_exon;Parent=tE(CUC)I_BY4741
chr02	liftOver	exon	645168	645239	.	+	.	Name=tE(UUC)B_noncoding_exon;Parent=tE(UUC)B_BY4741
chr03	liftOver	exon	82462	82533	.	-	.	Name=tE(UUC)C_noncoding_exon;Parent=tE(UUC)C_BY4741
chr05	liftOver	exon	176002	176073	.	+	.	Name=tE(UUC)E1_noncoding_exon;Parent=tE(UUC)E1_BY4741
chr05	liftOver	exon	353837	353908	.	+	.	Name=tE(UUC)E2_noncoding_exon;Parent=tE(UUC)E2_BY4741
chr05	liftOver	exon	486234	486305	.	-	.	Name=tE(UUC)E3_noncoding_exon;Parent=tE(UUC)E3_BY4741
chr07	liftOver	exon	328583	328654	.	+	.	Name=tE(UUC)G1_noncoding_exon;Parent=tE(UUC)G1_BY4741
chr07	liftOver	exon	401527	401598	.	-	.	Name=tE(UUC)G2_noncoding_exon;Parent=tE(UUC)G2_BY4741
chr07	liftOver	exon	541850	541921	.	+	.	Name=tE(UUC)G3_noncoding_exon;Parent=tE(UUC)G3_BY4741
chr09	liftOver	exon	370420	370491	.	+	.	Name=tE(UUC)I_noncoding_exon;Parent=tE(UUC)I_BY4741
chr10	liftOver	exon	115939	116010	.	-	.	Name=tE(UUC)J_noncoding_exon;Parent=tE(UUC)J_BY4741
chr11	liftOver	exon	141021	141092	.	-	.	Name=tE(UUC)K_noncoding_exon;Parent=tE(UUC)K_BY4741
chr12	liftOver	exon	794779	794850	.	+	.	Name=tE(UUC)L_noncoding_exon;Parent=tE(UUC)L_BY4741
chr13	liftOver	exon	290798	290869	.	+	.	Name=tE(UUC)M_noncoding_exon;Parent=tE(UUC)M_BY4741
chr16	liftOver	exon	210192	210263	.	-	.	Name=tE(UUC)P_noncoding_exon;Parent=tE(UUC)P_BY4741
chr02	liftOver	exon	36398	36434	.	+	.	Name=tF(GAA)B_noncoding_exon;Parent=tF(GAA)B_BY4741
chr02	liftOver	exon	36453	36488	.	+	.	Name=tF(GAA)B_noncoding_exon;Parent=tF(GAA)B_BY4741
chr04	liftOver	exon	1095425	1095461	.	-	.	Name=tF(GAA)D_noncoding_exon;Parent=tF(GAA)D_BY4741
chr04	liftOver	exon	1095370	1095405	.	-	.	Name=tF(GAA)D_noncoding_exon;Parent=tF(GAA)D_BY4741
chr06	liftOver	exon	157971	158007	.	-	.	Name=tF(GAA)F_noncoding_exon;Parent=tF(GAA)F_BY4741
chr06	liftOver	exon	157916	157951	.	-	.	Name=tF(GAA)F_noncoding_exon;Parent=tF(GAA)F_BY4741
chr07	liftOver	exon	440771	440807	.	-	.	Name=tF(GAA)G_noncoding_exon;Parent=tF(GAA)G_BY4741
chr07	liftOver	exon	440716	440751	.	-	.	Name=tF(GAA)G_noncoding_exon;Parent=tF(GAA)G_BY4741
chr08	liftOver	exon	237901	237937	.	-	.	Name=tF(GAA)H1_noncoding_exon;Parent=tF(GAA)H1_BY4741
chr08	liftOver	exon	237846	237881	.	-	.	Name=tF(GAA)H1_noncoding_exon;Parent=tF(GAA)H1_BY4741
chr08	liftOver	exon	358531	358567	.	-	.	Name=tF(GAA)H2_noncoding_exon;Parent=tF(GAA)H2_BY4741
chr08	liftOver	exon	358476	358511	.	-	.	Name=tF(GAA)H2_noncoding_exon;Parent=tF(GAA)H2_BY4741
chr13	liftOver	exon	352277	352313	.	+	.	Name=tF(GAA)M_noncoding_exon;Parent=tF(GAA)M_BY4741
chr13	liftOver	exon	352332	352367	.	+	.	Name=tF(GAA)M_noncoding_exon;Parent=tF(GAA)M_BY4741
chr14	liftOver	exon	374866	374902	.	+	.	Name=tF(GAA)N_noncoding_exon;Parent=tF(GAA)N_BY4741
chr14	liftOver	exon	374921	374956	.	+	.	Name=tF(GAA)N_noncoding_exon;Parent=tF(GAA)N_BY4741
chr16	liftOver	exon	560253	560289	.	-	.	Name=tF(GAA)P1_noncoding_exon;Parent=tF(GAA)P1_BY4741
chr16	liftOver	exon	560198	560233	.	-	.	Name=tF(GAA)P1_noncoding_exon;Parent=tF(GAA)P1_BY4741
chr16	liftOver	exon	622595	622631	.	-	.	Name=tF(GAA)P2_noncoding_exon;Parent=tF(GAA)P2_BY4741
chr16	liftOver	exon	622540	622575	.	-	.	Name=tF(GAA)P2_noncoding_exon;Parent=tF(GAA)P2_BY4741
chr04	liftOver	exon	1257008	1257079	.	-	.	Name=tG(CCC)D_noncoding_exon;Parent=tG(CCC)D_BY4741
chr15	liftOver	exon	594354	594425	.	-	.	Name=tG(CCC)O_noncoding_exon;Parent=tG(CCC)O_BY4741
chr02	liftOver	exon	197629	197699	.	-	.	Name=tG(GCC)B_noncoding_exon;Parent=tG(GCC)B_BY4741
chr03	liftOver	exon	135028	135098	.	-	.	Name=tG(GCC)C_noncoding_exon;Parent=tG(GCC)C_BY4741
chr04	liftOver	exon	83548	83618	.	+	.	Name=tG(GCC)D1_noncoding_exon;Parent=tG(GCC)D1_BY4741
chr04	liftOver	exon	992832	992902	.	+	.	Name=tG(GCC)D2_noncoding_exon;Parent=tG(GCC)D2_BY4741
chr05	liftOver	exon	61890	61960	.	-	.	Name=tG(GCC)E_noncoding_exon;Parent=tG(GCC)E_BY4741
chr06	liftOver	exon	162228	162298	.	+	.	Name=tG(GCC)F1_noncoding_exon;Parent=tG(GCC)F1_BY4741
chr06	liftOver	exon	180974	181044	.	-	.	Name=tG(GCC)F2_noncoding_exon;Parent=tG(GCC)F2_BY4741
chr07	liftOver	exon	845649	845719	.	+	.	Name=tG(GCC)G1_noncoding_exon;Parent=tG(GCC)G1_BY4741
chr07	liftOver	exon	930953	931023	.	-	.	Name=tG(GCC)G2_noncoding_exon;Parent=tG(GCC)G2_BY4741
chr10	liftOver	exon	396726	396796	.	-	.	Name=tG(GCC)J1_noncoding_exon;Parent=tG(GCC)J1_BY4741
chr10	liftOver	exon	531828	531898	.	+	.	Name=tG(GCC)J2_noncoding_exon;Parent=tG(GCC)J2_BY4741
chr13	liftOver	exon	183895	183965	.	-	.	Name=tG(GCC)M_noncoding_exon;Parent=tG(GCC)M_BY4741
chr15	liftOver	exon	226611	226681	.	-	.	Name=tG(GCC)O1_noncoding_exon;Parent=tG(GCC)O1_BY4741
chr15	liftOver	exon	282164	282234	.	+	.	Name=tG(GCC)O2_noncoding_exon;Parent=tG(GCC)O2_BY4741
chr16	liftOver	exon	572269	572339	.	+	.	Name=tG(GCC)P1_noncoding_exon;Parent=tG(GCC)P1_BY4741
chr16	liftOver	exon	860379	860449	.	+	.	Name=tG(GCC)P2_noncoding_exon;Parent=tG(GCC)P2_BY4741
chr07	liftOver	exon	779616	779687	.	+	.	Name=tG(UCC)G_noncoding_exon;Parent=tG(UCC)G_BY4741
chr14	liftOver	exon	96241	96312	.	-	.	Name=tG(UCC)N_noncoding_exon;Parent=tG(UCC)N_BY4741
chr15	liftOver	exon	110962	111033	.	-	.	Name=tG(UCC)O_noncoding_exon;Parent=tG(UCC)O_BY4741
chr05	liftOver	exon	206260	206331	.	+	.	Name=tH(GUG)E1_noncoding_exon;Parent=tH(GUG)E1_BY4741
chr05	liftOver	exon	433444	433515	.	-	.	Name=tH(GUG)E2_noncoding_exon;Parent=tH(GUG)E2_BY4741
chr07	liftOver	exon	110625	110696	.	-	.	Name=tH(GUG)G1_noncoding_exon;Parent=tH(GUG)G1_BY4741
chr07	liftOver	exon	319781	319852	.	+	.	Name=tH(GUG)G2_noncoding_exon;Parent=tH(GUG)G2_BY4741
chr08	liftOver	exon	62755	62826	.	+	.	Name=tH(GUG)H_noncoding_exon;Parent=tH(GUG)H_BY4741
chr11	liftOver	exon	313404	313475	.	-	.	Name=tH(GUG)K_noncoding_exon;Parent=tH(GUG)K_BY4741
chr13	liftOver	exon	363061	363132	.	+	.	Name=tH(GUG)M_noncoding_exon;Parent=tH(GUG)M_BY4741
chr02	liftOver	exon	197494	197567	.	+	.	Name=tI(AAU)B_noncoding_exon;Parent=tI(AAU)B_BY4741
chr04	liftOver	exon	668007	668080	.	-	.	Name=tI(AAU)D_noncoding_exon;Parent=tI(AAU)D_BY4741
chr05	liftOver	exon	442105	442178	.	-	.	Name=tI(AAU)E1_noncoding_exon;Parent=tI(AAU)E1_BY4741
chr05	liftOver	exon	550188	550261	.	-	.	Name=tI(AAU)E2_noncoding_exon;Parent=tI(AAU)E2_BY4741
chr07	liftOver	exon	739122	739195	.	+	.	Name=tI(AAU)G_noncoding_exon;Parent=tI(AAU)G_BY4741
chr09	liftOver	exon	183444	183517	.	-	.	Name=tI(AAU)I1_noncoding_exon;Parent=tI(AAU)I1_BY4741
chr09	liftOver	exon	210669	210742	.	+	.	Name=tI(AAU)I2_noncoding_exon;Parent=tI(AAU)I2_BY4741
chr12	liftOver	exon	732403	732476	.	+	.	Name=tI(AAU)L1_noncoding_exon;Parent=tI(AAU)L1_BY4741
chr12	liftOver	exon	1049672	1049745	.	-	.	Name=tI(AAU)L2_noncoding_exon;Parent=tI(AAU)L2_BY4741
chr14	liftOver	exon	569865	569938	.	-	.	Name=tI(AAU)N1_noncoding_exon;Parent=tI(AAU)N1_BY4741
chr14	liftOver	exon	602310	602383	.	+	.	Name=tI(AAU)N2_noncoding_exon;Parent=tI(AAU)N2_BY4741
chr16	liftOver	exon	819529	819602	.	+	.	Name=tI(AAU)P1_noncoding_exon;Parent=tI(AAU)P1_BY4741
chr16	liftOver	exon	880296	880369	.	-	.	Name=tI(AAU)P2_noncoding_exon;Parent=tI(AAU)P2_BY4741
chr04	liftOver	exon	884361	884397	.	+	.	Name=tI(UAU)D_noncoding_exon;Parent=tI(UAU)D_BY4741
chr04	liftOver	exon	884458	884493	.	+	.	Name=tI(UAU)D_noncoding_exon;Parent=tI(UAU)D_BY4741
chr12	liftOver	exon	605395	605431	.	-	.	Name=tI(UAU)L_noncoding_exon;Parent=tI(UAU)L_BY4741
chr12	liftOver	exon	605299	605334	.	-	.	Name=tI(UAU)L_noncoding_exon;Parent=tI(UAU)L_BY4741
chr04	liftOver	exon	1201750	1201822	.	+	.	Name=tK(CUU)D1_noncoding_exon;Parent=tK(CUU)D1_BY4741
chr04	liftOver	exon	1352466	1352538	.	-	.	Name=tK(CUU)D2_noncoding_exon;Parent=tK(CUU)D2_BY4741
chr05	liftOver	exon	134328	134400	.	-	.	Name=tK(CUU)E1_noncoding_exon;Parent=tK(CUU)E1_BY4741
chr05	liftOver	exon	434655	434727	.	-	.	Name=tK(CUU)E2_noncoding_exon;Parent=tK(CUU)E2_BY4741
chr06	liftOver	exon	226688	226760	.	-	.	Name=tK(CUU)F_noncoding_exon;Parent=tK(CUU)F_BY4741
chr07	liftOver	exon	122269	122341	.	+	.	Name=tK(CUU)G1_noncoding_exon;Parent=tK(CUU)G1_BY4741
chr07	liftOver	exon	185714	185786	.	-	.	Name=tK(CUU)G2_noncoding_exon;Parent=tK(CUU)G2_BY4741
chr07	liftOver	exon	876394	876466	.	+	.	Name=tK(CUU)G3_noncoding_exon;Parent=tK(CUU)G3_BY4741
chr09	liftOver	exon	300231	300303	.	-	.	Name=tK(CUU)I_noncoding_exon;Parent=tK(CUU)I_BY4741
chr10	liftOver	exon	414966	415038	.	+	.	Name=tK(CUU)J_noncoding_exon;Parent=tK(CUU)J_BY4741
chr11	liftOver	exon	203002	203074	.	+	.	Name=tK(CUU)K_noncoding_exon;Parent=tK(CUU)K_BY4741
chr13	liftOver	exon	480618	480690	.	-	.	Name=tK(CUU)M_noncoding_exon;Parent=tK(CUU)M_BY4741
chr16	liftOver	exon	582062	582134	.	+	.	Name=tK(CUU)P_noncoding_exon;Parent=tK(CUU)P_BY4741
chr04	liftOver	exon	359577	359613	.	+	.	Name=tK(UUU)D_noncoding_exon;Parent=tK(UUU)D_BY4741
chr04	liftOver	exon	359637	359672	.	+	.	Name=tK(UUU)D_noncoding_exon;Parent=tK(UUU)D_BY4741
chr07	liftOver	exon	115488	115524	.	+	.	Name=tK(UUU)G1_noncoding_exon;Parent=tK(UUU)G1_BY4741
chr07	liftOver	exon	115548	115583	.	+	.	Name=tK(UUU)G1_noncoding_exon;Parent=tK(UUU)G1_BY4741
chr07	liftOver	exon	701012	701048	.	-	.	Name=tK(UUU)G2_noncoding_exon;Parent=tK(UUU)G2_BY4741
chr07	liftOver	exon	700953	700988	.	-	.	Name=tK(UUU)G2_noncoding_exon;Parent=tK(UUU)G2_BY4741
chr11	liftOver	exon	578968	579004	.	+	.	Name=tK(UUU)K_noncoding_exon;Parent=tK(UUU)K_BY4741
chr11	liftOver	exon	579028	579063	.	+	.	Name=tK(UUU)K_noncoding_exon;Parent=tK(UUU)K_BY4741
chr12	liftOver	exon	872977	873013	.	+	.	Name=tK(UUU)L_noncoding_exon;Parent=tK(UUU)L_BY4741
chr12	liftOver	exon	873037	873072	.	+	.	Name=tK(UUU)L_noncoding_exon;Parent=tK(UUU)L_BY4741
chr15	liftOver	exon	438643	438679	.	+	.	Name=tK(UUU)O_noncoding_exon;Parent=tK(UUU)O_BY4741
chr15	liftOver	exon	438703	438738	.	+	.	Name=tK(UUU)O_noncoding_exon;Parent=tK(UUU)O_BY4741
chr16	liftOver	exon	769266	769302	.	-	.	Name=tK(UUU)P_noncoding_exon;Parent=tK(UUU)P_BY4741
chr16	liftOver	exon	769207	769242	.	-	.	Name=tK(UUU)P_noncoding_exon;Parent=tK(UUU)P_BY4741
chr01	liftOver	exon	181143	181180	.	+	.	Name=tL(CAA)A_noncoding_exon;Parent=tL(CAA)A_BY4741
chr01	liftOver	exon	181213	181256	.	+	.	Name=tL(CAA)A_noncoding_exon;Parent=tL(CAA)A_BY4741
chr01	liftOver	exon	181143	181180	.	+	.	Name=tL(CAA)C_noncoding_exon;Parent=tL(CAA)C_BY4741
chr01	liftOver	exon	181213	181256	.	+	.	Name=tL(CAA)C_noncoding_exon;Parent=tL(CAA)C_BY4741
chr04	liftOver	exon	1461792	1461829	.	-	.	Name=tL(CAA)D_noncoding_exon;Parent=tL(CAA)D_BY4741
chr04	liftOver	exon	1461715	1461758	.	-	.	Name=tL(CAA)D_noncoding_exon;Parent=tL(CAA)D_BY4741
chr07	liftOver	exon	205521	205558	.	+	.	Name=tL(CAA)G1_noncoding_exon;Parent=tL(CAA)G1_BY4741
chr07	liftOver	exon	205591	205634	.	+	.	Name=tL(CAA)G1_noncoding_exon;Parent=tL(CAA)G1_BY4741
chr07	liftOver	exon	423092	423129	.	+	.	Name=tL(CAA)G2_noncoding_exon;Parent=tL(CAA)G2_BY4741
chr07	liftOver	exon	423162	423205	.	+	.	Name=tL(CAA)G2_noncoding_exon;Parent=tL(CAA)G2_BY4741
chr07	liftOver	exon	857454	857491	.	-	.	Name=tL(CAA)G3_noncoding_exon;Parent=tL(CAA)G3_BY4741
chr07	liftOver	exon	857378	857421	.	-	.	Name=tL(CAA)G3_noncoding_exon;Parent=tL(CAA)G3_BY4741
chr11	liftOver	exon	458560	458597	.	+	.	Name=tL(CAA)K_noncoding_exon;Parent=tL(CAA)K_BY4741
chr11	liftOver	exon	458630	458673	.	+	.	Name=tL(CAA)K_noncoding_exon;Parent=tL(CAA)K_BY4741
chr12	liftOver	exon	628382	628419	.	+	.	Name=tL(CAA)L_noncoding_exon;Parent=tL(CAA)L_BY4741
chr12	liftOver	exon	628453	628496	.	+	.	Name=tL(CAA)L_noncoding_exon;Parent=tL(CAA)L_BY4741
chr13	liftOver	exon	504892	504929	.	+	.	Name=tL(CAA)M_noncoding_exon;Parent=tL(CAA)M_BY4741
chr13	liftOver	exon	504962	505005	.	+	.	Name=tL(CAA)M_noncoding_exon;Parent=tL(CAA)M_BY4741
chr14	liftOver	exon	443004	443041	.	+	.	Name=tL(CAA)N_noncoding_exon;Parent=tL(CAA)N_BY4741
chr14	liftOver	exon	443074	443117	.	+	.	Name=tL(CAA)N_noncoding_exon;Parent=tL(CAA)N_BY4741
chr07	liftOver	exon	700675	700756	.	+	.	Name=tL(GAG)G_noncoding_exon;Parent=tL(GAG)G_BY4741
chr02	liftOver	exon	9583	9666	.	+	.	Name=tL(UAA)B1_noncoding_exon;Parent=tL(UAA)B1_BY4741
chr02	liftOver	exon	347603	347686	.	+	.	Name=tL(UAA)B2_noncoding_exon;Parent=tL(UAA)B2_BY4741
chr04	liftOver	exon	519743	519826	.	+	.	Name=tL(UAA)D_noncoding_exon;Parent=tL(UAA)D_BY4741
chr10	liftOver	exon	424432	424515	.	-	.	Name=tL(UAA)J_noncoding_exon;Parent=tL(UAA)J_BY4741
chr11	liftOver	exon	84211	84294	.	+	.	Name=tL(UAA)K_noncoding_exon;Parent=tL(UAA)K_BY4741
chr12	liftOver	exon	960573	960656	.	-	.	Name=tL(UAA)L_noncoding_exon;Parent=tL(UAA)L_BY4741
chr14	liftOver	exon	726132	726215	.	-	.	Name=tL(UAA)N_noncoding_exon;Parent=tL(UAA)N_BY4741
chr10	liftOver	exon	617919	617956	.	+	.	Name=tL(UAG)J_noncoding_exon;Parent=tL(UAG)J_BY4741
chr10	liftOver	exon	617976	618019	.	+	.	Name=tL(UAG)J_noncoding_exon;Parent=tL(UAG)J_BY4741
chr12	liftOver	exon	592581	592618	.	-	.	Name=tL(UAG)L1_noncoding_exon;Parent=tL(UAG)L1_BY4741
chr12	liftOver	exon	592518	592561	.	-	.	Name=tL(UAG)L1_noncoding_exon;Parent=tL(UAG)L1_BY4741
chr12	liftOver	exon	732089	732126	.	+	.	Name=tL(UAG)L2_noncoding_exon;Parent=tL(UAG)L2_BY4741
chr12	liftOver	exon	732146	732189	.	+	.	Name=tL(UAG)L2_noncoding_exon;Parent=tL(UAG)L2_BY4741
chr02	liftOver	exon	307587	308887	.	+	.	Name=TLC1_noncoding_exon;Parent=TLC1_BY4741
chr04	liftOver	exon	1175829	1175901	.	+	.	Name=tM(CAU)D_noncoding_exon;Parent=tM(CAU)D_BY4741
chr05	liftOver	exon	100133	100204	.	+	.	Name=tM(CAU)E_noncoding_exon;Parent=tM(CAU)E_BY4741
chr10	liftOver	exon	391043	391115	.	-	.	Name=tM(CAU)J1_noncoding_exon;Parent=tM(CAU)J1_BY4741
chr10	liftOver	exon	422937	423009	.	+	.	Name=tM(CAU)J2_noncoding_exon;Parent=tM(CAU)J2_BY4741
chr10	liftOver	exon	517813	517884	.	-	.	Name=tM(CAU)J3_noncoding_exon;Parent=tM(CAU)J3_BY4741
chr13	liftOver	exon	572880	572952	.	-	.	Name=tM(CAU)M_noncoding_exon;Parent=tM(CAU)M_BY4741
chr15	liftOver	exon	710201	710272	.	+	.	Name=tM(CAU)O1_noncoding_exon;Parent=tM(CAU)O1_BY4741
chr15	liftOver	exon	976231	976303	.	+	.	Name=tM(CAU)O2_noncoding_exon;Parent=tM(CAU)O2_BY4741
chr16	liftOver	exon	338848	338919	.	-	.	Name=tM(CAU)P_noncoding_exon;Parent=tM(CAU)P_BY4741
chr03	liftOver	exon	120043	120116	.	+	.	Name=tN(GUU)C_noncoding_exon;Parent=tN(GUU)C_BY4741
chr06	liftOver	exon	137486	137559	.	-	.	Name=tN(GUU)F_noncoding_exon;Parent=tN(GUU)F_BY4741
chr07	liftOver	exon	731137	731210	.	+	.	Name=tN(GUU)G_noncoding_exon;Parent=tN(GUU)G_BY4741
chr11	liftOver	exon	74627	74700	.	+	.	Name=tN(GUU)K_noncoding_exon;Parent=tN(GUU)K_BY4741
chr12	liftOver	exon	973584	973657	.	-	.	Name=tN(GUU)L_noncoding_exon;Parent=tN(GUU)L_BY4741
chr14	liftOver	exon	102716	102789	.	+	.	Name=tN(GUU)N1_noncoding_exon;Parent=tN(GUU)N1_BY4741
chr14	liftOver	exon	632597	632670	.	+	.	Name=tN(GUU)N2_noncoding_exon;Parent=tN(GUU)N2_BY4741
chr15	liftOver	exon	228331	228404	.	+	.	Name=tN(GUU)O1_noncoding_exon;Parent=tN(GUU)O1_BY4741
chr15	liftOver	exon	487439	487512	.	+	.	Name=tN(GUU)O2_noncoding_exon;Parent=tN(GUU)O2_BY4741
chr16	liftOver	exon	810676	810749	.	+	.	Name=tN(GUU)P_noncoding_exon;Parent=tN(GUU)P_BY4741
chr03	liftOver	exon	115904	115975	.	-	.	Name=tP(AGG)C_noncoding_exon;Parent=tP(AGG)C_BY4741
chr14	liftOver	exon	631844	631915	.	-	.	Name=tP(AGG)N_noncoding_exon;Parent=tP(AGG)N_BY4741
chr01	liftOver	exon	139154	139189	.	+	.	Name=tP(UGG)A_noncoding_exon;Parent=tP(UGG)A_BY4741
chr01	liftOver	exon	139221	139256	.	+	.	Name=tP(UGG)A_noncoding_exon;Parent=tP(UGG)A_BY4741
chr06	liftOver	exon	101376	101411	.	+	.	Name=tP(UGG)F_noncoding_exon;Parent=tP(UGG)F_BY4741
chr06	liftOver	exon	101443	101478	.	+	.	Name=tP(UGG)F_noncoding_exon;Parent=tP(UGG)F_BY4741
chr08	liftOver	exon	388958	388993	.	-	.	Name=tP(UGG)H_noncoding_exon;Parent=tP(UGG)H_BY4741
chr08	liftOver	exon	388891	388926	.	-	.	Name=tP(UGG)H_noncoding_exon;Parent=tP(UGG)H_BY4741
chr12	liftOver	exon	92548	92583	.	+	.	Name=tP(UGG)L_noncoding_exon;Parent=tP(UGG)L_BY4741
chr12	liftOver	exon	92615	92650	.	+	.	Name=tP(UGG)L_noncoding_exon;Parent=tP(UGG)L_BY4741
chr13	liftOver	exon	196132	196167	.	-	.	Name=tP(UGG)M_noncoding_exon;Parent=tP(UGG)M_BY4741
chr13	liftOver	exon	196065	196100	.	-	.	Name=tP(UGG)M_noncoding_exon;Parent=tP(UGG)M_BY4741
chr14	liftOver	exon	547092	547127	.	+	.	Name=tP(UGG)N1_noncoding_exon;Parent=tP(UGG)N1_BY4741
chr14	liftOver	exon	547159	547194	.	+	.	Name=tP(UGG)N1_noncoding_exon;Parent=tP(UGG)N1_BY4741
chr14	liftOver	exon	568113	568148	.	+	.	Name=tP(UGG)N2_noncoding_exon;Parent=tP(UGG)N2_BY4741
chr14	liftOver	exon	568180	568215	.	+	.	Name=tP(UGG)N2_noncoding_exon;Parent=tP(UGG)N2_BY4741
chr15	liftOver	exon	301097	301132	.	+	.	Name=tP(UGG)O1_noncoding_exon;Parent=tP(UGG)O1_BY4741
chr15	liftOver	exon	301163	301198	.	+	.	Name=tP(UGG)O1_noncoding_exon;Parent=tP(UGG)O1_BY4741
chr15	liftOver	exon	464450	464485	.	+	.	Name=tP(UGG)O2_noncoding_exon;Parent=tP(UGG)O2_BY4741
chr15	liftOver	exon	464516	464551	.	+	.	Name=tP(UGG)O2_noncoding_exon;Parent=tP(UGG)O2_BY4741
chr15	liftOver	exon	980493	980528	.	+	.	Name=tP(UGG)O3_noncoding_exon;Parent=tP(UGG)O3_BY4741
chr15	liftOver	exon	980562	980597	.	+	.	Name=tP(UGG)O3_noncoding_exon;Parent=tP(UGG)O3_BY4741
chr13	liftOver	exon	808243	808314	.	-	.	Name=tQ(CUG)M_noncoding_exon;Parent=tQ(CUG)M_BY4741
chr02	liftOver	exon	350827	350898	.	+	.	Name=tQ(UUG)B_noncoding_exon;Parent=tQ(UUG)B_BY4741
chr03	liftOver	exon	157722	157793	.	-	.	Name=tQ(UUG)C_noncoding_exon;Parent=tQ(UUG)C_BY4741
chr04	liftOver	exon	520972	521043	.	+	.	Name=tQ(UUG)D1_noncoding_exon;Parent=tQ(UUG)D1_BY4741
chr04	liftOver	exon	645153	645224	.	-	.	Name=tQ(UUG)D2_noncoding_exon;Parent=tQ(UUG)D2_BY4741
chr04	liftOver	exon	802731	802802	.	-	.	Name=tQ(UUG)D3_noncoding_exon;Parent=tQ(UUG)D3_BY4741
chr05	liftOver	exon	249189	249260	.	+	.	Name=tQ(UUG)E1_noncoding_exon;Parent=tQ(UUG)E1_BY4741
chr05	liftOver	exon	129985	130056	.	-	.	Name=tQ(UUG)E2_noncoding_exon;Parent=tQ(UUG)E2_BY4741
chr08	liftOver	exon	134319	134390	.	+	.	Name=tQ(UUG)H_noncoding_exon;Parent=tQ(UUG)H_BY4741
chr12	liftOver	exon	448649	448720	.	-	.	Name=tQ(UUG)L_noncoding_exon;Parent=tQ(UUG)L_BY4741
chr04	liftOver	exon	619969	620041	.	-	.	Name=tR(ACG)D_noncoding_exon;Parent=tR(ACG)D_BY4741
chr05	liftOver	exon	491255	491327	.	-	.	Name=tR(ACG)E_noncoding_exon;Parent=tR(ACG)E_BY4741
chr10	liftOver	exon	233939	234011	.	+	.	Name=tR(ACG)J_noncoding_exon;Parent=tR(ACG)J_BY4741
chr11	liftOver	exon	490971	491043	.	+	.	Name=tR(ACG)K_noncoding_exon;Parent=tR(ACG)K_BY4741
chr12	liftOver	exon	374354	374426	.	+	.	Name=tR(ACG)L_noncoding_exon;Parent=tR(ACG)L_BY4741
chr15	liftOver	exon	340299	340371	.	-	.	Name=tR(ACG)O_noncoding_exon;Parent=tR(ACG)O_BY4741
chr12	liftOver	exon	816210	816281	.	+	.	Name=tR(CCG)L_noncoding_exon;Parent=tR(CCG)L_BY4741
chr10	liftOver	exon	538555	538626	.	+	.	Name=tR(CCU)J_noncoding_exon;Parent=tR(CCU)J_BY4741
chr02	liftOver	exon	405879	405950	.	+	.	Name=tR(UCU)B_noncoding_exon;Parent=tR(UCU)B_BY4741
chr04	liftOver	exon	568882	568953	.	+	.	Name=tR(UCU)D_noncoding_exon;Parent=tR(UCU)D_BY4741
chr05	liftOver	exon	137569	137640	.	+	.	Name=tR(UCU)E_noncoding_exon;Parent=tR(UCU)E_BY4741
chr07	liftOver	exon	405470	405541	.	+	.	Name=tR(UCU)G1_noncoding_exon;Parent=tR(UCU)G1_BY4741
chr07	liftOver	exon	828723	828794	.	-	.	Name=tR(UCU)G2_noncoding_exon;Parent=tR(UCU)G2_BY4741
chr07	liftOver	exon	736340	736411	.	+	.	Name=tR(UCU)G3_noncoding_exon;Parent=tR(UCU)G3_BY4741
chr10	liftOver	exon	355374	355445	.	+	.	Name=tR(UCU)J1_noncoding_exon;Parent=tR(UCU)J1_BY4741
chr10	liftOver	exon	374506	374577	.	-	.	Name=tR(UCU)J2_noncoding_exon;Parent=tR(UCU)J2_BY4741
chr11	liftOver	exon	162490	162561	.	-	.	Name=tR(UCU)K_noncoding_exon;Parent=tR(UCU)K_BY4741
chr13	liftOver	exon	747889	747960	.	-	.	Name=tR(UCU)M1_noncoding_exon;Parent=tR(UCU)M1_BY4741
chr13	liftOver	exon	131822	131893	.	-	.	Name=tR(UCU)M2_noncoding_exon;Parent=tR(UCU)M2_BY4741
chr01	liftOver	exon	182524	182605	.	-	.	Name=tS(AGA)A_noncoding_exon;Parent=tS(AGA)A_BY4741
chr02	liftOver	exon	227075	227156	.	+	.	Name=tS(AGA)B_noncoding_exon;Parent=tS(AGA)B_BY4741
chr04	liftOver	exon	437772	437853	.	+	.	Name=tS(AGA)D1_noncoding_exon;Parent=tS(AGA)D1_BY4741
chr04	liftOver	exon	980974	981055	.	-	.	Name=tS(AGA)D2_noncoding_exon;Parent=tS(AGA)D2_BY4741
chr04	liftOver	exon	1305630	1305712	.	+	.	Name=tS(AGA)D3_noncoding_exon;Parent=tS(AGA)D3_BY4741
chr05	liftOver	exon	86604	86685	.	+	.	Name=tS(AGA)E_noncoding_exon;Parent=tS(AGA)E_BY4741
chr07	liftOver	exon	561662	561743	.	-	.	Name=tS(AGA)G_noncoding_exon;Parent=tS(AGA)G_BY4741
chr08	liftOver	exon	133024	133105	.	-	.	Name=tS(AGA)H_noncoding_exon;Parent=tS(AGA)H_BY4741
chr10	liftOver	exon	524012	524093	.	-	.	Name=tS(AGA)J_noncoding_exon;Parent=tS(AGA)J_BY4741
chr12	liftOver	exon	167944	168025	.	-	.	Name=tS(AGA)L_noncoding_exon;Parent=tS(AGA)L_BY4741
chr13	liftOver	exon	259155	259236	.	-	.	Name=tS(AGA)M_noncoding_exon;Parent=tS(AGA)M_BY4741
chr03	liftOver	exon	217258	217294	.	+	.	Name=tS(CGA)C_noncoding_exon;Parent=tS(CGA)C_BY4741
chr03	liftOver	exon	217314	217358	.	+	.	Name=tS(CGA)C_noncoding_exon;Parent=tS(CGA)C_BY4741
chr06	liftOver	exon	191577	191613	.	-	.	Name=tS(GCU)F_noncoding_exon;Parent=tS(GCU)F_BY4741
chr06	liftOver	exon	191513	191557	.	-	.	Name=tS(GCU)F_noncoding_exon;Parent=tS(GCU)F_BY4741
chr12	liftOver	exon	781955	781990	.	+	.	Name=tX(XXX)L_noncoding_exon;Parent=tX(XXX)L_BY4741
chr12	liftOver	exon	782011	782054	.	+	.	Name=tX(XXX)L_noncoding_exon;Parent=tX(XXX)L_BY4741
chr15	liftOver	exon	274673	274709	.	+	.	Name=tS(GCU)O_noncoding_exon;Parent=tS(GCU)O_BY4741
chr15	liftOver	exon	274729	274773	.	+	.	Name=tS(GCU)O_noncoding_exon;Parent=tS(GCU)O_BY4741
chr05	liftOver	exon	287346	287427	.	-	.	Name=tS(UGA)E_noncoding_exon;Parent=tS(UGA)E_BY4741
chr09	liftOver	exon	248854	248935	.	+	.	Name=tS(UGA)I_noncoding_exon;Parent=tS(UGA)I_BY4741
chr16	liftOver	exon	689565	689646	.	+	.	Name=tS(UGA)P_noncoding_exon;Parent=tS(UGA)P_BY4741
chr02	liftOver	exon	266378	266450	.	+	.	Name=tT(AGU)B_noncoding_exon;Parent=tT(AGU)B_BY4741
chr03	liftOver	exon	284800	284872	.	+	.	Name=tT(AGU)C_noncoding_exon;Parent=tT(AGU)C_BY4741
chr04	liftOver	exon	434264	434336	.	-	.	Name=tT(AGU)D_noncoding_exon;Parent=tT(AGU)D_BY4741
chr08	liftOver	exon	116105	116177	.	-	.	Name=tT(AGU)H_noncoding_exon;Parent=tT(AGU)H_BY4741
chr09	liftOver	exon	175035	175107	.	+	.	Name=tT(AGU)I1_noncoding_exon;Parent=tT(AGU)I1_BY4741
chr09	liftOver	exon	325751	325823	.	+	.	Name=tT(AGU)I2_noncoding_exon;Parent=tT(AGU)I2_BY4741
chr10	liftOver	exon	59100	59172	.	-	.	Name=tT(AGU)J_noncoding_exon;Parent=tT(AGU)J_BY4741
chr14	liftOver	exon	104805	104877	.	+	.	Name=tT(AGU)N1_noncoding_exon;Parent=tT(AGU)N1_BY4741
chr14	liftOver	exon	560691	560763	.	-	.	Name=tT(AGU)N2_noncoding_exon;Parent=tT(AGU)N2_BY4741
chr15	liftOver	exon	113802	113874	.	+	.	Name=tT(AGU)O1_noncoding_exon;Parent=tT(AGU)O1_BY4741
chr15	liftOver	exon	354041	354113	.	-	.	Name=tT(AGU)O2_noncoding_exon;Parent=tT(AGU)O2_BY4741
chr11	liftOver	exon	46735	46806	.	-	.	Name=tT(CGU)K_noncoding_exon;Parent=tT(CGU)K_BY4741
chr07	liftOver	exon	661749	661820	.	+	.	Name=tT(UGU)G1_noncoding_exon;Parent=tT(UGU)G1_BY4741
chr07	liftOver	exon	1004216	1004287	.	+	.	Name=tT(UGU)G2_noncoding_exon;Parent=tT(UGU)G2_BY4741
chr08	liftOver	exon	466988	467059	.	+	.	Name=tT(UGU)H_noncoding_exon;Parent=tT(UGU)H_BY4741
chr16	liftOver	exon	744284	744355	.	+	.	Name=tT(UGU)P_noncoding_exon;Parent=tT(UGU)P_BY4741
chr05	liftOver	exon	437603	437676	.	+	.	Name=tV(AAC)E1_noncoding_exon;Parent=tV(AAC)E1_BY4741
chr05	liftOver	exon	468360	468433	.	+	.	Name=tV(AAC)E2_noncoding_exon;Parent=tV(AAC)E2_BY4741
chr07	liftOver	exon	412294	412367	.	-	.	Name=tV(AAC)G1_noncoding_exon;Parent=tV(AAC)G1_BY4741
chr07	liftOver	exon	823482	823555	.	+	.	Name=tV(AAC)G2_noncoding_exon;Parent=tV(AAC)G2_BY4741
chr07	liftOver	exon	73829	73902	.	-	.	Name=tV(AAC)G3_noncoding_exon;Parent=tV(AAC)G3_BY4741
chr08	liftOver	exon	85298	85371	.	-	.	Name=tV(AAC)H_noncoding_exon;Parent=tV(AAC)H_BY4741
chr10	liftOver	exon	378360	378433	.	+	.	Name=tV(AAC)J_noncoding_exon;Parent=tV(AAC)J_BY4741
chr11	liftOver	exon	308147	308220	.	-	.	Name=tV(AAC)K1_noncoding_exon;Parent=tV(AAC)K1_BY4741
chr11	liftOver	exon	379683	379756	.	+	.	Name=tV(AAC)K2_noncoding_exon;Parent=tV(AAC)K2_BY4741
chr12	liftOver	exon	687858	687931	.	-	.	Name=tV(AAC)L_noncoding_exon;Parent=tV(AAC)L_BY4741
chr13	liftOver	exon	372442	372515	.	-	.	Name=tV(AAC)M1_noncoding_exon;Parent=tV(AAC)M1_BY4741
chr13	liftOver	exon	420585	420658	.	-	.	Name=tV(AAC)M2_noncoding_exon;Parent=tV(AAC)M2_BY4741
chr13	liftOver	exon	586633	586706	.	-	.	Name=tV(AAC)M3_noncoding_exon;Parent=tV(AAC)M3_BY4741
chr15	liftOver	exon	663812	663885	.	-	.	Name=tV(AAC)O_noncoding_exon;Parent=tV(AAC)O_BY4741
chr04	liftOver	exon	1075472	1075544	.	-	.	Name=tV(CAC)D_noncoding_exon;Parent=tV(CAC)D_BY4741
chr08	liftOver	exon	475704	475776	.	-	.	Name=tV(CAC)H_noncoding_exon;Parent=tV(CAC)H_BY4741
chr02	liftOver	exon	326792	326865	.	-	.	Name=tV(UAC)B_noncoding_exon;Parent=tV(UAC)B_BY4741
chr04	liftOver	exon	488797	488870	.	-	.	Name=tV(UAC)D_noncoding_exon;Parent=tV(UAC)D_BY4741
chr07	liftOver	exon	287420	287455	.	-	.	Name=tW(CCA)G1_noncoding_exon;Parent=tW(CCA)G1_BY4741
chr07	liftOver	exon	287350	287385	.	-	.	Name=tW(CCA)G1_noncoding_exon;Parent=tW(CCA)G1_BY4741
chr07	liftOver	exon	878780	878815	.	-	.	Name=tW(CCA)G2_noncoding_exon;Parent=tW(CCA)G2_BY4741
chr07	liftOver	exon	878710	878745	.	-	.	Name=tW(CCA)G2_noncoding_exon;Parent=tW(CCA)G2_BY4741
chr10	liftOver	exon	416001	416036	.	-	.	Name=tW(CCA)J_noncoding_exon;Parent=tW(CCA)J_BY4741
chr10	liftOver	exon	415931	415966	.	-	.	Name=tW(CCA)J_noncoding_exon;Parent=tW(CCA)J_BY4741
chr11	liftOver	exon	302921	302956	.	+	.	Name=tW(CCA)K_noncoding_exon;Parent=tW(CCA)K_BY4741
chr11	liftOver	exon	302991	303026	.	+	.	Name=tW(CCA)K_noncoding_exon;Parent=tW(CCA)K_BY4741
chr13	liftOver	exon	379370	379405	.	-	.	Name=tW(CCA)M_noncoding_exon;Parent=tW(CCA)M_BY4741
chr13	liftOver	exon	379300	379335	.	-	.	Name=tW(CCA)M_noncoding_exon;Parent=tW(CCA)M_BY4741
chr16	liftOver	exon	56239	56274	.	-	.	Name=tW(CCA)P_noncoding_exon;Parent=tW(CCA)P_BY4741
chr16	liftOver	exon	56169	56204	.	-	.	Name=tW(CCA)P_noncoding_exon;Parent=tW(CCA)P_BY4741
chr04	liftOver	exon	1150842	1150941	.	-	.	Name=tX(XXX)D_noncoding_exon;Parent=tX(XXX)D_BY4741
chr04	liftOver	exon	946312	946350	.	+	.	Name=tY(GUA)D_noncoding_exon;Parent=tY(GUA)D_BY4741
chr04	liftOver	exon	946365	946400	.	+	.	Name=tY(GUA)D_noncoding_exon;Parent=tY(GUA)D_BY4741
chr06	liftOver	exon	167437	167475	.	+	.	Name=tY(GUA)F1_noncoding_exon;Parent=tY(GUA)F1_BY4741
chr06	liftOver	exon	167490	167525	.	+	.	Name=tY(GUA)F1_noncoding_exon;Parent=tY(GUA)F1_BY4741
chr06	liftOver	exon	210669	210707	.	-	.	Name=tY(GUA)F2_noncoding_exon;Parent=tY(GUA)F2_BY4741
chr06	liftOver	exon	210619	210654	.	-	.	Name=tY(GUA)F2_noncoding_exon;Parent=tY(GUA)F2_BY4741
chr10	liftOver	exon	354244	354282	.	+	.	Name=tY(GUA)J1_noncoding_exon;Parent=tY(GUA)J1_BY4741
chr10	liftOver	exon	354297	354332	.	+	.	Name=tY(GUA)J1_noncoding_exon;Parent=tY(GUA)J1_BY4741
chr10	liftOver	exon	543006	543044	.	-	.	Name=tY(GUA)J2_noncoding_exon;Parent=tY(GUA)J2_BY4741
chr10	liftOver	exon	542956	542991	.	-	.	Name=tY(GUA)J2_noncoding_exon;Parent=tY(GUA)J2_BY4741
chr13	liftOver	exon	168792	168830	.	+	.	Name=tY(GUA)M1_noncoding_exon;Parent=tY(GUA)M1_BY4741
chr13	liftOver	exon	168845	168880	.	+	.	Name=tY(GUA)M1_noncoding_exon;Parent=tY(GUA)M1_BY4741
chr13	liftOver	exon	837925	837963	.	+	.	Name=tY(GUA)M2_noncoding_exon;Parent=tY(GUA)M2_BY4741
chr13	liftOver	exon	837978	838013	.	+	.	Name=tY(GUA)M2_noncoding_exon;Parent=tY(GUA)M2_BY4741
chr15	liftOver	exon	288192	288230	.	+	.	Name=tY(GUA)O_noncoding_exon;Parent=tY(GUA)O_BY4741
chr15	liftOver	exon	288245	288280	.	+	.	Name=tY(GUA)O_noncoding_exon;Parent=tY(GUA)O_BY4741
chr17	liftOver	exon	6546	8194	.	+	.	Name=15S_rRNA_noncoding_exon;Parent=15S_rRNA_BY4741
chr17	liftOver	exon	58012	60727	.	+	.	Name=21S_rRNA_noncoding_exon;Parent=21S_rRNA_BY4741
chr17	liftOver	exon	61871	62450	.	+	.	Name=21S_rRNA_noncoding_exon;Parent=21S_rRNA_BY4741
chr15	liftOver	exon	842213	842416	.	+	.	Name=snR5_noncoding_exon;Parent=snR5_BY4741
chr15	liftOver	exon	407948	408134	.	-	.	Name=snR9_noncoding_exon;Parent=snR9_BY4741
chr13	liftOver	exon	652272	652529	.	+	.	Name=snR11_noncoding_exon;Parent=snR11_BY4741
chr15	liftOver	exon	779917	779930	.	+	.	Name=snR17a_noncoding_exon;Parent=snR17a_BY4741
chr15	liftOver	exon	780088	780406	.	+	.	Name=snR17a_noncoding_exon;Parent=snR17a_BY4741
chr14	liftOver	exon	230105	230672	.	-	.	Name=snR19_noncoding_exon;Parent=snR19_BY4741
chr15	liftOver	exon	841768	841992	.	-	.	Name=snR31_noncoding_exon;Parent=snR31_BY4741
chr08	liftOver	exon	381538	381725	.	+	.	Name=snR32_noncoding_exon;Parent=snR32_BY4741
chr03	liftOver	exon	134691	134873	.	-	.	Name=snR33_noncoding_exon;Parent=snR33_BY4741
chr15	liftOver	exon	759136	759339	.	-	.	Name=snR35_noncoding_exon;Parent=snR35_BY4741
chr15	liftOver	exon	680685	680866	.	-	.	Name=snR36_noncoding_exon;Parent=snR36_BY4741
chr11	liftOver	exon	283188	283282	.	+	.	Name=snR38_noncoding_exon;Parent=snR38_BY4741
chr07	liftOver	exon	365163	365251	.	-	.	Name=snR39_noncoding_exon;Parent=snR39_BY4741
chr07	liftOver	exon	366374	366469	.	-	.	Name=snR39B_noncoding_exon;Parent=snR39B_BY4741
chr14	liftOver	exon	89210	89306	.	+	.	Name=snR40_noncoding_exon;Parent=snR40_BY4741
chr16	liftOver	exon	719148	719242	.	-	.	Name=snR41_noncoding_exon;Parent=snR41_BY4741
chr13	liftOver	exon	298551	298648	.	+	.	Name=snR72_noncoding_exon;Parent=snR72_BY4741
chr13	liftOver	exon	298304	298409	.	+	.	Name=snR73_noncoding_exon;Parent=snR73_BY4741
chr13	liftOver	exon	298135	298222	.	+	.	Name=snR74_noncoding_exon;Parent=snR74_BY4741
chr13	liftOver	exon	297915	298003	.	+	.	Name=snR75_noncoding_exon;Parent=snR75_BY4741
chr13	liftOver	exon	297722	297830	.	+	.	Name=snR76_noncoding_exon;Parent=snR76_BY4741
chr13	liftOver	exon	297503	297590	.	+	.	Name=snR77_noncoding_exon;Parent=snR77_BY4741
chr13	liftOver	exon	297275	297361	.	+	.	Name=snR78_noncoding_exon;Parent=snR78_BY4741
chr12	liftOver	exon	348426	348509	.	-	.	Name=snR79_noncoding_exon;Parent=snR79_BY4741
chr03	liftOver	exon	168031	168219	.	-	.	Name=snR189_noncoding_exon;Parent=snR189_BY4741
chr17	liftOver	exon	69849	69924	.	+	.	Name=tA(UGC)Q_noncoding_exon;Parent=tA(UGC)Q_BY4741
chr17	liftOver	exon	64418	64493	.	+	.	Name=tC(GCA)Q_noncoding_exon;Parent=tC(GCA)Q_BY4741
chr17	liftOver	exon	68325	68399	.	+	.	Name=tD(GUC)Q_noncoding_exon;Parent=tD(GUC)Q_BY4741
chr17	liftOver	exon	35374	35445	.	+	.	Name=tE(UUC)Q_noncoding_exon;Parent=tE(UUC)Q_BY4741
chr17	liftOver	exon	77433	77507	.	+	.	Name=tF(GAA)Q_noncoding_exon;Parent=tF(GAA)Q_BY4741
chr17	liftOver	exon	67471	67545	.	+	.	Name=tG(UCC)Q_noncoding_exon;Parent=tG(UCC)Q_BY4741
chr17	liftOver	exon	64599	64673	.	+	.	Name=tH(GUG)Q_noncoding_exon;Parent=tH(GUG)Q_BY4741
chr17	liftOver	exon	70165	70240	.	+	.	Name=tI(GAU)Q_noncoding_exon;Parent=tI(GAU)Q_BY4741
chr17	liftOver	exon	67064	67137	.	+	.	Name=tK(UUU)Q_noncoding_exon;Parent=tK(UUU)Q_BY4741
chr17	liftOver	exon	66098	66182	.	+	.	Name=tL(UAA)Q_noncoding_exon;Parent=tL(UAA)Q_BY4741
chr17	liftOver	exon	72633	72708	.	+	.	Name=tM(CAU)Q1_noncoding_exon;Parent=tM(CAU)Q1_BY4741
chr17	liftOver	exon	85037	85114	.	+	.	Name=tM(CAU)Q2_noncoding_exon;Parent=tM(CAU)Q2_BY4741
chr17	liftOver	exon	71436	71506	.	+	.	Name=tN(GUU)Q_noncoding_exon;Parent=tN(GUU)Q_BY4741
chr17	liftOver	exon	731	802	.	+	.	Name=tP(UGG)Q_noncoding_exon;Parent=tP(UGG)Q_BY4741
chr17	liftOver	exon	66213	66288	.	+	.	Name=tQ(UUG)Q_noncoding_exon;Parent=tQ(UUG)Q_BY4741
chr17	liftOver	exon	69292	69365	.	+	.	Name=tR(ACG)Q2_noncoding_exon;Parent=tR(ACG)Q2_BY4741
chr17	liftOver	exon	67312	67384	.	+	.	Name=tR(UCU)Q1_noncoding_exon;Parent=tR(UCU)Q1_BY4741
chr17	liftOver	exon	69206	69291	.	+	.	Name=tS(GCU)Q1_noncoding_exon;Parent=tS(GCU)Q1_BY4741
chr17	liftOver	exon	48204	48293	.	+	.	Name=tS(UGA)Q2_noncoding_exon;Parent=tS(UGA)Q2_BY4741
chr17	liftOver	exon	63865	63940	.	+	.	Name=tT(UGU)Q1_noncoding_exon;Parent=tT(UGU)Q1_BY4741
chr17	liftOver	exon	78091	78164	.	-	.	Name=tT(UAG)Q2_noncoding_exon;Parent=tT(UAG)Q2_BY4741
chr17	liftOver	exon	78535	78610	.	+	.	Name=tV(UAC)Q_noncoding_exon;Parent=tV(UAC)Q_BY4741
chr17	liftOver	exon	9374	9447	.	+	.	Name=tW(UCA)Q_noncoding_exon;Parent=tW(UCA)Q_BY4741
chr17	liftOver	exon	70827	70910	.	+	.	Name=tY(GUA)Q_noncoding_exon;Parent=tY(GUA)Q_BY4741
chr14	liftOver	exon	585585	585924	.	+	.	Name=NME1_noncoding_exon;Parent=NME1_BY4741
chr16	liftOver	exon	281504	281517	.	-	.	Name=snR17b_noncoding_exon;Parent=snR17b_BY4741
chr16	liftOver	exon	281056	281373	.	-	.	Name=snR17b_noncoding_exon;Parent=snR17b_BY4741
chr14	liftOver	exon	716118	716282	.	+	.	Name=snR49_noncoding_exon;Parent=snR49_BY4741
chr12	liftOver	exon	198783	199388	.	+	.	Name=snR30_noncoding_exon;Parent=snR30_BY4741
chr07	liftOver	exon	345986	346230	.	+	.	Name=snR10_noncoding_exon;Parent=snR10_BY4741
chr01	liftOver	exon	142369	142470	.	+	.	Name=snR18_noncoding_exon;Parent=snR18_BY4741
chr13	liftOver	exon	499981	500069	.	-	.	Name=snR24_noncoding_exon;Parent=snR24_BY4741
chr04	liftOver	exon	1492477	1493026	.	-	.	Name=snR84_noncoding_exon;Parent=snR84_BY4741
chr07	liftOver	exon	316788	317055	.	+	.	Name=snR82_noncoding_exon;Parent=snR82_BY4741
chr13	liftOver	exon	626346	626651	.	+	.	Name=snR83_noncoding_exon;Parent=snR83_BY4741
chr08	liftOver	exon	212407	213116	.	+	.	Name=RUF5-1_noncoding_exon;Parent=RUF5-1_BY4741
chr08	liftOver	exon	214405	215114	.	+	.	Name=RUF5-2_noncoding_exon;Parent=RUF5-2_BY4741
chr05	liftOver	exon	321115	321665	.	+	.	Name=SRG1_noncoding_exon;Parent=SRG1_BY4741
chr17	liftOver	exon	85297	85779	.	+	.	Name=RPM1_noncoding_exon;Parent=RPM1_BY4741
chr14	liftOver	exon	721936	722209	.	-	.	Name=snR191_noncoding_exon;Parent=snR191_BY4741
chr12	liftOver	exon	482044	482162	.	+	.	Name=RDN5-4_noncoding_exon;Parent=RDN5-4_BY4741
chr12	liftOver	exon	485696	485814	.	+	.	Name=RDN5-5_noncoding_exon;Parent=RDN5-5_BY4741
chr12	liftOver	exon	489348	489468	.	+	.	Name=RDN5-6_noncoding_exon;Parent=RDN5-6_BY4741
chr05	liftOver	exon	52150	52320	.	-	.	Name=snR80_noncoding_exon;Parent=snR80_BY4741
chr15	liftOver	exon	234346	234546	.	+	.	Name=snR81_noncoding_exon;Parent=snR81_BY4741
chr13	liftOver	exon	67765	67935	.	-	.	Name=snR85_noncoding_exon;Parent=snR85_BY4741
chr02	liftOver	exon	307185	307345	.	-	.	Name=snR161_noncoding_exon;Parent=snR161_BY4741
chr13	liftOver	exon	667285	667453	.	-	.	Name=RNA170_noncoding_exon;Parent=RNA170_BY4741
chr11	liftOver	exon	431033	431141	.	-	.	Name=snR87_noncoding_exon;Parent=snR87_BY4741
chr13	liftOver	exon	762107	763110	.	-	.	Name=snR86_noncoding_exon;Parent=snR86_BY4741
chr01	liftOver	exon	99305	99868	.	+	.	Name=HRA1_noncoding_exon;Parent=HRA1_BY4741
chr06	liftOver	exon	131061	131503	.	-	.	Name=RUF20_noncoding_exon;Parent=RUF20_BY4741
chr06	liftOver	exon	57815	58521	.	-	.	Name=RUF21_noncoding_exon;Parent=RUF21_BY4741
chr06	liftOver	exon	199299	199813	.	-	.	Name=RUF22_noncoding_exon;Parent=RUF22_BY4741
chr06	liftOver	exon	221714	221967	.	+	.	Name=RUF23_noncoding_exon;Parent=RUF23_BY4741
chr09	liftOver	exon	393887	397085	.	-	.	Name=ICR1_noncoding_exon;Parent=ICR1_BY4741
chr09	liftOver	exon	396002	396942	.	+	.	Name=PWR1_noncoding_exon;Parent=PWR1_BY4741

In [ ]:


In [ ]:

def get_gene_list(gene_dict): return list(set(gene_dict.keys())) def get_UTR_regions(gene_dict, gene_id, transcript, cds): if len(cds)==0: return [], [] utr5_regions = [] utr3_regions = [] utrs = gene_dict[gene_id][transcript]['UTR'] first_cds = cds[0] last_cds = cds[-1] for utr in utrs: ## Push all cds at once ## Sort later to remove duplicates strand = utr.strand if strand == '+': if utr.stop < first_cds.start: utr.feature_type = 'five_prime_UTR' utr5_regions.append(utr) elif utr.start > last_cds.stop: utr.feature_type = 'three_prime_UTR' utr3_regions.append(utr) else: raise RuntimeError('Error with cds') elif strand == '-': if utr.stop < first_cds.start: utr.feature_type = 'three_prime_UTR' utr3_regions.append(utr) elif utr.start > last_cds.stop: utr.feature_type = 'five_prime_UTR' utr5_regions.append(utr) else: raise RuntimeError('Error with cds') return utr5_regions, utr3_regions def create_bed(regions, bedtype='0'): '''Create bed from list of regions bedtype: 0 or 1 0-Based or 1-based coordinate of the BED ''' bedstr = '' for region in regions: if isinstance(region, defaultdict): continue try: id = len(region.attributes['gene_id']) except: print region ## GTF start is 1-based, so shift by one while writing ## to 0-based BED format if bedtype == '0': start = region.start - 1 else: start = region.start bedstr += '{}\t{}\t{}\t{}\t{}\t{}\n'.format(region.chrom, start, region.stop, re.sub('\.\d+', '', region.attributes['gene_id'][0]), '.', region.strand) return bedstr def rename_regions(regions, gene_id): regions = list(regions) if len(regions) == 0: return [] for region in regions: region.attributes['gene_id'] = gene_id return regions def merge_regions(db, regions): if len(regions) == 0: return [] merged = db.merge(sorted(list(regions), key=lambda x: x.start)) return merged def merge_regions_nostrand(db, regions): if len(regions) == 0: return [] merged = db.merge(sorted(list(regions), key=lambda x: x.start), ignore_strand=True) return merged

In [99]:
utr5_bed = ''
utr3_bed = ''
gene_bed = ''
exon_bed = ''
intron_bed = ''
start_codon_bed = ''
stop_codon_bed = ''
cds_bed = ''

gene_list = []

for gene_id in get_gene_list(gene_dict):
    gene_list.append(gene_dict[gene_id]['gene'])
    
    utr5_regions, utr3_regions = [], []
    exon_regions, intron_regions = [], []
    star_codon_regions, stop_codon_regions = [], []
    cds_regions = []
    
    for feature in gene_dict[gene_id].keys():
        if feature == 'gene':
            continue
        cds = list(gene_dict[gene_id][feature]['CDS'])
        exons = list(gene_dict[gene_id][feature]['exon'])
        merged_exons = merge_regions(db, exons)
        introns = list(gene_dict[gene_id][feature]['intron'])
        merged_introns = merge_regions(db, introns)
        exon_regions += exons
        intron_regions += introns
        cds_regions += cds
    if exon_regions:    
        print gene_id, exon_regions
    merged_exons = merge_regions(db, exon_regions)
    renamed_exons = rename_regions(merged_exons, gene_id)
    
    merged_introns = merge_regions(db, intron_regions)
    renamed_introns = rename_regions(merged_introns, gene_id)
    
    merged_cds = merge_regions_nostrand(db, cds_regions)
    renamed_cds = rename_regions(merged_cds, gene_id)
    
    exon_bed += create_bed(renamed_exons)
    intron_bed += create_bed(renamed_introns)
    cds_bed += create_bed(renamed_cds)
    
    
gene_bed = create_bed(gene_list)
gene_bedtool = pybedtools.BedTool(gene_bed, from_string=True)
exon_bedtool = pybedtools.BedTool(exon_bed, from_string=True)
intron_bedtool = pybedtools.BedTool(intron_bed, from_string=True)
cds_bedtool = pybedtools.BedTool(cds_bed, from_string=True)

gene_bedtool.remove_invalid().sort().saveas('{}.genes.bed'.format(prefix))
exon_bedtool.remove_invalid().sort().saveas('{}.exon.bed'.format(prefix))
intron_bedtool.remove_invalid().sort().saveas('{}.intron.bed'.format(prefix))
cds_bedtool.remove_invalid().sort().saveas('{}.cds.bed'.format(prefix))


Out[99]:
<BedTool(/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae_BY4741/annotation/BY4741_Toronto_2012.gffutils.cds.bed)>

In [ ]:
gene_list

In [14]:
for gene_id in get_gene_list(gene_dict):
    start_codons = []
    stop_codons = []
    for start_codon in db.children(gene_id, featuretype='start_codon'):
        ## 1 -based stop
        ## 0-based start handled while converting to bed
        start_codon.stop = start_codon.start
        start_codons.append(start_codon)
    for stop_codon in db.children(gene_id, featuretype='stop_codon'):
        stop_codon.start = stop_codon.stop
        stop_codon.stop = stop_codon.stop+1
        stop_codons.append(stop_codon)
    merged_start_codons = merge_regions(db, start_codons)
    renamed_start_codons = rename_regions(merged_start_codons, gene_id)
    merged_stop_codons = merge_regions(db, stop_codons)
    renamed_stop_codons = rename_regions(merged_stop_codons, gene_id)
    
    start_codon_bed += create_bed(renamed_start_codons)    
    stop_codon_bed += create_bed(renamed_stop_codons)

    
start_codon_bedtool = pybedtools.BedTool(start_codon_bed, from_string=True)
stop_codon_bedtool = pybedtools.BedTool(stop_codon_bed, from_string=True)
start_codon_bedtool.remove_invalid().sort().saveas('{}.start_codon.bed'.format(prefix))
stop_codon_bedtool.remove_invalid().sort().saveas('{}.stop_codon.bed'.format(prefix))


Out[14]:
<BedTool(/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae/annotation/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.gffutils.stop_codon.bed)>

In [15]:
## TSS
polyA_sites_bed = ''
tss_sites_bed = ''
for gene_id in get_gene_list(gene_dict):
    tss_sites = []
    polyA_sites = []
    for transcript in db.children(gene_id, featuretype='transcript'):
        start_t = copy.deepcopy(transcript)
        stop_t = copy.deepcopy(transcript)
        
        start_t.stop = start_t.start + 1
        
        stop_t.start = stop_t.stop
        
        if transcript.strand == '-':
            start_t, stop_t = stop_t, start_t
        polyA_sites.append(start_t)
        tss_sites.append(stop_t)
    merged_polyA_sites = merge_regions(db, polyA_sites)
    renamed_polyA_sites = rename_regions(merged_polyA_sites, gene_id)    
    merged_tss_sites = merge_regions(db, tss_sites)
    renamed_tss_sites = rename_regions(merged_tss_sites, gene_id)
    polyA_sites_bed += create_bed(renamed_polyA_sites)    
    tss_sites_bed += create_bed(renamed_tss_sites)

polyA_sites_bedtool = pybedtools.BedTool(polyA_sites_bed, from_string=True)
tss_sites_bedtool = pybedtools.BedTool(tss_sites_bed, from_string=True)
polyA_sites_bedtool.remove_invalid().sort().saveas('{}.polyA_sites.bed'.format(prefix))
tss_sites_bedtool.remove_invalid().sort().saveas('{}.tss_sites.bed'.format(prefix))


Out[15]:
<BedTool(/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae/annotation/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.gffutils.tss_sites.bed)>

In [16]:
tss = tsses(db, as_bed6=True, merge_overlapping=True)
tss.remove_invalid().sort().saveas('{}.tss_temp.bed'.format(prefix))
promoter = tss.slop(l=1000, r=1000, s=True, g=chrsizes)
promoter.remove_invalid().sort().saveas('{}.promoter.1000.bed'.format(prefix))


Out[16]:
<BedTool(/home/cmb-panasas2/skchoudh/genomes/S_cerevisiae/annotation/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.gffutils.promoter.1000.bed)>

In [17]:
for l in [1000, 2000, 3000, 4000, 5000]:
    promoter = tss.slop(l=l, r=l, s=True, g=chrsizes)
    promoter.remove_invalid().sort().saveas('{}.promoter.{}.bed'.format(prefix, l))

In [ ]: