notebook.community

Edit and run



In [1]:

    
from collections import defaultdict, OrderedDict
import warnings
import logging
import gffutils
import pybedtools
import pandas as pd
import copy
import re
from gffutils.pybedtools_integration import tsses

logging.basicConfig(level=logging.INFO)



In [2]:

    
gtf = '/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gtf'
gtf_db = '/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gtf.db'
prefix = '/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gffutils'
chrsizes = '/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/fasta/Drosophila_melanogaster.BDGP6.dna.toplevel.sizes'



In [3]:

    
def create_gene_dict(db):
    '''
    Store each feature line db.all_features() as a dict of dicts
    '''
    gene_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    for line_no, feature in enumerate(db.all_features()):
        gene_ids = feature.attributes['gene_id']
        feature_type = feature.featuretype
        if feature_type == 'gene':
            if len(gene_ids)!=1:
                logging.warning('Found multiple gene_ids on line {} in gtf'.format(line_no))
                break
            else:
                gene_id = gene_ids[0]
                gene_dict[gene_id]['gene'] = feature
        else:
            transcript_ids = feature.attributes['transcript_id']

            for gene_id in gene_ids:
                for transcript_id in transcript_ids:
                    gene_dict[gene_id][transcript_id][feature_type].append(feature)
    return gene_dict



In [4]:

    
db = gffutils.create_db(gtf, dbfn=gtf_db, disable_infer_genes=True, disable_infer_transcripts=True, merge_strategy='merge', force=True)
db = gffutils.FeatureDB(gtf_db, keep_order=True)
gene_dict = create_gene_dict(db)



In [5]:

    
for x in db.featuretypes():
    print(x)









    



CDS
Selenocysteine
exon
five_prime_utr
gene
start_codon
three_prime_utr
transcript



In [6]:

    
def get_gene_list(gene_dict):
    return list(set(gene_dict.keys()))


    
def create_bed(regions, bedtype='0'):
    '''Create bed from list of regions
    bedtype: 0 or 1
        0-Based or 1-based coordinate of the BED
    '''
    bedstr = ''
    for region in regions:
        assert len(region.attributes['gene_id']) == 1
        ## GTF start is 1-based, so shift by one while writing 
        ## to 0-based BED format
        if bedtype == '0':
            start = region.start - 1
        else:
            start = region.start
        bedstr += '{}\t{}\t{}\t{}\t{}\t{}\n'.format(region.chrom,
                                             start,
                                             region.stop,
                                             re.sub('\.\d+', '', region.attributes['gene_id'][0]),
                                             '.',
                                             region.strand)
    return bedstr

def rename_regions(regions, gene_id):
    regions = list(regions)
    if len(regions) == 0:
        return []
    for region in regions:
        region.attributes['gene_id'] = gene_id
    return regions

def merge_regions(db, regions):
    if len(regions) == 0:
        return []
    merged = db.merge(sorted(list(regions), key=lambda x: x.start))
    return merged

def merge_regions_nostrand(db, regions):
    if len(regions) == 0:
        return []
    merged = db.merge(sorted(list(regions), key=lambda x: x.start), ignore_strand=True)
    return merged



In [7]:

    
utr5_bed = ''
utr3_bed = ''
gene_bed = ''
exon_bed = ''
intron_bed = ''
start_codon_bed = ''
stop_codon_bed = ''
cds_bed = ''

gene_list = []

for gene_id in get_gene_list(gene_dict):
    gene_list.append(gene_dict[gene_id]['gene'])
    
    utr5_regions, utr3_regions = [], []
    exon_regions, intron_regions = [], []
    star_codon_regions, stop_codon_regions = [], []
    cds_regions = []
    
    for feature in gene_dict[gene_id].keys():
        if feature == 'gene':
            continue
        cds = list(gene_dict[gene_id][feature]['CDS'])
        exons = list(gene_dict[gene_id][feature]['exon'])
        merged_exons = merge_regions(db, exons)
        introns = db.interfeatures(merged_exons)
        #utr5_region, utr3_region = get_UTR_regions(gene_dict, gene_id, feature, cds)
        utr5_region = list(gene_dict[gene_id][feature]['five_prime_utr'])
        utr3_region = list(gene_dict[gene_id][feature]['three_prime_utr'])
        utr5_regions += utr5_region
        utr3_regions += utr3_region
        exon_regions += exons
        intron_regions += introns
        cds_regions += cds
        
    merged_utr5 = merge_regions(db, utr5_regions)
    renamed_utr5 = rename_regions(merged_utr5, gene_id)
    
    merged_utr3 = merge_regions(db, utr3_regions)
    renamed_utr3 = rename_regions(merged_utr3, gene_id)
    
    merged_exons = merge_regions(db, exon_regions)
    renamed_exons = rename_regions(merged_exons, gene_id)
    
    merged_introns = merge_regions(db, intron_regions)
    renamed_introns = rename_regions(merged_introns, gene_id)
    
    merged_cds = merge_regions(db, cds_regions)
    renamed_cds = rename_regions(merged_cds, gene_id)
    
    utr3_bed += create_bed(renamed_utr3)
    utr5_bed += create_bed(renamed_utr5)
    exon_bed += create_bed(renamed_exons)
    intron_bed += create_bed(renamed_introns)
    cds_bed += create_bed(renamed_cds)
    
    
gene_bed = create_bed(gene_list)
gene_bedtool = pybedtools.BedTool(gene_bed, from_string=True)
utr5_bedtool = pybedtools.BedTool(utr5_bed, from_string=True)
utr3_bedtool = pybedtools.BedTool(utr3_bed, from_string=True)
exon_bedtool = pybedtools.BedTool(exon_bed, from_string=True)
intron_bedtool = pybedtools.BedTool(intron_bed, from_string=True)
cds_bedtool = pybedtools.BedTool(cds_bed, from_string=True)

gene_bedtool.remove_invalid().sort().saveas('{}.genes.bed'.format(prefix))
utr5_bedtool.remove_invalid().sort().saveas('{}.UTR5.bed'.format(prefix))
utr3_bedtool.remove_invalid().sort().saveas('{}.UTR3.bed'.format(prefix))
exon_bedtool.remove_invalid().sort().saveas('{}.exon.bed'.format(prefix))
intron_bedtool.remove_invalid().sort().saveas('{}.intron.bed'.format(prefix))
cds_bedtool.remove_invalid().sort().saveas('{}.cds.bed'.format(prefix))









    Out[7]:





<BedTool(/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gffutils.cds.bed)>



In [8]:

    
for gene_id in get_gene_list(gene_dict):
    start_codons = []
    stop_codons = []
    for start_codon in db.children(gene_id, featuretype='start_codon'):
        ## 1 -based stop
        ## 0-based start handled while converting to bed
        start_codon.stop = start_codon.start
        start_codons.append(start_codon)
    for stop_codon in db.children(gene_id, featuretype='stop_codon'):
        stop_codon.start = stop_codon.stop
        stop_codon.stop = stop_codon.stop+1
        stop_codons.append(stop_codon)
    merged_start_codons = merge_regions(db, start_codons)
    renamed_start_codons = rename_regions(merged_start_codons, gene_id)
    merged_stop_codons = merge_regions(db, stop_codons)
    renamed_stop_codons = rename_regions(merged_stop_codons, gene_id)
    
    start_codon_bed += create_bed(renamed_start_codons)    
    stop_codon_bed += create_bed(renamed_stop_codons)

    
start_codon_bedtool = pybedtools.BedTool(start_codon_bed, from_string=True)
stop_codon_bedtool = pybedtools.BedTool(stop_codon_bed, from_string=True)
start_codon_bedtool.remove_invalid().sort().saveas('{}.start_codon.bed'.format(prefix))
stop_codon_bedtool.remove_invalid().sort().saveas('{}.stop_codon.bed'.format(prefix))









    Out[8]:





<BedTool(/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gffutils.stop_codon.bed)>



In [9]:

    
## TSS
polyA_sites_bed = ''
tss_sites_bed = ''
for gene_id in get_gene_list(gene_dict):
    tss_sites = []
    polyA_sites = []
    for transcript in db.children(gene_id, featuretype='transcript'):
        start_t = copy.deepcopy(transcript)
        stop_t = copy.deepcopy(transcript)
        
        start_t.stop = start_t.start + 1
        
        stop_t.start = stop_t.stop
        
        if transcript.strand == '-':
            start_t, stop_t = stop_t, start_t
        polyA_sites.append(start_t)
        tss_sites.append(stop_t)
    merged_polyA_sites = merge_regions(db, polyA_sites)
    renamed_polyA_sites = rename_regions(merged_polyA_sites, gene_id)    
    merged_tss_sites = merge_regions(db, tss_sites)
    renamed_tss_sites = rename_regions(merged_tss_sites, gene_id)
    polyA_sites_bed += create_bed(renamed_polyA_sites)    
    tss_sites_bed += create_bed(renamed_tss_sites)

polyA_sites_bedtool = pybedtools.BedTool(polyA_sites_bed, from_string=True)
tss_sites_bedtool = pybedtools.BedTool(tss_sites_bed, from_string=True)
polyA_sites_bedtool.remove_invalid().sort().saveas('{}.polyA_sites.bed'.format(prefix))
tss_sites_bedtool.remove_invalid().sort().saveas('{}.tss_sites.bed'.format(prefix))









    Out[9]:





<BedTool(/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gffutils.tss_sites.bed)>



In [10]:

    
tRNA_sites = []
tRNA_bed = ''
for gene_id in get_gene_list(gene_dict):
    for transcript in db.children(gene_id, featuretype='transcript'):
        if 'tRNA' in transcript.attributes['gene_biotype'] or 'Mt_tRNA' in transcript.attributes['transcript_biotype']:
            tRNA_sites.append(transcript)
    #merged_tRNA_sites = merge_regions_nostrand(db, tRNA_sites)
    #renamed_tRNA_sites = rename_regions(merged_tRNA_sites, gene_id)
    tRNA_bed += create_bed(tRNA_sites)

tRNA_bed = '\n'.join(list(OrderedDict.fromkeys(tRNA_bed.split('\n'))))

tRNA_sites_bedtool = pybedtools.BedTool(tRNA_bed, from_string=True)
tRNA_sites_bedtool.remove_invalid().sort().saveas('{}.tRNA_sites.bed'.format(prefix))









    Out[10]:





<BedTool(/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gffutils.tRNA_sites.bed)>



In [11]:

    
tRNA_sites_bedtool.to_dataframe()









    Out[11]:







  
    
      
      chrom
      start
      end
      name
      score
      strand
    
  
  
    
      0
      2R
      6164875
      6164948
      FBgn0011888
      .
      -
    
    
      1
      2R
      6162680
      6162753
      FBgn0011889
      .
      -
    
    
      2
      2R
      6166150
      6166224
      FBgn0011880
      .
      +
    
    
      3
      2R
      13430281
      13430355
      FBgn0011881
      .
      -
    
    
      4
      2R
      13430983
      13431057
      FBgn0011882
      .
      +
    
    
      5
      2R
      13431284
      13431358
      FBgn0011883
      .
      +
    
    
      6
      2R
      13431776
      13431850
      FBgn0011884
      .
      +
    
    
      7
      2R
      13432092
      13432166
      FBgn0011885
      .
      +
    
    
      8
      2R
      6165983
      6166056
      FBgn0011887
      .
      +
    
    
      9
      3L
      1379933
      1380005
      FBgn0052330
      .
      +
    
    
      10
      3R
      18628466
      18628538
      FBgn0051228
      .
      -
    
    
      11
      3R
      18108636
      18108708
      FBgn0051242
      .
      +
    
    
      12
      3R
      17646367
      17646440
      FBgn0051579
      .
      +
    
    
      13
      2R
      19414844
      19414917
      FBgn0050225
      .
      +
    
    
      14
      X
      20025098
      20025170
      FBgn0052826
      .
      +
    
    
      15
      3L
      1925640
      1925712
      FBgn0052312
      .
      +
    
    
      16
      2R
      20657890
      20657962
      FBgn0050155
      .
      +
    
    
      17
      X
      14104648
      14104730
      FBgn0011981
      .
      +
    
    
      18
      X
      14025108
      14025190
      FBgn0011980
      .
      -
    
    
      19
      X
      14117728
      14117810
      FBgn0011983
      .
      -
    
    
      20
      X
      14025565
      14025647
      FBgn0011982
      .
      -
    
    
      21
      2L
      3169581
      3169663
      FBgn0011985
      .
      +
    
    
      22
      2L
      3173002
      3173084
      FBgn0011984
      .
      -
    
    
      23
      2R
      11357563
      11357650
      FBgn0011987
      .
      -
    
    
      24
      3L
      5342083
      5342165
      FBgn0011986
      .
      +
    
    
      25
      3L
      13211550
      13211622
      FBgn0052128
      .
      +
    
    
      26
      3L
      13211213
      13211285
      FBgn0052129
      .
      +
    
    
      27
      3L
      13212364
      13212436
      FBgn0052126
      .
      +
    
    
      28
      3L
      13214108
      13214233
      FBgn0052127
      .
      +
    
    
      29
      2L
      2010868
      2010940
      FBgn0051940
      .
      -
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      282
      2R
      9661224
      9661296
      FBgn0011921
      .
      +
    
    
      283
      3L
      14674343
      14674415
      FBgn0011928
      .
      +
    
    
      284
      2R
      19414597
      19414679
      FBgn0028981
      .
      +
    
    
      285
      3R
      6822962
      6823035
      FBgn0011904
      .
      -
    
    
      286
      3L
      16349916
      16349989
      FBgn0052357
      .
      +
    
    
      287
      2R
      13430694
      13430815
      FBgn0011909
      .
      -
    
    
      288
      2L
      8290726
      8290799
      FBgn0011902
      .
      +
    
    
      289
      3R
      12131355
      12131427
      FBgn0011905
      .
      +
    
    
      290
      3R
      17631041
      17631114
      FBgn0051569
      .
      -
    
    
      291
      3R
      17645306
      17645379
      FBgn0051568
      .
      -
    
    
      292
      2R
      14984326
      14984408
      FBgn0050241
      .
      -
    
    
      293
      2R
      16220051
      16220123
      FBgn0050240
      .
      +
    
    
      294
      3R
      18108992
      18109064
      FBgn0051518
      .
      +
    
    
      295
      mitochondrion_genome
      3011
      3077
      FBgn0013699
      .
      +
    
    
      296
      mitochondrion_genome
      12669
      12734
      FBgn0013698
      .
      -
    
    
      297
      mitochondrion_genome
      3767
      3838
      FBgn0013697
      .
      +
    
    
      298
      mitochondrion_genome
      0
      65
      FBgn0013696
      .
      +
    
    
      299
      mitochondrion_genome
      8140
      8206
      FBgn0013695
      .
      -
    
    
      300
      mitochondrion_genome
      5542
      5607
      FBgn0013694
      .
      +
    
    
      301
      mitochondrion_genome
      6343
      6408
      FBgn0013693
      .
      -
    
    
      302
      mitochondrion_genome
      6258
      6325
      FBgn0013692
      .
      +
    
    
      303
      mitochondrion_genome
      3839
      3906
      FBgn0013691
      .
      +
    
    
      304
      mitochondrion_genome
      1321
      1383
      FBgn0013690
      .
      -
    
    
      305
      2L
      8481776
      8481848
      FBgn0051896
      .
      +
    
    
      306
      2L
      8570887
      8570960
      FBgn0051895
      .
      +
    
    
      307
      2L
      8683450
      8683522
      FBgn0051890
      .
      +
    
    
      308
      2R
      21161062
      21161134
      FBgn0050206
      .
      -
    
    
      309
      2R
      19728187
      19728259
      FBgn0050454
      .
      -
    
    
      310
      3R
      14044524
      14044606
      FBgn0011971
      .
      -
    
    
      311
      X
      14103362
      14103444
      FBgn0011977
      .
      -
    
  

312 rows × 6 columns



In [12]:

    
rRNA_sites = []
rRNA_bed = ''
for gene_id in get_gene_list(gene_dict):
    for transcript in db.children(gene_id, featuretype='transcript'):
        if 'rRNA' in transcript.attributes['gene_biotype']:
            rRNA_sites.append(transcript)
    #renamed_rRNA_sites = rename_regions(rRNA_sites, gene_id)
    rRNA_bed += create_bed(rRNA_sites)
rRNA_bed = '\n'.join(list(OrderedDict.fromkeys(rRNA_bed.split('\n'))))

rRNA_sites_bedtool = pybedtools.BedTool(rRNA_bed, from_string=True)
rRNA_sites_bedtool.remove_invalid().sort().saveas('{}.rRNA_sites.bed'.format(prefix))









    Out[12]:





<BedTool(/home/cmb-panasas2/skchoudh/genomes/drosophila_melanogaster_BDGP6/annotation/Drosophila_melanogaster.BDGP6.91.gffutils.rRNA_sites.bed)>



In [13]:

    
rRNA_sites_bedtool. to_dataframe()









    Out[13]:







  
    
      
      chrom
      start
      end
      name
      score
      strand
    
  
  
    
      0
      2R
      19731042
      19731177
      FBgn0053448
      .
      -
    
    
      1
      2R
      19730669
      19730804
      FBgn0053449
      .
      -
    
    
      2
      2R
      19731767
      19731902
      FBgn0053446
      .
      -
    
    
      3
      2R
      19731408
      19731543
      FBgn0053447
      .
      -
    
    
      4
      2R
      19732549
      19732684
      FBgn0053444
      .
      -
    
    
      5
      2R
      19733287
      19733422
      FBgn0053442
      .
      -
    
    
      6
      2R
      19732925
      19733060
      FBgn0053443
      .
      -
    
    
      7
      2R
      19734036
      19734171
      FBgn0053440
      .
      -
    
    
      8
      2R
      19733670
      19733805
      FBgn0053441
      .
      -
    
    
      9
      2R
      19758361
      19758496
      FBgn0053374
      .
      -
    
    
      10
      2R
      19757992
      19758127
      FBgn0053375
      .
      -
    
    
      11
      2R
      19759841
      19759976
      FBgn0053370
      .
      -
    
    
      12
      2R
      19759100
      19759235
      FBgn0053372
      .
      -
    
    
      13
      2R
      19758734
      19758869
      FBgn0053373
      .
      -
    
    
      14
      2R
      19764288
      19764423
      FBgn0053358
      .
      -
    
    
      15
      2R
      19764647
      19764782
      FBgn0053357
      .
      -
    
    
      16
      2R
      19765774
      19765909
      FBgn0053354
      .
      -
    
    
      17
      2R
      19765408
      19765543
      FBgn0053355
      .
      -
    
    
      18
      2R
      19766143
      19766278
      FBgn0053353
      .
      -
    
    
      19
      2R
      19756509
      19756644
      FBgn0053379
      .
      -
    
    
      20
      2R
      19753557
      19753692
      FBgn0053387
      .
      -
    
    
      21
      2R
      19753926
      19754061
      FBgn0053386
      .
      -
    
    
      22
      2R
      19749142
      19749277
      FBgn0053399
      .
      -
    
    
      23
      2R
      19751726
      19751861
      FBgn0053392
      .
      -
    
    
      24
      2R
      19751357
      19751492
      FBgn0053393
      .
      -
    
    
      25
      2R
      19752095
      19752229
      FBgn0053391
      .
      -
    
    
      26
      2R
      19750250
      19750385
      FBgn0053396
      .
      -
    
    
      27
      2R
      19749877
      19750012
      FBgn0053397
      .
      -
    
    
      28
      2R
      19750988
      19751123
      FBgn0053394
      .
      -
    
    
      29
      2R
      19750619
      19750754
      FBgn0053395
      .
      -
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      85
      2R
      19744719
      19744854
      FBgn0053411
      .
      -
    
    
      86
      2R
      19745095
      19745230
      FBgn0053410
      .
      -
    
    
      87
      X
      23278006
      23283731
      FBgn0267516
      .
      -
    
    
      88
      2R
      19747676
      19747811
      FBgn0053403
      .
      -
    
    
      89
      2R
      19748773
      19748908
      FBgn0053400
      .
      -
    
    
      90
      2R
      19748404
      19748539
      FBgn0053401
      .
      -
    
    
      91
      2R
      19746200
      19746335
      FBgn0053407
      .
      -
    
    
      92
      2R
      19729561
      19729696
      FBgn0053452
      .
      -
    
    
      93
      rDNA
      58804
      58834
      FBgn0267500
      .
      +
    
    
      94
      2R
      19763912
      19764047
      FBgn0053359
      .
      -
    
    
      95
      rDNA
      46204
      46327
      FBgn0267512
      .
      +
    
    
      96
      2R
      19738451
      19738586
      FBgn0053428
      .
      -
    
    
      97
      2R
      19741405
      19741540
      FBgn0053420
      .
      -
    
    
      98
      2R
      19740663
      19740798
      FBgn0053422
      .
      -
    
    
      99
      2R
      19740304
      19740439
      FBgn0053423
      .
      -
    
    
      100
      2R
      19739935
      19740070
      FBgn0053424
      .
      -
    
    
      101
      2R
      19739193
      19739328
      FBgn0053426
      .
      -
    
    
      102
      X
      23291699
      23291729
      FBgn0267524
      .
      +
    
    
      103
      2R
      19748042
      19748177
      FBgn0053402
      .
      -
    
    
      104
      2R
      19746573
      19746708
      FBgn0053406
      .
      -
    
    
      105
      2R
      19745827
      19745962
      FBgn0053408
      .
      -
    
    
      106
      rDNA
      55103
      59294
      FBgn0267506
      .
      +
    
    
      107
      rDNA
      66806
      74924
      FBgn0267507
      .
      +
    
    
      108
      rDNA
      70954
      74924
      FBgn0267504
      .
      +
    
    
      109
      rDNA
      42622
      49485
      FBgn0267505
      .
      +
    
    
      110
      rDNA
      70388
      70511
      FBgn0267502
      .
      +
    
    
      111
      rDNA
      70539
      70569
      FBgn0267503
      .
      +
    
    
      112
      rDNA
      67667
      69662
      FBgn0267501
      .
      +
    
    
      113
      X
      23280154
      23280277
      FBgn0250731
      .
      -
    
    
      114
      2R
      19757623
      19757758
      FBgn0053376
      .
      -
    
  

115 rows × 6 columns



In [ ]:

	chrom	start	end	name	score	strand
0	2R	6164875	6164948	FBgn0011888	.	-
1	2R	6162680	6162753	FBgn0011889	.	-
2	2R	6166150	6166224	FBgn0011880	.	+
3	2R	13430281	13430355	FBgn0011881	.	-
4	2R	13430983	13431057	FBgn0011882	.	+
5	2R	13431284	13431358	FBgn0011883	.	+
6	2R	13431776	13431850	FBgn0011884	.	+
7	2R	13432092	13432166	FBgn0011885	.	+
8	2R	6165983	6166056	FBgn0011887	.	+
9	3L	1379933	1380005	FBgn0052330	.	+
10	3R	18628466	18628538	FBgn0051228	.	-
11	3R	18108636	18108708	FBgn0051242	.	+
12	3R	17646367	17646440	FBgn0051579	.	+
13	2R	19414844	19414917	FBgn0050225	.	+
14	X	20025098	20025170	FBgn0052826	.	+
15	3L	1925640	1925712	FBgn0052312	.	+
16	2R	20657890	20657962	FBgn0050155	.	+
17	X	14104648	14104730	FBgn0011981	.	+
18	X	14025108	14025190	FBgn0011980	.	-
19	X	14117728	14117810	FBgn0011983	.	-
20	X	14025565	14025647	FBgn0011982	.	-
21	2L	3169581	3169663	FBgn0011985	.	+
22	2L	3173002	3173084	FBgn0011984	.	-
23	2R	11357563	11357650	FBgn0011987	.	-
24	3L	5342083	5342165	FBgn0011986	.	+
25	3L	13211550	13211622	FBgn0052128	.	+
26	3L	13211213	13211285	FBgn0052129	.	+
27	3L	13212364	13212436	FBgn0052126	.	+
28	3L	13214108	13214233	FBgn0052127	.	+
29	2L	2010868	2010940	FBgn0051940	.	-
...	...	...	...	...	...	...
282	2R	9661224	9661296	FBgn0011921	.	+
283	3L	14674343	14674415	FBgn0011928	.	+
284	2R	19414597	19414679	FBgn0028981	.	+
285	3R	6822962	6823035	FBgn0011904	.	-
286	3L	16349916	16349989	FBgn0052357	.	+
287	2R	13430694	13430815	FBgn0011909	.	-
288	2L	8290726	8290799	FBgn0011902	.	+
289	3R	12131355	12131427	FBgn0011905	.	+
290	3R	17631041	17631114	FBgn0051569	.	-
291	3R	17645306	17645379	FBgn0051568	.	-
292	2R	14984326	14984408	FBgn0050241	.	-
293	2R	16220051	16220123	FBgn0050240	.	+
294	3R	18108992	18109064	FBgn0051518	.	+
295	mitochondrion_genome	3011	3077	FBgn0013699	.	+
296	mitochondrion_genome	12669	12734	FBgn0013698	.	-
297	mitochondrion_genome	3767	3838	FBgn0013697	.	+
298	mitochondrion_genome	0	65	FBgn0013696	.	+
299	mitochondrion_genome	8140	8206	FBgn0013695	.	-
300	mitochondrion_genome	5542	5607	FBgn0013694	.	+
301	mitochondrion_genome	6343	6408	FBgn0013693	.	-
302	mitochondrion_genome	6258	6325	FBgn0013692	.	+
303	mitochondrion_genome	3839	3906	FBgn0013691	.	+
304	mitochondrion_genome	1321	1383	FBgn0013690	.	-
305	2L	8481776	8481848	FBgn0051896	.	+
306	2L	8570887	8570960	FBgn0051895	.	+
307	2L	8683450	8683522	FBgn0051890	.	+
308	2R	21161062	21161134	FBgn0050206	.	-
309	2R	19728187	19728259	FBgn0050454	.	-
310	3R	14044524	14044606	FBgn0011971	.	-
311	X	14103362	14103444	FBgn0011977	.	-

	chrom	start	end	name	score	strand
0	2R	19731042	19731177	FBgn0053448	.	-
1	2R	19730669	19730804	FBgn0053449	.	-
2	2R	19731767	19731902	FBgn0053446	.	-
3	2R	19731408	19731543	FBgn0053447	.	-
4	2R	19732549	19732684	FBgn0053444	.	-
5	2R	19733287	19733422	FBgn0053442	.	-
6	2R	19732925	19733060	FBgn0053443	.	-
7	2R	19734036	19734171	FBgn0053440	.	-
8	2R	19733670	19733805	FBgn0053441	.	-
9	2R	19758361	19758496	FBgn0053374	.	-
10	2R	19757992	19758127	FBgn0053375	.	-
11	2R	19759841	19759976	FBgn0053370	.	-
12	2R	19759100	19759235	FBgn0053372	.	-
13	2R	19758734	19758869	FBgn0053373	.	-
14	2R	19764288	19764423	FBgn0053358	.	-
15	2R	19764647	19764782	FBgn0053357	.	-
16	2R	19765774	19765909	FBgn0053354	.	-
17	2R	19765408	19765543	FBgn0053355	.	-
18	2R	19766143	19766278	FBgn0053353	.	-
19	2R	19756509	19756644	FBgn0053379	.	-
20	2R	19753557	19753692	FBgn0053387	.	-
21	2R	19753926	19754061	FBgn0053386	.	-
22	2R	19749142	19749277	FBgn0053399	.	-
23	2R	19751726	19751861	FBgn0053392	.	-
24	2R	19751357	19751492	FBgn0053393	.	-
25	2R	19752095	19752229	FBgn0053391	.	-
26	2R	19750250	19750385	FBgn0053396	.	-
27	2R	19749877	19750012	FBgn0053397	.	-
28	2R	19750988	19751123	FBgn0053394	.	-
29	2R	19750619	19750754	FBgn0053395	.	-
...	...	...	...	...	...	...
85	2R	19744719	19744854	FBgn0053411	.	-
86	2R	19745095	19745230	FBgn0053410	.	-
87	X	23278006	23283731	FBgn0267516	.	-
88	2R	19747676	19747811	FBgn0053403	.	-
89	2R	19748773	19748908	FBgn0053400	.	-
90	2R	19748404	19748539	FBgn0053401	.	-
91	2R	19746200	19746335	FBgn0053407	.	-
92	2R	19729561	19729696	FBgn0053452	.	-
93	rDNA	58804	58834	FBgn0267500	.	+
94	2R	19763912	19764047	FBgn0053359	.	-
95	rDNA	46204	46327	FBgn0267512	.	+
96	2R	19738451	19738586	FBgn0053428	.	-
97	2R	19741405	19741540	FBgn0053420	.	-
98	2R	19740663	19740798	FBgn0053422	.	-
99	2R	19740304	19740439	FBgn0053423	.	-
100	2R	19739935	19740070	FBgn0053424	.	-
101	2R	19739193	19739328	FBgn0053426	.	-
102	X	23291699	23291729	FBgn0267524	.	+
103	2R	19748042	19748177	FBgn0053402	.	-
104	2R	19746573	19746708	FBgn0053406	.	-
105	2R	19745827	19745962	FBgn0053408	.	-
106	rDNA	55103	59294	FBgn0267506	.	+
107	rDNA	66806	74924	FBgn0267507	.	+
108	rDNA	70954	74924	FBgn0267504	.	+
109	rDNA	42622	49485	FBgn0267505	.	+
110	rDNA	70388	70511	FBgn0267502	.	+
111	rDNA	70539	70569	FBgn0267503	.	+
112	rDNA	67667	69662	FBgn0267501	.	+
113	X	23280154	23280277	FBgn0250731	.	-
114	2R	19757623	19757758	FBgn0053376	.	-