In [1]:
import pandas as pd
import bitarray
In [2]:
# Read the assembly FASTA into one contiguous sequence string.
# NOTE(review): only the first line is treated as a header, so this assumes the
# file holds a single FASTA record -- confirm for this assembly.
with open( "../../d9539_asm_v1.2.fa") as fh:
    fh.readline()  # discard the first (header) line
    # Drop the newline from each remaining line and glue the pieces together.
    genome = "".join(line.replace("\n","") for line in fh)
genome_length = len(genome)
print(genome_length)
In [3]:
# Load the start/end coordinates of each CDS from the ORF stats table.
cds_coords = pd.read_csv("filt_orf_stats.csv")[["q_cds_start","q_cds_end"]]
# Transform intervals from 1-based coord to 0-based coord. Both ends are shifted
# by one, so every (start, end) pair keeps its end inclusive.
# The shift is applied to whole columns selected by label; the previous row-wise
# apply looked values up by integer position (row[0], row[1]) on a label-indexed
# Series, which recent pandas releases deprecate.
cds_intervals = list(zip(cds_coords["q_cds_start"] - 1, cds_coords["q_cds_end"] - 1))
# Preview the first few intervals only, rather than echoing the whole list.
cds_intervals[:5]
Out[3]:
In [4]:
# One bit per genome position; a set bit will mean "this position lies in a CDS".
# Every bit is cleared explicitly instead of relying on the constructor's initial
# contents (NOTE(review): older bitarray releases document them as uninitialised --
# confirm for the installed version).
genome_pos = bitarray.bitarray(genome_length)
genome_pos.setall(0)
In [5]:
# Flag every genome position that falls inside a CDS interval.
for pair in cds_intervals:
    # Order the endpoints before slicing: a pair stored as (larger, smaller) --
    # presumably how a reverse-strand ORF could be reported; TODO confirm against
    # the input table -- would otherwise give an empty slice and silently add
    # nothing to the mask.
    lo, hi = min(pair), max(pair)
    # The interval end is inclusive, hence the +1 on the slice stop.
    genome_pos[lo:(hi+1)] = True
In [6]:
#Coverage: percentage of genome positions flagged in genome_pos.
# bitarray.count() tallies the set bits inside the extension; the built-in sum()
# would walk the array one bit at a time in Python and return the same integer.
cov = 100.0 * genome_pos.count() / genome_length
print("Coverage: {}%".format( cov ) )
In [ ]: