In [1]:
! pip install -e ../
Now, we'll import the two libraries we're comparing: Biopython and CAI.
In [2]:
from Bio import SeqIO
from CAI import CAI, relative_adaptiveness
from Bio.SeqUtils import CodonUsage
We're going to use the highly expressed genes of E. coli as our reference set, and a test set of 100 CDSs (3000 bp each) generated with the Sequence Manipulation Suite.
In [3]:
# Reference set: every record in the E. coli highly-expressed-gene FASTA,
# reduced to a plain nucleotide string.
reference = [
    str(record.seq)
    for record in SeqIO.parse("ecoli.heg.fasta", "fasta")
]

# Test set: the sequences whose CAI will be computed in the benchmarks below.
sequence = [
    str(record.seq)
    for record in SeqIO.parse("test.fasta", "fasta")
]
In [4]:
bp = CodonUsage.CodonAdaptationIndex()
bp.generate_index("ecoli.heg.fasta")
%timeit [bp.cai_for_gene(seq) for seq in sequence]
In [5]:
weights = relative_adaptiveness(sequences=sequence)
%timeit [CAI(seq, weights=weights) for seq in sequence]