In [2]:
from random import randint, uniform, choice
from Bio import SeqIO
Simple function introducing mutations into a sequence at a given frequency (mutated bases are written in lowercase, unchanged bases in uppercase)
In [3]:
def mutate_seq(seq, freq):
    """Return a copy of ``seq`` with random point substitutions.

    Each position is handled independently: with probability ``freq`` the
    base is replaced by a different one drawn from a/t/c/g and emitted in
    lowercase; otherwise the original character is kept and emitted in
    uppercase. Lowercase characters in the result therefore mark the
    mutated positions.

    Args:
        seq: Iterable of single-character bases (typically a ``str``), in
            any case.
        freq: Per-base mutation probability. ``0`` never mutates and ``1``
            always mutates.

    Returns:
        A ``str`` with one output character per input character.
    """
    bases = ("a", "t", "c", "g")
    mutated = []
    for base in seq:
        # uniform(0, 1) evaluates to 0 + 1 * random(), i.e. a value in
        # [0, 1), so the strict comparison makes freq=0 and freq=1 exact.
        if uniform(0, 1) < freq:
            lowered = base.lower()
            # Exclude the current base so a "mutation" is always a change.
            mutated.append(choice([b for b in bases if b != lowered]))
        else:
            mutated.append(base.upper())
    # Join once at the end instead of growing a string (and shadowing the
    # builtin ``str``) inside the loop.
    return "".join(mutated)
In [4]:
# Demo: mutate a short sequence with freq=0.1; substituted bases come back in
# lowercase, untouched bases in uppercase. The result is random on every run.
mutate_seq ("CTCGATCGCTAGCATGCATCGTCGCATGCTCGATCAGCTAGCAGCATCAGCTAGCTGCATCAGTCA", 0.1)
Out[4]:
Example from a test dataset on the forward and reverse strands with increasing frequency of mutations
In [5]:
# Parse ./data/test.fa as FASTA and keep the resulting record in `a`; its
# `.seq` is the reference sequence sampled below.
# NOTE(review): SeqIO.read presumably requires the file to hold exactly one record — confirm.
a = SeqIO.read("./data/test.fa", "fasta")
In [6]:
# Write ./data/sample.fa: for each strand (forward, then reverse complement)
# emit one query per mutation level from 1% to 50%. Each query is a random
# 100-base window of the reference record `a`, passed through mutate_seq.
QUERY_LEN = 100      # length of every sampled window
MAX_MUTATION_PCT = 50  # highest mutation percentage written (inclusive)

# Stringify both strands once; they do not change between iterations.
strands = (
    ("forward", str(a.seq)),
    ("reverse", str(a.seq.reverse_complement())),
)

# Fail before the output file is opened (and truncated) if no window fits.
if len(a.seq) < QUERY_LEN:
    raise ValueError(
        "reference sequence is shorter than {} bases".format(QUERY_LEN)
    )

with open("./data/sample.fa", "w") as fp:
    for strand_name, strand_seq in strands:
        for i in range(1, MAX_MUTATION_PCT + 1):
            fp.write(">query_{:03d}%_mutation_{}\n".format(i, strand_name))
            # randint is inclusive at both ends, so the last valid start
            # still yields a full QUERY_LEN window.
            start = randint(0, len(strand_seq) - QUERY_LEN)
            window = strand_seq[start:start + QUERY_LEN]
            fp.write("{}\n".format(mutate_seq(window, i / 100.0)))