In [1]:
import matplotlib
import Bio.SeqIO
import Bio.Restriction
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import numpy as np
In [2]:
def sim(mol):
"Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"
sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]
sites = [site for site in sites if random.randint(1,5) <= 4] # sim missing sites
allsites = [0] + sites + [len(mol)]
# TODO add random break sites
sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]
sigma = .58
sizes = [size + random.gauss(0, math.sqrt(size*sigma**2)) for size in sizes]
sizes = [size/1000.0 for size in sizes] # convert to kb
sizes = [size for size in sizes if size > .5]
error_free_sizes = [size for size in sizes]
sizes = [size for size in sizes if random.randint(1,5) <= 4] # sim missing sites
return [sizes] + [error_free_sizes] + [sites]
In [ ]: