In [1]:
import matplotlib
import Bio.SeqIO
import Bio.Restriction
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import numpy as np

In [2]:
def sim(mol):
    "Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"
    sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]
    sites = [site for site in sites if random.randint(1,5) <= 4] # sim missing sites
    allsites = [0] + sites + [len(mol)]
    # TODO add random break sites
    sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]    
    sigma = .58
    sizes = [size + random.gauss(0, math.sqrt(size*sigma**2)) for size in sizes]
    sizes = [size/1000.0 for size in sizes] # convert to kb
    sizes = [size for size in sizes if size > .5]
    error_free_sizes = [size for size in sizes]
    sizes = [size for size in sizes if random.randint(1,5) <= 4] # sim missing sites
    return [sizes] + [error_free_sizes] + [sites]

In [ ]: