In [14]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import numpy as np
import beadpy
import pandas as pd

In [483]:
%matplotlib notebook

In [664]:
def trajectory_simulator(pre_duration = 250, #Mean event start time
                         pre_sigma = 50, #Sigma of event start time distribution
                         post_duration = 250, #The bead stays on for this long at the end of the trajectory
                         mean_duration = 100, #Mean event duration
                         min_duration = 10, #Minimum event duration
                         mean_rate = 500, #Mean rate (distance units/timestep)
                         rate_sigma = 50, #Sigma of the rate distribution
                         noise_sigma = 500, #Mean sigma for the bead movement
                         noise_sigma_sigma = 100, #Sigma of the noise sigma distribution
                         pause_prob = 0.001, #Probability of entering a pause in a given timestep
                         pause_duration_prob = 0.2, #Probability of remaining paused in a given timestep once a pause has begun.
                         rate_change_prob = 0.1, #Probablity that the rate will change in a given timestep
                         DNA_length = 15000, #Length of the DNA - a hard limit on the event length
                         trajectory_number = 0):
    length = int(np.random.exponential(mean_duration)) #Length is drawn from an exponential distribution.
    while length < min_duration:
        length = int(np.random.exponential(mean_duration)) #The length should be at least a certain value.
        
    current_rate = 0
    pre = int(np.random.normal(loc=pre_duration, scale = pre_sigma))
    post = post_duration
    rate = 0
    ratesequence = [0]*pre
    noise_sigmaval = int(np.random.normal(loc=noise_sigma, scale = noise_sigma_sigma))
    position = [0]*pre
    nucleotides = []
    current_position = 0
    for i in range(0,pre):
        nucleotides.append(float(position[i]+np.random.normal(loc=0.0, scale = noise_sigmaval)))
    for i in range(0,length):
        randomnumber = random.random() #generate a random float between 0 and 1
        if i == 0: #Start the event
            rate =  np.random.normal(loc=mean_rate, scale = rate_sigma)            
        elif not rate == 0: #When during an event/no pause.
            if (randomnumber <= pause_prob): #Start a pause.
                rate = 0
            elif (randomnumber > pause_prob) & (randomnumber <= (pause_prob + rate_change_prob)): #Change the rate
                rate = np.random.normal(loc=mean_rate, scale = rate_sigma)
            else: #No rate change
                rate = rate #just FYI!         
        elif (rate == 0) & (not i ==0): #When in a pause.
            if (randomnumber < (1- pause_duration_prob)): #End the pause.
                rate = np.random.normal(loc=mean_rate, scale = rate_sigma)
            else:
                rate = 0 #Continue the pause.
        ratesequence.append(rate)
        current_position = current_position + rate
        position.append(current_position)
        nucleotides.append(float(current_position+np.random.normal(loc=0.0, scale = noise_sigmaval)))
        if current_position > DNA_length:
            length = i
            break
    for i in range(0,post):
        ratesequence.append(0)
        position.append(current_position)
        nucleotides.append(float(current_position+np.random.normal(loc=0.0, scale = noise_sigmaval)))
    time = range(0,len(nucleotides))
    results = pd.DataFrame({'time' : time,
                        'nucleotides' : nucleotides,
                           'rate' : ratesequence,
                           'position' : position})
    results['trajectory'] = trajectory_number
    return results

In [733]:
test = trajectory_simulator(pre_duration = 300,
                         pre_sigma = 5,
                         post_duration = 250, 
                         mean_duration = 100,
                         min_duration = 10,
                         mean_rate = 80,
                         rate_sigma = 30,
                         noise_sigma = 100,
                         noise_sigma_sigma = 20,
                         pause_prob = 0.1,
                         pause_duration_prob = 0.9,
                         rate_change_prob = 0.01,
                         DNA_length = 15000,
                         trajectory_number = 0)
exampletrajseg = beadpy.trajectory_plotter(test, 0, method = ('auto', 'whole'), 
                                           sigma_start = 10, sigma_end = 250,  
                                           eventregion = (200,500), 
                                           segmenttable = 0)



In [734]:
exampletrajseg


Out[734]:
rate intercept x1 x2 y1 y2 displacement duration trajectory
0 0.0 -7.0 0.0 308.0 -7.0 -7.1 -0.1 308.0 0.0
1 36.9 -11310.0 308.0 320.0 59.8 502.7 443.0 12.0 0.0
2 9.9 -2234.0 320.0 360.0 938.2 1334.7 396.5 40.0 0.0
3 1.3 1041.0 360.0 400.0 1525.0 1578.8 53.8 40.0 0.0
4 24.3 -7998.0 400.0 418.0 1725.7 2163.3 437.6 18.0 0.0
5 20.4 -6515.0 418.0 439.0 1994.7 2422.3 427.5 21.0 0.0
6 93.9 -38750.0 439.0 445.0 2486.8 3050.4 563.6 6.0 0.0
7 31.2 -10685.0 445.0 461.0 3181.9 3680.5 498.6 16.0 0.0
8 150.1 -65695.0 461.0 465.0 3488.7 4089.0 600.3 4.0 0.0
9 6.3 1119.0 465.0 500.0 4033.3 4252.6 219.3 35.0 0.0
10 58.4 -24929.0 500.0 514.0 4265.2 5082.7 817.4 14.0 0.0
11 12.5 -1513.0 514.0 519.0 4914.7 4977.2 62.5 5.0 0.0
12 106.8 -50394.0 519.0 533.0 5025.8 6520.7 1495.0 14.0 0.0
13 0.0 6590.0 533.0 797.0 6613.2 6624.6 11.4 264.0 0.0

Compare the table of actual changepoints below (ignoring rate changes of less than 10) with the above table of the discovered changepoints above.


In [735]:
changepoints = []
changepoint_values = []
duration = []
for i in test.time[1:]:
    if abs(test.rate[i] - test.rate[i-1]) > 10:
        changepoints.append(i)
        changepoint_values.append(test.rate[i])
for i in range(0, (len(changepoints) - 1)):
    duration.append(changepoints[i+1]-changepoints[i])
duration.append(0)
cpts = pd.DataFrame({'changepoint' : changepoints,
                    'rate' : changepoint_values,
                    'duration' : duration})
cpts


Out[735]:
changepoint duration rate
0 306 3 70.277313
1 309 5 0.000000
2 314 12 76.769973
3 326 23 0.000000
4 349 1 127.439888
5 350 8 0.000000
6 358 8 39.171484
7 366 34 0.000000
8 400 7 59.906436
9 407 15 0.000000
10 422 10 33.587825
11 432 6 0.000000
12 438 14 84.763355
13 452 10 0.000000
14 462 6 97.203649
15 468 26 0.000000
16 494 4 69.152155
17 498 6 0.000000
18 504 8 77.151217
19 512 8 0.000000
20 520 15 108.943335
21 535 0 0.000000

Simulation of a combined phi29 + E. coli experiment

Generate a large results table.


In [700]:
phi29results = pd.DataFrame()
for j in range(0,1000):
    temp = trajectory_simulator(pre_duration = 300,
                         pre_sigma = 20,
                         post_duration = 250, 
                         mean_duration = 100,
                         min_duration = 10,
                         mean_rate = 80,
                         rate_sigma = 40,
                         noise_sigma = 100,
                         noise_sigma_sigma = 20,
                         pause_prob = 0.1,
                         pause_duration_prob = 0.5,
                         rate_change_prob = 0.01,
                         DNA_length = 15000,
                         trajectory_number = j)
    phi29results = phi29results.append(temp)

In [699]:
coliresults = pd.DataFrame()
for j in range(1001,2000):
    temp = trajectory_simulator(pre_duration = 300,
                         pre_sigma = 20,
                         post_duration = 250, 
                         mean_duration = 150,
                         min_duration = 10,
                         mean_rate = 442,
                         rate_sigma = 198,
                         noise_sigma = 100,
                         noise_sigma_sigma = 20,
                         pause_prob = 0.1,
                         pause_duration_prob = 0.7,
                         rate_change_prob = 0.05,
                         DNA_length = 15000,
                         trajectory_number = j)
    coliresults = coliresults.append(temp)

In [701]:
results = pd.concat([phi29results,coliresults])

In [702]:
combosegments = beadpy.segment_finder(results, method = 'auto', sigma_start=10, sigma_end=200)

In [703]:
beadpy.segmentplotter(combosegments,1000,-2000,15000, 2, 10)


Out[703]:
<matplotlib.axes._subplots.AxesSubplot at 0xc24d1390>

In [719]:
beadpy.ratehist(combosegments,0, 1100, 40, 'displacement')