In [37]:
from ggplot import *
import pandas as pd
import itertools
import scipy
import matplotlib.cm as cm
In [7]:
%pylab inline
In [8]:
def diffusion_simple(frag_len):
return 44.5 / frag_len
In [9]:
frag_lens = pd.DataFrame({'frag_lens' : range(100, 50000, 100)})
In [13]:
ggplot(frag_lens, aes(x='frag_lens')) +\
stat_function(fun=lambda x: 44 / x, color='blue') +\
stat_function(fun=lambda x: 44.5 / x, color='red') +\
stat_function(fun=lambda x: 45 / x, color='green') +\
geom_vline(xintercept=4000) +\
labs(x='fragment length', y='sigma of BD distribution')
Out[13]:
Notes:
In [133]:
frag_lens = range(4000, 50000, 1000)
sigma = [sqrt(44500.0 / x) for x in frag_lens]
data = pd.DataFrame({'x':np.arange(-10, 11)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='sigma: [%G+C]', y='total')
for sig,col in zip(sigma,cols):
p = p + stat_function(fun=dnorm, args={'mean':0.0, 'var':sig}, color=col)
p
Out[133]:
In [130]:
frag_lens = range(4000, 50000, 1000)
sigma = [sqrt(44500.0 / x) for x in frag_lens]
sigma = [x/100.0 * 0.098 for x in sigma]
data = pd.DataFrame({'x':np.arange(-0.015,0.016,0.001)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='sigma: [BD]', y='total')
for sig,col in zip(sigma,cols):
p = p + stat_function(fun=dnorm, args={'mean':0.0, 'var':sig}, color=col)
p
Out[130]:
In [129]:
frag_lens = range(1000, 14000, 1000)
sigma = [sqrt(44500.0 / x) for x in frag_lens]
data = pd.DataFrame({'x':np.arange(-20,21)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='Gaussian of homogenous G+C fragments, [%G+C]', y='total')
for sig,col in zip(sigma,cols):
p = p + stat_function(fun=dnorm, args={'mean':0.0, 'var':sig}, color=col)
p
Out[129]:
Notes:
In [144]:
frag_lens = [500, 1000, 4000, 10000, 50000]
sigma = [sqrt(44500.0 / x) for x in frag_lens]
GC = [50] * len(frag_lens)
data = pd.DataFrame({'x':np.arange(0,101)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='%GC', y='total') +\
theme_bw()
for i,BD in enumerate(GC):
p = p + stat_function(fun=dnorm, args={'mean':GC, 'var':sigma[i]}, color=cols[i])
p
Out[144]:
In [143]:
frag_lens = [500, 1000, 4000, 10000, 50000]
sigma = [sqrt(44500.0 / x) for x in frag_lens]
sigma = [x/100.0 * 0.098 for x in sigma]
GC = [0.5] * len(frag_lens)
BD = [x * 0.098 + 1.66 for x in GC]
data = pd.DataFrame({'x':np.arange(1.66,1.76,0.01)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='Buoyant density', y='total') +\
theme_bw()
for i,BD in enumerate(BD):
p = p + stat_function(fun=dnorm, args={'mean':BD, 'var':sigma[i]}, color=cols[i])
p
Out[143]:
In [178]:
GC_mean = 50
GC_sigma = 5
frag_lens = [1000, 4000, 10000, 50000, 2000000]
frag_len_sigma = [44500.0/x for x in frag_lens]
data = pd.DataFrame({'x':np.arange(0,101)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='%G+C', y='density') +\
theme_bw()
for i in xrange(len(frag_lens)):
sigma_total = GC_sigma + frag_len_sigma[i]
p = p + stat_function(fun=dnorm, args={'mean':GC_mean, 'var':sigma_total}, color=cols[i])
p
Out[178]:
In [182]:
# just 4kb vs 2000kb
GC_mean = 50
GC_sigma = 5
frag_lens = [4000,20000000]
frag_len_sigma = [44500.0/x for x in frag_lens]
data = pd.DataFrame({'x':np.arange(0,101)})
cols = cm.rainbow(np.linspace(0,1,len(frag_lens)))
p = ggplot(data, aes(x='x')) +\
labs(x='%G+C', y='density') +\
theme_bw()
for i in xrange(len(frag_lens)):
sigma_total = GC_sigma + frag_len_sigma[i]
p = p + stat_function(fun=dnorm, args={'mean':GC_mean, 'var':sigma_total}, color=cols[i])
p
Out[182]:
In [14]:
def diffusion(GC, frag_len):
if GC < 30:
return 44 / frag_len
elif GC > 70:
return 45 / frag_len
else:
return 44.5 / frag_len
In [27]:
# fragment GC as a normal distribution
n = 10000
frag_mean = 5000
frag_stdev = 1000
gc_vals = np.random.normal(loc=50, scale=10, size=n)
frag_lens = np.random.normal(loc=frag_mean, scale=frag_stdev, size=n)
In [28]:
# getting sigmas for each fragment
sigmas = itertools.starmap(diffusion, zip(gc_vals, frag_lens))
sigmas = [x for x in sigmas]
In [29]:
# sampling a GC value for each normal distribution with fragment
def GC_wDif(GC, sigma):
return np.random.normal(loc=GC, scale=sigma, size=1)
GC_wDiff = itertools.starmap(GC_wDif, zip(gc_vals, sigmas))
GC_wDiff = [x for x in GC_wDiff]
In [30]:
df = pd.DataFrame({'GC' : gc_vals, 'frag_lens' : frag_lens, 'GC_wDiff' : GC_wDiff})
In [31]:
# plotting
ggplot(df, aes(x='GC', y='GC_wDiff')) +\
stat_smooth()
Out[31]:
In [20]:
#x_vals = pd.DataFrame({'x' : np.arange(100,10000, 100)})
sigma_vals = [44.5 / x for x in [100, 1000, 10000, 50000, 100000]]
print sigma_vals
data = pd.DataFrame({'x':np.arange(-5,6)})
def dnorm(x, mean, var):
return scipy.stats.norm(mean,var).pdf(x)
ggplot(data, aes(x='x')) +\
stat_function(fun=dnorm, args={'mean':0.0, 'var':0.445}, color='blue') +\
stat_function(fun=dnorm, args={'mean':0.0, 'var':0.0445}, color='red') +\
stat_function(fun=dnorm, args={'mean':0.0, 'var':0.00445}, color='green')
Out[20]:
In [21]:
def gc2bd(gc):
return 1.66 + gc/100.0*0.098
gc = np.arange(1,100)
bd = [gc2bd(x) for x in gc]
data = pd.DataFrame({'gc' : gc, 'bd' : bd})
ggplot(data, aes(x='gc', y='bd')) +\
geom_point()
Out[21]:
In [ ]: