In [74]:
import numpy as np
from ggplot import *
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import scipy.signal
import scipy.stats
In [75]:
%load_ext rpy2.ipython
%matplotlib inline
In [76]:
n = 10000
d1 = np.random.normal(loc=50, scale=1, size=n)
d2 = np.random.normal(loc=0, scale=0.01, size=n)  # error term
df = pd.DataFrame({'d1':d1, 'd2':d2})
In [77]:
p1 = ggplot(df, aes(x='d1')) +\
    geom_density() +\
    labs(x='value', y='density') +\
    theme_matplotlib(rc={"figure.figsize": "5, 3"})
p2 = ggplot(df, aes(x='d2')) +\
    geom_density() +\
    labs(x='value', y='density') +\
    theme_matplotlib(rc={"figure.figsize": "5, 3"})
print(p1)
print(p2)
In [78]:
ggplot(df, aes(x='d1', y='d2')) +\
    geom_point() +\
    labs(x='d1', y='d2')
Out[78]:
In [79]:
# NB: this convolves the raw sample arrays ('full' mode, length 2n - 1),
# not the underlying PDFs - the PMF approach below does the latter properly
d12 = np.convolve(d1, d2)
df12 = pd.DataFrame({'d12': d12})
p1.draw()
p2.draw()
ggplot(df12, aes(x='d12')) +\
    geom_density() +\
    labs(x='value', y='density') +\
    theme_matplotlib(rc={"figure.figsize": "5, 3"})
Out[79]:
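The density of the sum of two independent random variables is the convolution of their individual densities; with samples already in hand, the element-wise sum is the Monte Carlo analogue of that convolution. A minimal sketch of that check (not in the original run), reusing df from above:
In [ ]:
# Element-wise sum of the samples approximates the distribution whose
# PDF is the convolution of the PDFs of d1 and d2
df['d1_plus_d2'] = df['d1'] + df['d2']
ggplot(df, aes(x='d1_plus_d2')) +\
    geom_density() +\
    labs(x='value', y='density')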
In [80]:
def diffusion(GC, frag_len):
    # error scale as a piecewise function of GC content and fragment length
    if GC < 30:
        return 44 / frag_len
    elif GC > 70:
        return 45 / frag_len
    else:
        return 44.5 / frag_len
In [81]:
n = 100000
gc_vals = np.random.normal(loc=50, scale=10, size=n)
frag_lens = np.random.normal(loc=10000, scale=500, size=n)
err = np.random.normal(loc=0, scale=44.5 / np.mean(frag_lens), size=n)  # middle branch of diffusion()
df = pd.DataFrame({'gc':gc_vals, 'frag_len':frag_lens, 'err':err})
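diffusion() above is defined but never applied; the error scale here hard-codes its middle branch at the mean fragment length. A hedged sketch of drawing a per-sample error scale instead (the vectorised call and the variable names are illustrative, not from the original):
In [ ]:
# Illustrative only: evaluate diffusion() element-wise so each sample's
# error scale reflects its own GC content and fragment length
scales = np.vectorize(diffusion)(gc_vals, frag_lens)
err_varied = np.random.normal(loc=0, scale=1, size=n) * scales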
In [82]:
ggplot(df, aes(x='gc', y='err')) +\
    geom_point()
Out[82]:
In [83]:
# NB: again convolving the raw sample arrays, not the PDFs
gc_wDiff = np.convolve(gc_vals, err, mode='same')
data = pd.DataFrame({'x': gc_wDiff})
ggplot(data, aes(x='x')) +\
    geom_density()
Out[83]:
In [119]:
gc_vals = scipy.stats.norm(loc=50, scale=10)
errscale = 6
err = scipy.stats.norm(loc=0, scale=errscale)
# NB: step size determines the precision of the approximation
delta = 1e-4
big_grid = np.arange(-100, 100, delta)
# Discretise each continuous PDF into a probability mass function on the grid
pmf1 = gc_vals.pdf(big_grid) * delta
pmf2 = err.pdf(big_grid) * delta
In [120]:
conv_pmf = scipy.signal.fftconvolve(pmf1, pmf2, 'same')
print("Grid length, sum(gc_pmf), sum(err_pmf), sum(conv_pmf):")
print(len(big_grid), sum(gc_vals.pdf(big_grid)*delta), sum(err.pdf(big_grid)*delta), sum(conv_pmf))
conv_pmf = conv_pmf / sum(conv_pmf)
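Both inputs here are Gaussian, so the convolution has a closed form: N(50, sqrt(10^2 + 6^2)). A quick sanity check against that analytic result (a sketch, not in the original notebook):
In [ ]:
# Analytic check: convolving N(50, 10) with N(0, 6) should give N(50, sqrt(136))
analytic = scipy.stats.norm(loc=50, scale=np.sqrt(10**2 + 6**2))
analytic_pmf = analytic.pdf(big_grid) * delta
print(np.abs(conv_pmf - analytic_pmf).max())  # difference should be near zero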
In [121]:
matplotlib.rcParams['figure.figsize'] = [8,5]
plt.plot(big_grid,pmf1, label='GC values')
plt.plot(big_grid,pmf2, label='Gaussian error')
plt.plot(big_grid,conv_pmf, label='Sum')
plt.xlim(-3,max(big_grid))
plt.legend(loc='best'), plt.suptitle('PMFs')
Out[121]:
In [122]:
matplotlib.rcParams['figure.figsize'] = [8,5]
plt.plot(big_grid,pmf1, label='GC values')
#plt.plot(big_grid,pmf2, label='Gaussian error')
plt.plot(big_grid,conv_pmf, label='Sum')
plt.xlim(-3,max(big_grid))
plt.legend(loc='best'), plt.suptitle('PMFs')
Out[122]:
In [106]:
import scipy.stats as stats
In [110]:
simple = stats.uniform(loc=50,scale=3)
errscale = 0.25
err = stats.norm(loc=0,scale=errscale)
# NB: the kernel support array **must** be symmetric about the centre of the
# kernel (the error PDF) for this to work correctly.
# The support also needs to extend beyond any significant areas of the component PDFs.
# Here we just define one massive support for both the input PDF and the error PDF
# (kernel), but we can do much better (see the sketch after this cell).
# NB: step size determines the precision of the approximation
delta = 1e-4
big_grid = np.arange(-100, 100, delta)
# We cannot analytically convolve continuous PDFs in general,
# so we make a probability mass function on a fine grid
# - a discrete approximation to the PDF, amenable to FFT...
pmf1 = simple.pdf(big_grid)*delta
pmf2 = err.pdf(big_grid)*delta
conv_pmf = scipy.signal.fftconvolve(pmf1, pmf2, 'same')  # convolved probability mass function
print("Grid length, sum(gauss_pmf), sum(uni_pmf), sum(conv_pmf):")
print(len(big_grid), sum(err.pdf(big_grid)*delta), sum(simple.pdf(big_grid)*delta), sum(conv_pmf))
conv_pmf = conv_pmf / sum(conv_pmf)
plt.plot(big_grid,pmf1, label='Tophat')
plt.plot(big_grid,pmf2, label='Gaussian error')
plt.plot(big_grid,conv_pmf, label='Sum')
plt.xlim(-3,max(big_grid))
plt.legend(loc='best'), plt.suptitle('PMFs')
Out[110]:
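The comments above promise something better than one massive shared support: discretise the error PDF (the kernel) on its own small grid, symmetric about zero and just wide enough to cover its significant mass, then convolve it with the signal PMF. A hedged sketch of that idea (the grid bounds and variable names are illustrative, not from the original):
In [ ]:
# Sketch: compact, symmetric kernel support instead of one massive grid.
# The signal grid just covers uniform(50, 53) plus the error tails;
# the kernel grid is symmetric about 0 and spans +/- 4 error SDs.
delta = 1e-4
signal_grid = np.arange(45, 58, delta)
kernel_grid = np.arange(-4 * errscale, 4 * errscale, delta)
signal_pmf = simple.pdf(signal_grid) * delta
kernel_pmf = err.pdf(kernel_grid) * delta
conv_pmf2 = scipy.signal.fftconvolve(signal_pmf, kernel_pmf, 'same')
conv_pmf2 = conv_pmf2 / sum(conv_pmf2)
plt.plot(signal_grid, conv_pmf2, label='Sum (compact kernel support)')
plt.legend(loc='best')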