Description:

  • Playing around with convolution

In [1]:
import numpy as np
from ggplot import *
import matplotlib as plt
import pandas as pd


/opt/anaconda/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module argparse was already imported from /opt/anaconda/lib/python2.7/argparse.pyc, but /opt/anaconda/lib/python2.7/site-packages is being added to sys.path
  from pkg_resources import resource_stream

In [2]:
%load_ext rpy2.ipython
%pylab inline


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['plt', 'xlim', 'ylim']
`%matplotlib` prevents importing * from pylab and numpy

convolution of 2 normal distributions


In [46]:
n = 10000
d1 = np.random.normal(loc=50, scale=1, size=n)   
d2 = np.random.normal(loc=0, scale=0.01, size=n)  # err

df = pd.DataFrame({'d1':d1, 'd2':d2})

In [47]:
matplotlib.rcParams['figure.figsize'] = [1,1]

p1 = ggplot(df, aes(d1)) +\
    geom_density() +\
    labs(x='value', y='density') +\
    theme_matplotlib(rc={"figure.figsize": "5, 3"})

p2 = ggplot(df, aes(d2)) +\
    geom_density() +\
    labs(x='value', y='density') +\
    theme_matplotlib(rc={"figure.figsize": "5, 3"})

print p1
print p2


<ggplot: (8744866508433)>
<ggplot: (8744866514449)>

In [48]:
ggplot(df, aes(d1,d2)) +\
    geom_point() +\
    labs(x='d1', y='d2')


Out[48]:
<ggplot: (8744866483185)>

In [49]:
d12 = np.convolve(d1, d2)
df12 = pd.DataFrame(d12)
df12.columns = ['d12']

p1.draw()
p2.draw()

ggplot(df12, aes(d12)) +\
    geom_density() +\
    labs(x='value', y='density') +\
    theme_matplotlib(rc={"figure.figsize": "5, 3"})


Out[49]:
<ggplot: (8744864823993)>

Trying convolution of GC values with diffusion


In [50]:
def diffusion(GC, frag_len):
    if GC < 30:
        return 44 / frag_len
    elif GC > 70:
        return 45 / frag_len
    else:
        return 44.5 / frag_len

In [51]:
n = 100000
gc_vals = np.random.normal(loc=50, scale=10, size=n)   
frag_lens = np.random.normal(loc=10000, scale=500, size=n)   
err = np.random.normal(loc=0, scale=44.5 / mean(frag_lens), size=n)


df = pd.DataFrame({'gc':gc_vals, 'frag_len':frag_lens, 'err':err})

In [52]:
ggplot(df, aes(x='gc', y='err')) +\
    geom_point()


Out[52]:
<ggplot: (8744866481773)>

In [59]:
# convolving
gc_wDiff = np.convolve(gc_vals, err, mode='same')

data = pd.DataFrame({'x':gc_wDiff})

ggplot(data, aes(x='x')) +\
    geom_density()


[  6.95718325  13.30180726  15.73933109 ..., -84.96988446 -77.88147643
 -94.06946579]
Out[59]:
<ggplot: (8744866520389)>

In [ ]: