Description:

  • Determine which is faster:
    • creating a new distribution for each point in order to blur it
    • initializing one distribution per bin of values up front and selecting the right bin for each point via an interval tree (a rough sketch of this second approach follows below)
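Only the first approach (a fresh distribution per point) is implemented in the cells below. For reference, here is a minimal sketch of the second approach under stated assumptions: it pre-computes one Gaussian scale per fragment-length bin and, because the bins form a sorted non-overlapping partition, uses np.searchsorted as a stand-in for a full interval tree. The helper names (make_binned_samplers, blur_binned) and the bin width are illustrative, not part of the notebook.

import numpy as np

def make_binned_samplers(frag_min, frag_max, bin_width):
    # one Gaussian scale per fragment-length bin (hypothetical helper)
    edges = np.arange(frag_min, frag_max + bin_width, bin_width)
    centers = (edges[:-1] + edges[1:]) / 2.0
    scales = 44500.0 / centers  # same spread rule as gc_wDiff below
    return edges, scales

def blur_binned(gc_vals, frag_lens, edges, scales):
    # binary search over the sorted bin edges plays the role of the interval tree lookup
    idx = np.searchsorted(edges, frag_lens, side='right') - 1
    idx = np.clip(idx, 0, len(scales) - 1)
    return gc_vals + np.random.normal(loc=0.0, scale=scales[idx])

# usage sketch (bin width is an arbitrary choice):
# edges, scales = make_binned_samplers(frag_lens.min(), frag_lens.max(), bin_width=100)
# gc_binned = blur_binned(gc_vals, frag_lens, edges, scales)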

In [30]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['xlim', 'size', 'ylim']
`%matplotlib` prevents importing * from pylab and numpy

In [31]:
from ggplot import *
import numpy as np
import pandas as pd
import scipy.stats  # scipy.stats.cauchy is used below; importing scipy alone may not load the stats submodule

Data set


In [164]:
size = 100000
# simulated GC values and fragment lengths for `size` fragments
gc_vals = np.random.normal(loc=50, scale=10, size=size)
frag_lens = np.random.normal(loc=10000, scale=500, size=size)

Creating a distribution for each point


In [165]:
def gc_wDiff(gc, frag_len):
    # blur a single GC value: the Gaussian spread narrows as the fragment gets longer
    return gc + np.random.normal(loc=0, scale=44500.0/frag_len)


# one new distribution (and one draw) per point, i.e. the first approach from the description
gc_wDiff_vals = [gc_wDiff(gc, frag_len) for gc, frag_len in zip(gc_vals, frag_lens)]
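The list comprehension above builds one normal draw per point in pure Python. A vectorized sketch of the same blur is shown below; it relies on np.random.normal broadcasting loc and scale element-wise, so it samples from the same per-point distributions (with different random draws). The variable name with the _vec suffix is just illustrative.

# vectorized form of the per-point blur (assumed equivalent via broadcasting)
gc_wDiff_vals_vec = np.random.normal(loc=gc_vals, scale=44500.0 / frag_lens)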

Adding an error term drawn from a Cauchy distribution


In [166]:
errScale = 0.001

# add heavy-tailed noise drawn from a Cauchy distribution
gc_wDiff_wNoise_vals = np.array(gc_wDiff_vals) + \
    scipy.stats.cauchy(loc=0, scale=errScale).rvs(len(gc_wDiff_vals))

In [167]:
data = pd.DataFrame({'gc': gc_vals, 'gc_wDiff': gc_wDiff_vals, 'gc_wDiff_wNoise': gc_wDiff_wNoise_vals})

mpl.rcParams['figure.figsize'] = [0.5, 1]

p1 = ggplot(data, aes()) +\
    geom_density(aes(x='gc', color='blue'))

p2 = ggplot(data, aes()) +\
    geom_density(aes(x='gc_wDiff', color='red'))

p3 = ggplot(data, aes()) +\
    geom_density(aes(x='gc_wDiff_wNoise', color='green'))

print p1
print p2
print p3

# alternative: overlay all three densities in a single plot
#    geom_density(aes(x='gc_wDiff', color='red'), ) +\
#    geom_density(aes(x='gc_wDiff_wNoise', color='green'), )
#    theme_matplotlib(rc={"figure.figsize": "5, 3"})


<ggplot: (8739959097945)>
<ggplot: (8739964223133)>
<ggplot: (8739959098045)>

In [168]:
print np.std(gc_vals)
print np.std(gc_wDiff_vals)
print np.std(gc_wDiff_wNoise_vals)


10.0210860831
10.9510003974
10.9829727914
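One caveat on these numbers: the Cauchy distribution has no finite variance, so np.std of the noisy values is driven by occasional extreme draws and can fluctuate between runs (with errScale = 0.001 the effect is small here). The interquartile range below is a more stable, rank-based cross-check; it is only a sketch, not part of the original analysis.

# the IQR is insensitive to the heavy Cauchy tails
for name, vals in [('gc', gc_vals),
                   ('gc_wDiff', gc_wDiff_vals),
                   ('gc_wDiff_wNoise', gc_wDiff_wNoise_vals)]:
    q75, q25 = np.percentile(vals, [75, 25])
    print name, q75 - q25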

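The stated goal is a speed comparison, but nothing is timed in this section. Below is a sketch of how the two approaches could be compared with the standard timeit module; it assumes gc_wDiff and the arrays defined above, plus the hypothetical make_binned_samplers / blur_binned helpers sketched under the description, and the bin width and repeat count are arbitrary choices.

import timeit

# set-up for the binned approach happens once, outside the timed calls
edges, scales = make_binned_samplers(frag_lens.min(), frag_lens.max(), bin_width=100)

def run_per_point():
    return [gc_wDiff(gc, frag_len) for gc, frag_len in zip(gc_vals, frag_lens)]

def run_binned():
    return blur_binned(gc_vals, frag_lens, edges, scales)

print 'per-point:', timeit.timeit(run_per_point, number=3)
print 'binned:   ', timeit.timeit(run_binned, number=3)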