Plotting kde objects


In [82]:
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline

1d kde


In [124]:
kde = stats.gaussian_kde(np.random.normal(loc=50, scale=5, size=100000))

In [125]:
x = np.arange(0, 100, 1)
plt.plot(x, kde(x))
plt.show()


2d kde


In [138]:
from scipy import stats
def measure(n):
    "Measurement model, return two coupled measurements."
    m1 = np.random.normal(size=n)
    m2 = np.random.normal(scale=0.5, size=n)
    return m1+m2, m1-m2

In [139]:
m1, m2 = measure(2000)
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()

In [140]:
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([m1, m2])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)

In [149]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kde

np.random.seed(1977)

# Generate 200 correlated x,y points
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T

nbins = 20

fig, axes = plt.subplots(ncols=2, nrows=2, sharex=True, sharey=True)

axes[0, 0].set_title('Scatterplot')
axes[0, 0].plot(x, y, 'ko')

axes[0, 1].set_title('Hexbin plot')
axes[0, 1].hexbin(x, y, gridsize=nbins)

axes[1, 0].set_title('2D Histogram')
axes[1, 0].hist2d(x, y, bins=nbins)

# Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
k = kde.gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

axes[1, 1].set_title('Gaussian KDE')
axes[1, 1].pcolormesh(xi, yi, zi.reshape(xi.shape))

fig.tight_layout()
plt.show()



In [189]:
size = 1000
kde = stats.gaussian_kde(
        [np.random.normal(loc=40, scale=10, size=size),
         np.random.normal(loc=55, scale=3, size=size)]
        )

In [208]:
font = {'family' : 'normal',
        'size'   : 14}

plt.rc('font', **font)

In [209]:
start = 0
end = 100
step = 1

i = np.arange(start, end, step)
nbins = len(i)

xi,yi = np.mgrid[i.min():i.max():nbins*1j, i.min():i.max():nbins*1j]
zi = kde(np.vstack([xi.flatten(), yi.flatten()]))

fig = plt.figure(1)
plt.pcolormesh(xi, yi, zi.reshape(xi.shape))
plt.title('2d-KDE')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
#fig.savefig('/home/nick/test.png', bbox_inches='tight')



In [193]:
size = 1000
kde = stats.gaussian_kde(
        [np.random.normal(loc=40, scale=10, size=size),
         np.random.normal(loc=55, scale=3, size=size)]
        )

In [218]:
f, axarr = plt.subplots(2)

start = 0
end = 100
step = 1

i = np.arange(start, end, step)
nbins = len(i)

xi,yi = np.mgrid[i.min():i.max():nbins*1j, i.min():i.max():nbins*1j]
zi = kde(np.vstack([xi.flatten(), yi.flatten()]))

#fig = plt.figure(1)
axarr[0].pcolormesh(xi, yi, zi.reshape(xi.shape))

#plt.title('2d-KDE')
#plt.xlabel('x')
#plt.ylabel('y')
#plt.show()
#fig.savefig('/home/nick/test.png', bbox_inches='tight')


Out[218]:
<matplotlib.collections.QuadMesh at 0x7fec4c3b5d50>

Plotting sandbox


In [115]:
plt.figure(1)
plt.subplot(211)
plt.plot(range(10), lw=10, alpha=0.1)
plt.subplot(212)
plt.plot(range(10), 'ro', alpha=0.5)
plt.show()



In [113]:
plt.subplot?

In [105]:
x = np.arange(0, 10, 0.1)
vals = kde.resample(size=100)
plt.figure(1)
plt.hist(vals[0,], 30)
plt.plot(x, kde(x))
plt.show()


KDE intersection


In [225]:
size = 1000
kde1 = stats.gaussian_kde(
        [np.random.normal(loc=40, scale=10, size=size),
         np.random.normal(loc=55, scale=3, size=size)]
        )

kde2 = stats.gaussian_kde(
        [np.random.normal(loc=55, scale=10, size=size),
         np.random.normal(loc=70, scale=3, size=size)]
        )

kde3 = stats.gaussian_kde(
        [np.random.normal(loc=40, scale=10, size=size),
         np.random.normal(loc=55, scale=3, size=size)]
        )

In [226]:
print kde1.integrate_kde(kde2)
print kde1.integrate_kde(kde3)


6.39154205405e-06
0.00240118058531

In [244]:
kde1 = stats.gaussian_kde(np.random.normal(loc=30, scale=10, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=70, scale=10, size=size))

In [245]:
print kde1.integrate_kde(kde1)
print kde1.integrate_kde(kde2)


0.0257390380528
0.000806341714814

In [381]:
# calculating intersection

def kde_intersect(kde1, kde2, start=0, end=100, step=0.1):
    # evalution grid
    x = np.arange(start,end,step)
    
    # calculate intersection densities
    pmin = np.min(np.c_[kde1(x),kde2(x)], axis=1)
    
    # integrate areas under curves
    total = kde1.integrate_box_1d(start,end) + kde2.integrate_box_1d(start,end)
    #total = np.trapz(y=kde1(x), x=x) + np.trapz(y=kde2(x), x=x)
    intersection = np.trapz(y=pmin,x=x)
    
    print 'kde1 max: {}'.format(np.max(kde1(x)))
    print 'kde2 max: {}'.format(np.max(kde2(x)))
    print 'pmin max: {}'.format(np.max(pmin))
    print 'total: {}'.format(total)
    print 'int:   {}'.format(intersection)
    
    # overlap coefficient
    return 2 * intersection / float(total)

kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))
print kde_intersect(kde1, kde1)
#print kde_intersect(kde1, kde2)


kde1 max: 1.20477869774
kde2 max: 1.20477869774
pmin max: 1.20477869774
total: 2.0
int:   0.120477869774
0.120477869774

In [365]:
# calculating intersection

def kde_intersect(kde1, kde2, start=0, end=100, step=0.1):
    # evalution grid
    x = np.arange(start,end,step)
    
    # kde integrations 
    int1 = kde1.integrate_box_1d(start,end) 
    int2 = kde2.integrate_box_1d(start,end)
    
    # kde scaled evaluated values 
    s1 = int1 / np.max(kde1(x)) * kde1(x) 
    s2 = int2 / np.max(kde2(x)) * kde2(x)
        
    # calculate intersection densities
    pmin = np.min(np.c_[s1,s2], axis=1)
    
    # integrate areas under curves
    total = kde1.integrate_box_1d(start,end) + kde2.integrate_box_1d(start,end)
    intersection = np.trapz(y=pmin,x=x)
        
    print 'kde1 max: {}'.format(np.max(kde1(x)))
    print 'kde2 max: {}'.format(np.max(kde2(x)))
    print 'pmin max: {}'.format(np.max(pmin))
    print 'total: {}'.format(total)
    print 'inter: {}'.format(intersection)
    
    # overlap coefficient
    return 2 * intersection / float(total)

kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))
print kde_intersect(kde1, kde1)
#print kde_intersect(kde1, kde2)


kde1 max: 0.512556047288
kde2 max: 0.512556047288
pmin max: 1.0
total: 2.0
inter: 0.1
0.1

In [358]:


In [345]:
# calculating BD shift as 1 - kde_intersection

kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))

x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, kde1(x), color='b', alpha=0.3)
plt.fill_between(x, kde2(x), color='r', alpha=0.3)
plt.show()

BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)


[  0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   6.32594044e-317
   8.05458485e-256   2.73976259e-201   2.48962764e-153   6.04376487e-112
   3.91951579e-077   6.79061289e-049   3.14294867e-027   3.88624353e-012
   1.29180352e-003   4.21238667e-001   5.37085745e+000   2.77009115e+001
   2.59254658e+001   5.58079775e+000   4.61054573e-001   8.13625691e-003
   9.25298874e-011   2.15790407e-025   1.00985354e-046   9.31114890e-075
   1.66948298e-109   5.77285214e-151   3.83084533e-199   4.86495415e-254
   1.18049676e-315   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
   0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000]
BD shift (1 - kde_intersection): 0.345

In [339]:
# calculating BD shift as 1 - kde_intersection

kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.695, scale=0.01, size=size))

x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, kde1(x), color='b', alpha=0.3)
plt.fill_between(x, kde2(x), color='r', alpha=0.3)
plt.show()

BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)


BD shift (1 - kde_intersection): 0.800

In [29]:
KernelDensity(kernel='gaussian').fit(vals)


Out[29]:
KernelDensity(algorithm='auto', atol=0, bandwidth=1.0, breadth_first=True,
       kernel='gaussian', leaf_size=40, metric='euclidean',
       metric_params=None, rtol=0)

In [ ]: