# Plotting kde objects

``````

In [82]:

import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline

``````

# 1d kde

``````

In [124]:

kde = stats.gaussian_kde(np.random.normal(loc=50, scale=5, size=100000))

``````
``````

In [125]:

x = np.arange(0, 100, 1)
plt.plot(x, kde(x))
plt.show()

``````
``````

``````

## 2d kde

``````

In [138]:

from scipy import stats
def measure(n):
"Measurement model, return two coupled measurements."
m1 = np.random.normal(size=n)
m2 = np.random.normal(scale=0.5, size=n)
return m1+m2, m1-m2

``````
``````

In [139]:

m1, m2 = measure(2000)
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()

``````
``````

In [140]:

X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([m1, m2])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)

``````
``````

In [149]:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kde

np.random.seed(1977)

# Generate 200 correlated x,y points
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T

nbins = 20

fig, axes = plt.subplots(ncols=2, nrows=2, sharex=True, sharey=True)

axes[0, 0].set_title('Scatterplot')
axes[0, 0].plot(x, y, 'ko')

axes[0, 1].set_title('Hexbin plot')
axes[0, 1].hexbin(x, y, gridsize=nbins)

axes[1, 0].set_title('2D Histogram')
axes[1, 0].hist2d(x, y, bins=nbins)

# Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
k = kde.gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

axes[1, 1].set_title('Gaussian KDE')
axes[1, 1].pcolormesh(xi, yi, zi.reshape(xi.shape))

fig.tight_layout()
plt.show()

``````
``````

``````
``````

In [189]:

size = 1000
kde = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)

``````
``````

In [208]:

font = {'family' : 'normal',
'size'   : 14}

plt.rc('font', **font)

``````
``````

In [209]:

start = 0
end = 100
step = 1

i = np.arange(start, end, step)
nbins = len(i)

xi,yi = np.mgrid[i.min():i.max():nbins*1j, i.min():i.max():nbins*1j]
zi = kde(np.vstack([xi.flatten(), yi.flatten()]))

fig = plt.figure(1)
plt.pcolormesh(xi, yi, zi.reshape(xi.shape))
plt.title('2d-KDE')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
#fig.savefig('/home/nick/test.png', bbox_inches='tight')

``````
``````

``````
``````

In [193]:

size = 1000
kde = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)

``````
``````

In [218]:

f, axarr = plt.subplots(2)

start = 0
end = 100
step = 1

i = np.arange(start, end, step)
nbins = len(i)

xi,yi = np.mgrid[i.min():i.max():nbins*1j, i.min():i.max():nbins*1j]
zi = kde(np.vstack([xi.flatten(), yi.flatten()]))

#fig = plt.figure(1)
axarr[0].pcolormesh(xi, yi, zi.reshape(xi.shape))

#plt.title('2d-KDE')
#plt.xlabel('x')
#plt.ylabel('y')
#plt.show()
#fig.savefig('/home/nick/test.png', bbox_inches='tight')

``````
``````

Out[218]:

``````

# Plotting sandbox

``````

In [115]:

plt.figure(1)
plt.subplot(211)
plt.plot(range(10), lw=10, alpha=0.1)
plt.subplot(212)
plt.plot(range(10), 'ro', alpha=0.5)
plt.show()

``````
``````

``````
``````

In [113]:

plt.subplot?

``````
``````

In [105]:

x = np.arange(0, 10, 0.1)
vals = kde.resample(size=100)
plt.figure(1)
plt.hist(vals[0,], 30)
plt.plot(x, kde(x))
plt.show()

``````
``````

``````

# KDE intersection

``````

In [225]:

size = 1000
kde1 = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)

kde2 = stats.gaussian_kde(
[np.random.normal(loc=55, scale=10, size=size),
np.random.normal(loc=70, scale=3, size=size)]
)

kde3 = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)

``````
``````

In [226]:

print kde1.integrate_kde(kde2)
print kde1.integrate_kde(kde3)

``````
``````

6.39154205405e-06
0.00240118058531

``````
``````

In [244]:

kde1 = stats.gaussian_kde(np.random.normal(loc=30, scale=10, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=70, scale=10, size=size))

``````
``````

In [245]:

print kde1.integrate_kde(kde1)
print kde1.integrate_kde(kde2)

``````
``````

0.0257390380528
0.000806341714814

``````
``````

In [407]:

# calculating intersection

def kde_intersect(kde1, kde2, start=1.66, end=1.76, step=0.001):
# evalution grid
x = np.arange(start,end,step)

# calculate intersection densities
pmin = np.min(np.c_[kde1(x),kde2(x)], axis=1)

# integrate areas under curves
#total = kde1.integrate_box_1d(start,end) + kde2.integrate_box_1d(start,end)
total = np.trapz(y=kde1(x), x=x) + np.trapz(y=kde2(x), x=x)
intersection = np.trapz(y=pmin,x=x)

# overlap coefficient
return 2 * intersection / float(total)

size=1000
k1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
k2 = stats.gaussian_kde(np.random.normal(loc=1.71, scale=0.01, size=size))

x = np.arange(1.6,1.8,0.001)
plt.figure(1)
plt.fill_between(x, k1(x), color='b', alpha=0.3)
plt.fill_between(x, k2(x), color='r', alpha=0.3)
plt.show()

sect1 = kde_intersect(k1, k1)
sect2 = kde_intersect(k1, k2)

print 'BD shift (1 - kde_intersection): {0:.3f}'.format(1 - sect1)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(1 - sect2)

``````
``````

BD shift (1 - kde_intersection): 0.000
BD shift (1 - kde_intersection): 0.939

``````
``````

In [ ]:

# calculating BD shift as 1 - kde_intersection

x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, k1(x), color='b', alpha=0.3)
plt.fill_between(x, k2(x), color='r', alpha=0.3)
plt.show()

BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)

``````
``````

In [389]:

# calculating intersection

def kde_intersect(kde1, kde2, start=0, end=100, step=0.1):
# evalution grid
x = np.arange(start,end,step)

# calculate intersection densities
pmin = np.min(np.c_[kde1(x),kde2(x)], axis=1)

# integrate areas under curves
total = kde1.integrate_box_1d(start,end) + kde2.integrate_box_1d(start,end)
#total = np.trapz(y=kde1(x), x=x) + np.trapz(y=kde2(x), x=x)
intersection = np.trapz(y=pmin,x=x)

#    print 'kde1 max: {}'.format(np.max(kde1(x)))
#    print 'kde2 max: {}'.format(np.max(kde2(x)))
#    print 'pmin max: {}'.format(np.max(pmin))
#    print 'total: {}'.format(total)
#    print 'int:   {}'.format(intersection)

# overlap coefficient
return 2 * intersection / float(total)

size=10000
k1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
k2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))
print kde_intersect(k1, k1)
print kde_intersect(k2, k2)

``````
``````

0.0488913030483
0.500494435229

``````
``````

In [358]:

``````
``````

In [383]:

# calculating BD shift as 1 - kde_intersection

kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))

x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, kde1(x), color='b', alpha=0.3)
plt.fill_between(x, kde2(x), color='r', alpha=0.3)
plt.show()

BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)

``````
``````

kde1 max: 40.8478839685
kde2 max: 37.6137733711
pmin max: 25.620665399
total: 2.0
int:   0.637028017085
BD shift (1 - kde_intersection): 0.363

``````
``````

In [339]:

# calculating BD shift as 1 - kde_intersection

kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.695, scale=0.01, size=size))

x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, kde1(x), color='b', alpha=0.3)
plt.fill_between(x, kde2(x), color='r', alpha=0.3)
plt.show()

BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)

``````
``````

BD shift (1 - kde_intersection): 0.800

``````
``````

In [29]:

KernelDensity(kernel='gaussian').fit(vals)

``````
``````

Out[29]:

kernel='gaussian', leaf_size=40, metric='euclidean',
metric_params=None, rtol=0)

``````
``````

In [ ]:

``````