In [82]:
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
In [124]:
kde = stats.gaussian_kde(np.random.normal(loc=50, scale=5, size=100000))
In [125]:
x = np.arange(0, 100, 1)
plt.plot(x, kde(x))
plt.show()
In [138]:
from scipy import stats
def measure(n):
"Measurement model, return two coupled measurements."
m1 = np.random.normal(size=n)
m2 = np.random.normal(scale=0.5, size=n)
return m1+m2, m1-m2
In [139]:
m1, m2 = measure(2000)
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()
In [140]:
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([m1, m2])
kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)
In [149]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kde
np.random.seed(1977)
# Generate 200 correlated x,y points
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T
nbins = 20
fig, axes = plt.subplots(ncols=2, nrows=2, sharex=True, sharey=True)
axes[0, 0].set_title('Scatterplot')
axes[0, 0].plot(x, y, 'ko')
axes[0, 1].set_title('Hexbin plot')
axes[0, 1].hexbin(x, y, gridsize=nbins)
axes[1, 0].set_title('2D Histogram')
axes[1, 0].hist2d(x, y, bins=nbins)
# Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
k = kde.gaussian_kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
axes[1, 1].set_title('Gaussian KDE')
axes[1, 1].pcolormesh(xi, yi, zi.reshape(xi.shape))
fig.tight_layout()
plt.show()
In [189]:
size = 1000
kde = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)
In [208]:
font = {'family' : 'normal',
'size' : 14}
plt.rc('font', **font)
In [209]:
start = 0
end = 100
step = 1
i = np.arange(start, end, step)
nbins = len(i)
xi,yi = np.mgrid[i.min():i.max():nbins*1j, i.min():i.max():nbins*1j]
zi = kde(np.vstack([xi.flatten(), yi.flatten()]))
fig = plt.figure(1)
plt.pcolormesh(xi, yi, zi.reshape(xi.shape))
plt.title('2d-KDE')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
#fig.savefig('/home/nick/test.png', bbox_inches='tight')
In [193]:
size = 1000
kde = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)
In [218]:
f, axarr = plt.subplots(2)
start = 0
end = 100
step = 1
i = np.arange(start, end, step)
nbins = len(i)
xi,yi = np.mgrid[i.min():i.max():nbins*1j, i.min():i.max():nbins*1j]
zi = kde(np.vstack([xi.flatten(), yi.flatten()]))
#fig = plt.figure(1)
axarr[0].pcolormesh(xi, yi, zi.reshape(xi.shape))
#plt.title('2d-KDE')
#plt.xlabel('x')
#plt.ylabel('y')
#plt.show()
#fig.savefig('/home/nick/test.png', bbox_inches='tight')
Out[218]:
In [115]:
plt.figure(1)
plt.subplot(211)
plt.plot(range(10), lw=10, alpha=0.1)
plt.subplot(212)
plt.plot(range(10), 'ro', alpha=0.5)
plt.show()
In [113]:
plt.subplot?
In [105]:
x = np.arange(0, 10, 0.1)
vals = kde.resample(size=100)
plt.figure(1)
plt.hist(vals[0,], 30)
plt.plot(x, kde(x))
plt.show()
In [225]:
size = 1000
kde1 = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)
kde2 = stats.gaussian_kde(
[np.random.normal(loc=55, scale=10, size=size),
np.random.normal(loc=70, scale=3, size=size)]
)
kde3 = stats.gaussian_kde(
[np.random.normal(loc=40, scale=10, size=size),
np.random.normal(loc=55, scale=3, size=size)]
)
In [226]:
print kde1.integrate_kde(kde2)
print kde1.integrate_kde(kde3)
In [244]:
kde1 = stats.gaussian_kde(np.random.normal(loc=30, scale=10, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=70, scale=10, size=size))
In [245]:
print kde1.integrate_kde(kde1)
print kde1.integrate_kde(kde2)
In [381]:
# calculating intersection
def kde_intersect(kde1, kde2, start=0, end=100, step=0.1):
# evalution grid
x = np.arange(start,end,step)
# calculate intersection densities
pmin = np.min(np.c_[kde1(x),kde2(x)], axis=1)
# integrate areas under curves
total = kde1.integrate_box_1d(start,end) + kde2.integrate_box_1d(start,end)
#total = np.trapz(y=kde1(x), x=x) + np.trapz(y=kde2(x), x=x)
intersection = np.trapz(y=pmin,x=x)
print 'kde1 max: {}'.format(np.max(kde1(x)))
print 'kde2 max: {}'.format(np.max(kde2(x)))
print 'pmin max: {}'.format(np.max(pmin))
print 'total: {}'.format(total)
print 'int: {}'.format(intersection)
# overlap coefficient
return 2 * intersection / float(total)
kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))
print kde_intersect(kde1, kde1)
#print kde_intersect(kde1, kde2)
In [365]:
# calculating intersection
def kde_intersect(kde1, kde2, start=0, end=100, step=0.1):
# evalution grid
x = np.arange(start,end,step)
# kde integrations
int1 = kde1.integrate_box_1d(start,end)
int2 = kde2.integrate_box_1d(start,end)
# kde scaled evaluated values
s1 = int1 / np.max(kde1(x)) * kde1(x)
s2 = int2 / np.max(kde2(x)) * kde2(x)
# calculate intersection densities
pmin = np.min(np.c_[s1,s2], axis=1)
# integrate areas under curves
total = kde1.integrate_box_1d(start,end) + kde2.integrate_box_1d(start,end)
intersection = np.trapz(y=pmin,x=x)
print 'kde1 max: {}'.format(np.max(kde1(x)))
print 'kde2 max: {}'.format(np.max(kde2(x)))
print 'pmin max: {}'.format(np.max(pmin))
print 'total: {}'.format(total)
print 'inter: {}'.format(intersection)
# overlap coefficient
return 2 * intersection / float(total)
kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))
print kde_intersect(kde1, kde1)
#print kde_intersect(kde1, kde2)
In [358]:
In [345]:
# calculating BD shift as 1 - kde_intersection
kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.68, scale=0.01, size=size))
x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, kde1(x), color='b', alpha=0.3)
plt.fill_between(x, kde2(x), color='r', alpha=0.3)
plt.show()
BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)
In [339]:
# calculating BD shift as 1 - kde_intersection
kde1 = stats.gaussian_kde(np.random.normal(loc=1.67, scale=0.01, size=size))
kde2 = stats.gaussian_kde(np.random.normal(loc=1.695, scale=0.01, size=size))
x = np.arange(1.6,1.76,0.001)
plt.figure(1)
plt.fill_between(x, kde1(x), color='b', alpha=0.3)
plt.fill_between(x, kde2(x), color='r', alpha=0.3)
plt.show()
BD_shift = 1 - kde_intersect(kde1, kde2, start=0, end=2, step=0.01)
print 'BD shift (1 - kde_intersection): {0:.3f}'.format(BD_shift)
In [29]:
KernelDensity(kernel='gaussian').fit(vals)
Out[29]:
In [ ]: