notebook.community

Edit and run



In [2]:

    
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.stats import norm
from sklearn.neighbors import KernelDensity


#----------------------------------------------------------------------
# Plot the progression of histograms to kernels
#
# A small two-component sample (30% centred on 0, 70% centred on 5) is drawn
# and shown four ways on a shared 2x2 grid: two histograms that differ only
# in where the bin edges fall, then a tophat and a Gaussian kernel density
# estimate of the same points.
np.random.seed(1)
N = 20
# Sample counts must be integers: passing the float 0.3 * N as `size` is
# rejected by current NumPy, so the counts are converted explicitly.
X = np.concatenate((np.random.normal(0, 1, int(round(0.3 * N))),
                    np.random.normal(5, 1, int(round(0.7 * N)))))[:, np.newaxis]
X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
bins = np.linspace(-5, 10, 10)

fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
fig.subplots_adjust(hspace=0.05, wspace=0.05)

# histogram 1
# `density=True` is the supported spelling of the removed `normed=True`.
ax[0, 0].hist(X[:, 0], bins=bins, fc='#AAAAFF', density=True)
ax[0, 0].text(-3.5, 0.31, "Histogram")

# histogram 2: identical data, bin edges shifted right by 0.75
ax[0, 1].hist(X[:, 0], bins=bins + 0.75, fc='#AAAAFF', density=True)
ax[0, 1].text(-3.5, 0.31, "Histogram, bins shifted")

# tophat KDE
kde = KernelDensity(kernel='tophat', bandwidth=0.75).fit(X)
# score_samples returns log-density, hence the np.exp when plotting
log_dens = kde.score_samples(X_plot)
ax[1, 0].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
ax[1, 0].text(-3.5, 0.31, "Tophat Kernel Density")

# Gaussian KDE
kde = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(X)
log_dens = kde.score_samples(X_plot)
ax[1, 1].fill(X_plot[:, 0], np.exp(log_dens), fc='#AAAAFF')
ax[1, 1].text(-3.5, 0.31, "Gaussian Kernel Density")

# Mark every sample with a '+' just below the x-axis and use common limits
for axi in ax.ravel():
    axi.plot(X[:, 0], np.zeros(X.shape[0]) - 0.01, '+k')
    axi.set_xlim(-4, 9)
    axi.set_ylim(-0.02, 0.34)

# Label only the outer edges of the grid (left column, bottom row)
for axi in ax[:, 0]:
    axi.set_ylabel('Normalized Density')

for axi in ax[1, :]:
    axi.set_xlabel('x')

#----------------------------------------------------------------------
# Plot all available kernels
#
# Evaluation grid as a column vector, and a one-point training set at the
# origin so that each fitted density is just the kernel itself.
X_plot = np.linspace(-6, 6, 1000).reshape(-1, 1)
X_src = np.array([[0.0]])

# 2x3 grid of panels sharing both axes, with narrow margins and gaps
fig, ax = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True)
fig.subplots_adjust(left=0.05, right=0.95, wspace=0.05, hspace=0.05)


def format_func(x, loc):
    """Return the tick label for position ``x`` in units of ``h``.

    0 is labelled '0', +1 and -1 are labelled 'h' and '-h', and any other
    value is labelled with its integer part followed by 'h' (e.g. '2h').
    ``loc`` is accepted because the function is used as a FuncFormatter
    callback, but it is not used.
    """
    # Special-cased positions, checked in order with plain equality
    for tick, label in ((0, '0'), (1, 'h'), (-1, '-h')):
        if x == tick:
            return label
    # '%i' truncates toward zero, so 2.7 is labelled '2h'
    return '%ih' % x

# One panel per kernel name; each estimator uses the default bandwidth and
# is fit to the single point in X_src, so the panel shows the kernel shape.
kernel_names = ('gaussian', 'tophat', 'epanechnikov',
                'exponential', 'linear', 'cosine')
for i, kernel in enumerate(kernel_names):
    axi = ax.flat[i]

    estimator = KernelDensity(kernel=kernel).fit(X_src)
    # score_samples returns log-density; exponentiate before drawing
    log_dens = estimator.score_samples(X_plot)
    axi.fill(X_plot[:, 0], np.exp(log_dens), '-k', fc='#AAAAFF')
    axi.text(-2.6, 0.95, kernel)

    # x ticks at every integer, labelled in multiples of h; no y ticks
    axi.xaxis.set_major_locator(plt.MultipleLocator(1))
    axi.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
    axi.yaxis.set_major_locator(plt.NullLocator())

    axi.set_xlim(-2.9, 2.9)
    axi.set_ylim(0, 1.05)

# The title sits on the top-middle panel so it is centred over the grid
ax[0, 1].set_title('Available Kernels')

#----------------------------------------------------------------------
# Plot a 1D density example
#
# Draw N points from a 0.3 / 0.7 mixture of N(0, 1) and N(5, 1), then compare
# three kernel density estimates against the known mixture density.
N = 1000
np.random.seed(1)

# Sample counts must be integers: passing the float 0.3 * N as `size` is
# rejected by current NumPy, so the counts are converted explicitly.
X = np.concatenate((np.random.normal(0, 1, int(round(0.3 * N))),
                    np.random.normal(5, 1, int(round(0.7 * N)))))[:, np.newaxis]

X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

# Exact density of the mixture the samples were drawn from
true_dens = (0.3 * norm(0, 1).pdf(X_plot[:, 0])
             + 0.7 * norm(5, 1).pdf(X_plot[:, 0]))

fig, ax = plt.subplots()
ax.fill(X_plot[:, 0], true_dens, fc='black', alpha=0.2,
        label='input distribution')

for kernel in ['gaussian', 'tophat', 'epanechnikov']:
    kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X)
    # score_samples returns log-density, hence the np.exp when plotting
    log_dens = kde.score_samples(X_plot)
    ax.plot(X_plot[:, 0], np.exp(log_dens), '-',
            label="kernel = '{0}'".format(kernel))

ax.text(6, 0.38, "N={0} points".format(N))

ax.legend(loc='upper left')
# Show the samples as '+' marks below the axis, with random vertical jitter
# so that overlapping points remain visible
ax.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), '+k')

ax.set_xlim(-4, 9)
ax.set_ylim(-0.02, 0.4)
plt.show()



In [227]:

    
# Build two 1-D samples, X and Y, each stored as a column vector of length
# 2 * N. Each concatenates three parts: 0.3 * N normal draws, N draws of
# 2 * (product of two standard normals) - 5, and 0.7 * N normal draws.
N = 2000
# Sample counts must be integers: passing the float 0.3 * N as `size` is
# rejected by current NumPy, so the counts are converted explicitly.
X = np.concatenate((np.random.normal(0, 1, int(round(0.3 * N))),
                    ((np.random.randn(N)) * 2*(np.random.randn(N))-5),
                    np.random.normal(5, 1, int(round(0.7 * N)))))[:, np.newaxis]
Y = np.concatenate((np.random.normal(0.5, 0.4, int(round(0.3 * N))),
                    ((np.random.randn(N)) * 2*(np.random.randn(N))-5),
                    np.random.normal(6, 1.5, int(round(0.7 * N)))))[:, np.newaxis]



In [228]:

    
# Place the X and Y columns side by side: one row per (x, y) pair
Z = np.hstack((X, Y))
Z.shape









    Out[228]:





(4000, 2)



In [228]:



In [228]:



In [231]:

    
# Count the (X, Y) pairs on a 100x100 grid covering [-10, 10] on both axes;
# only the count matrix (element 0 of the result) is kept.
a = np.histogram2d(
    X[:, 0],
    Y[:, 0],
    bins=100,
    range=[(-10, 10), (-10, 10)],
)[0]
plt.imshow(a);



In [243]:

    
# Fit a Gaussian KDE to the paired samples in Z and draw 2 * N new points.
# NOTE(review): with a bandwidth this small the drawn points should be
# almost exact copies of rows of Z — confirm that this is intended.
kde = KernelDensity(kernel='gaussian', bandwidth=2e-11)
kde.fit(Z)
lol = kde.sample(n_samples=2 * N)

# Bin the resampled points on the same 100x100 grid over [-10, 10]^2
b = np.histogram2d(
    lol[:, 0],
    lol[:, 1],
    bins=100,
    range=[(-10, 10), (-10, 10)],
)[0]
plt.imshow(b);



In [204]:

    
# Sum of squared per-bin differences between the two count grids a and b
((a - b) ** 2).sum()









    Out[204]:





1114661.0



In [205]:

    
# Sum of squared per-bin differences between a and b, divided by a fixed
# constant. NOTE(review): the origin of 2250026.0 is not shown in this
# notebook — TODO confirm what it normalizes against.
((a - b) ** 2).sum() / 2250026.0









    Out[205]:





0.49539916427632391



In [165]:

    
# Distribution of the per-bin counts in b, flattened to one dimension.
# b.ravel() replaces b.reshape(np.product(b.shape)): np.product was removed
# in NumPy 2.0, and ravel yields the same flat array.
plt.hist(b.ravel(), bins=50);



In [ ]:



In [ ]:

    
def weirdDistro(params,n_samples=1000):
    """ params = [phi,theta_1,theta_2,theta_3,theta_4] 
    p(x) = 
    """
    inputs = np.random.rand(n_samples)
    outputs = np.zeros((n_samples,4))
    
    
    
    for i in range(n_samples):
        outputs[i] = np