$\newcommand{\Reals}{\mathbb{R}} \newcommand{\Nats}{\mathbb{N}} \newcommand{\PDK}{{k}} \newcommand{\IS}{\mathcal{X}} \newcommand{\FM}{\Phi} \newcommand{\Gram}{G} \newcommand{\RKHS}{\mathcal{H}} \newcommand{\prodDot}[2]{\left\langle#1,#2\right\rangle} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max}$
In [14]:
### FIRST SOME CODE ####
from __future__ import division, print_function, absolute_import
from IPython.display import SVG, display, Image, HTML
import numpy as np, scipy as sp, pylab as pl, matplotlib.pyplot as plt, scipy.stats as stats, sklearn, sklearn.datasets
from scipy.spatial.distance import squareform, pdist, cdist
import rkhs_operators as ro
import distributions as dist #commit 480cf98 of https://github.com/ingmarschuster/distributions
pl.style.use(u'seaborn-talk')
In [15]:
display(Image(filename="GP_uq.png", width=630)) #source: http://scikit-learn.org/0.17/modules/gaussian_process.html
In [16]:
display(Image(filename="SVM.png", width=700)) #source: https://en.wikipedia.org/wiki/Support_vector_machine
In [17]:
display(Image(filename="monomials_small.jpg", width=800)) #source: Berhard Schölkopf
In [18]:
figkw = {"figsize":(4,4), "dpi":150}
np.random.seed(5)
samps_per_distr = 20
data = np.vstack([stats.multivariate_normal(np.array([-2,0]), np.eye(2)*1.5).rvs(samps_per_distr),
stats.multivariate_normal(np.array([2,0]), np.eye(2)*1.5).rvs(samps_per_distr)])
distr_idx = np.r_[[0]*samps_per_distr, [1]*samps_per_distr]
In [19]:
f = pl.figure(**figkw);
for (idx, c, marker) in [(0,'r', (0,3,0)), (1, "b", "x")]:
    plt.scatter(*data[distr_idx==idx,:].T, c=c, marker=marker, alpha = 0.4)
pl.show()
In [20]:
pl.figure(**figkw)
for (idx, c, marker) in [(0,'r', (0,3,0)), (1, "b", "x")]:
    pl.scatter(*data[distr_idx==idx,:].T, c=c, marker=marker, alpha=0.2)
    pl.arrow(0, 0, *data[distr_idx==idx,:].mean(0), head_width=0.3, width=0.05, head_length=0.3, fc=c, ec=c)
pl.title(r"Mean embeddings for $\Phi(x)=x$");
In [21]:
pl.figure(**figkw)
for (idx, c, marker) in [(0,'r', (0,3,0)), (1, "b", "x")]:
    pl.scatter(*data[distr_idx==idx,:].T, c=c, marker=marker, alpha=0.2)
    pl.arrow(0, 0, *data[distr_idx==idx,:].mean(0), head_width=0.3, width=0.05, head_length=0.3, fc=c, ec=c)
pl.title(r"Mean embeddings for $\Phi(x)=x$");
pl.scatter(np.ones(1), np.ones(1), c='k', marker='D', alpha=0.8);
In [22]:
# Some plotting code
def apply_to_mg(func, *mg):
    # apply a function to the points of a meshgrid
    x = np.vstack([e.flat for e in mg]).T
    return np.array([func(i.reshape((1,2))) for i in x]).reshape(mg[0].shape)

def plot_with_contour(samps, data_idx, cont_func, method_name = None, delta = 0.025, pl = pl, colormesh_cmap = pl.cm.Pastel2, contour_classif = True):
    x = np.arange(samps.T[0].min()-delta, samps.T[0].max()+delta, delta)
    y = np.arange(samps.T[1].min()-delta, samps.T[1].max()+delta, delta)
    X, Y = np.meshgrid(x, y)
    Z = apply_to_mg(cont_func, X, Y)
    Z = Z.reshape(X.shape)

    fig = pl.figure(**figkw)
    if colormesh_cmap is not None:
        bound = np.abs(Z).max()
        pl.pcolor(X, Y, Z, cmap=colormesh_cmap, alpha=0.5, edgecolors=None, vmin=-bound, vmax=bound)
    if contour_classif is True:
        c = pl.contour(X, Y, Z, colors=['k', ],
                       alpha=0.5,
                       linestyles=['--'],
                       levels=[0],
                       linewidths=0.7)
    else:
        pl.contour(X, Y, Z, linewidths=0.7)
    if method_name is not None:
        pl.title(method_name)
    for (idx, c, marker) in [(0,'r', (0,3,0)), (1, "b", "x")]:
        pl.scatter(*samps[data_idx==idx,:].T, c=c, marker=marker, alpha=0.4)
    pl.show()
    pl.close()
In [23]:
est_dens_1 = dist.mixt(2, [dist.mvnorm(x, np.eye(2)*0.1) for x in data[:4]], [1./4]*4)
plot_with_contour(data, distr_idx,
                  lambda x: np.exp(est_dens_1.logpdf(x)),
                  colormesh_cmap=None, contour_classif=False)
In [24]:
est_dens_1 = dist.mixt(2, [dist.mvnorm(x, np.eye(2)*0.1,10) for x in data[:samps_per_distr]], [1./samps_per_distr]*samps_per_distr)
plot_with_contour(data, distr_idx,
                  lambda x: np.exp(est_dens_1.logpdf(x)),
                  colormesh_cmap=None, contour_classif=False)
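For reference, here is a rough numpy/scipy stand-in for what the two density-estimate cells above presumably compute: the log-density of an equally weighted Gaussian mixture centered at the sample points. The exact semantics of `dist.mixt` and `dist.mvnorm` (including the extra argument 10) are not reproduced; this is only an assumption-laden sketch.

# Hedged sketch: equally weighted Gaussian mixture log-density,
# a stand-in for the dist.mixt construction above (its exact semantics are assumed, not verified).
from scipy.special import logsumexp

def mixture_logpdf(x, centers, cov):
    # log( (1/N) * sum_i N(x; center_i, cov) )
    comp = np.array([stats.multivariate_normal(c, cov).logpdf(x) for c in centers])
    return logsumexp(comp, axis=0) - np.log(len(centers))

est_logpdf = lambda x: mixture_logpdf(x, data[:samps_per_distr], np.eye(2)*0.1)

Passing `lambda x: np.exp(est_logpdf(x))` to `plot_with_contour` should produce a contour plot very similar to the cell above.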
Plan
pick $\FM(x) = \PDK(\cdot, x)$
let linear combinations of features also be in $\RKHS$ $$f(\cdot)=\sum_{i=1}^m a_i \PDK(\cdot, x_i) \in \RKHS$$ for $a_i \in \Reals$
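To make such functions concrete, here is a minimal numpy sketch (not one of the original cells) of evaluating $f(\cdot)=\sum_{i=1}^m a_i \PDK(\cdot, x_i)$ at new points, assuming a Gaussian kernel $\PDK(x, y) = \exp(-\|x-y\|^2/(2\sigma^2))$; the helper names are made up for illustration.

# Hedged sketch: evaluate f(.) = sum_i a_i k(., x_i) at new points,
# assuming a Gaussian kernel with bandwidth sigma (names are illustrative only).
def gauss_kernel(X, Y, sigma=1.0):
    # pairwise kernel values k(x, y) for the rows of X and Y
    return np.exp(-cdist(X, Y, 'sqeuclidean') / (2 * sigma**2))

def eval_rkhs_function(a, centers, X_new, sigma=1.0):
    # f(x*) = sum_i a_i k(x_i, x*) for every row x* of X_new
    return gauss_kernel(X_new, centers, sigma).dot(a)

a = np.ones(len(data)) / len(data)          # uniform weights recover the empirical kernel mean embedding
f_vals = eval_rkhs_function(a, data, data)  # f evaluated at the sample points themselves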
Let's look at an example classification output
In [25]:
class KMEclassification(object):
    def __init__(self, samps1, samps2, kernel):
        self.de1 = ro.RKHSDensityEstimator(samps1, kernel, 0.1)
        self.de2 = ro.RKHSDensityEstimator(samps2, kernel, 0.1)

    def classification_score(self, test):
        return (self.de1.eval_kme(test) - self.de2.eval_kme(test))
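For readers without `rkhs_operators`: the score above is, up to whatever regularization `RKHSDensityEstimator` applies internally, the difference of the two empirical kernel mean embeddings evaluated at the test point. A minimal numpy sketch, assuming a Gaussian kernel (class and method names here are illustrative, not the library's API):

# Hedged sketch: score(x*) = (1/N1) sum_i k(x_i, x*) - (1/N2) sum_j k(x_j', x*),
# i.e. the difference of empirical kernel mean embeddings, with no regularization.
class KMEclassificationSketch(object):
    def __init__(self, samps1, samps2, sigma=0.3):
        self.samps1, self.samps2, self.sigma = samps1, samps2, sigma

    def _kme_eval(self, samps, test):
        # empirical mean embedding of samps, evaluated at the rows of test
        gram = np.exp(-cdist(test, samps, 'sqeuclidean') / (2 * self.sigma**2))
        return gram.mean(axis=1)

    def classification_score(self, test):
        return self._kme_eval(self.samps1, test) - self._kme_eval(self.samps2, test)

Its `classification_score` can be passed to `plot_with_contour` in the same way as the estimator-based version in the next cells.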
In [26]:
data, distr_idx = sklearn.datasets.make_circles(n_samples=400, factor=.3, noise=.05)
kc = KMEclassification(data[distr_idx==0,:], data[distr_idx==1,:], ro.LinearKernel())
plot_with_contour(data, distr_idx, kc.classification_score, 'Inner Product classif. '+"Linear", pl = plt, contour_classif = True, colormesh_cmap = pl.cm.bwr)
In [27]:
kc = KMEclassification(data[distr_idx==0,:], data[distr_idx==1,:], ro.GaussianKernel(0.3))
plot_with_contour(data, distr_idx, kc.classification_score, 'Inner Product classif. '+"Gaussian", pl = plt, contour_classif = True, colormesh_cmap = pl.cm.bwr)
In [28]:
out_samps = data[distr_idx==0,:1] + 1
inp_samps = data[distr_idx==0,1:] + 1

def plot_mean_embedding(cme, inp_samps, out_samps, p1 = 0., p2 = 1., offset = 0.5):
    x = np.linspace(inp_samps.min()-offset, inp_samps.max()+offset, 200)
    fig = pl.figure(figsize=(10, 5))
    ax = [pl.subplot2grid((2, 2), (0, 1)),
          pl.subplot2grid((2, 2), (0, 0), rowspan=2),
          pl.subplot2grid((2, 2), (1, 1))]
    ax[1].scatter(out_samps, inp_samps, alpha=0.3, color='r')
    ax[1].set_xlabel('Output')
    ax[1].set_ylabel('Input')
    ax[1].axhline(p1, 0, 8, color='g', linestyle='--')
    ax[1].axhline(p2, 0, 8, color='b', linestyle='--')
    ax[1].set_title("%d input-output pairs" % len(out_samps))
    ax[1].set_yticks((p1, p2))
    e = cme.lhood(np.array([[p1], [p2]]), x[:, None]).T
    #ax[0].plot(x, d[0], '-', label='cond. density')
    ax[2].plot(x, e[0], 'g--', label='cond. mean emb.')
    ax[2].set_title(r"p(outp | inp=%.1f)" % p1)
    ax[0].plot(x, e[1], 'b--', label='cond. mean emb.')
    ax[0].set_title(r"p(outp | inp=%.1f)" % p2)
    #ax[2].legend(loc='best')
    fig.tight_layout()
In [29]:
cme = ro.ConditionMeanEmbedding(inp_samps, out_samps, ro.GaussianKernel(0.3), ro.GaussianKernel(0.3), 5)
plot_mean_embedding(cme, inp_samps, out_samps, 0.3, 2.,)
closed-form estimate given samples from input and output $$\begin{bmatrix}\PDK_Y(y_1, \cdot) & \dots & \PDK_Y(y_N, \cdot)\end{bmatrix} \Gram_X^{-1} \begin{bmatrix}\PDK_X(x_1, \cdot)\\ \vdots \\ \PDK_X(x_N, \cdot)\end{bmatrix}$$
closed-form estimate of the output embedding for a new input $x^*$ (see the sketch below)
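A hedged numpy sketch of that closed form, with an explicit ridge term `lamb` (which the last argument of `ro.ConditionMeanEmbedding` above presumably controls; kernel choices and helper names are assumptions): the weights $\beta(x^*) = (\Gram_X + \lambda I)^{-1}\, [\PDK_X(x_1, x^*), \dots, \PDK_X(x_N, x^*)]^\top$ give the embedding $\hat\mu_{Y|x^*}(\cdot) = \sum_{i=1}^N \beta_i(x^*)\, \PDK_Y(y_i, \cdot)$.

# Hedged sketch of the closed-form conditional mean embedding estimate
# (ridge term lamb added for stability; parameter roles in ro.ConditionMeanEmbedding are assumed).
def gauss_gram(X, Y, sigma=0.3):
    return np.exp(-cdist(X, Y, 'sqeuclidean') / (2 * sigma**2))

def cme_weights(X_train, x_star, sigma=0.3, lamb=5.0):
    # beta(x*) = (G_X + lamb * I)^{-1} k_X(x*)
    G_X = gauss_gram(X_train, X_train, sigma)
    k_star = gauss_gram(X_train, x_star, sigma)
    return np.linalg.solve(G_X + lamb * np.eye(len(X_train)), k_star)

def cme_eval(X_train, Y_train, x_star, y_eval, sigma=0.3, lamb=5.0):
    # evaluate the embedding of p(Y | X = x*) at the output points y_eval:
    # mu_{Y|x*}(y) = sum_i beta_i(x*) k_Y(y_i, y)
    beta = cme_weights(X_train, x_star, sigma, lamb)
    return gauss_gram(y_eval, Y_train, sigma).dot(beta)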
In [30]:
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/MpzaCCbX-z4?rel=0&showinfo=0&start=148" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>')
Out[30]:
In [31]:
display(Image(filename="Pendulum_eigenfunctions.png", width=700))
In [32]:
display(Image(filename="KeywordClustering.png", width=700))
Get slides at ingmarschuster.com