In [1]:
from numpy.linalg import inv
import numpy as np
from scipy.linalg import eig
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances
from diffmaps_util import k, diag
Diffusion Distance
A distance function between any two points based on the random walk on the graph [1].
Diffusion map
Low dimensional description of the data by the first few eigenvectors [1].
In [2]:
n = 3
In [3]:
X, y = make_blobs(n_samples=n, cluster_std=.1, centers=[[1,1]])
X
Out[3]:
Define a pairwise similarity matrix between points...
In [4]:
L = k(X, .9)
and a diagonal normalization matrix $D_{i,i} = \sum_j L_{i,j}$
In [5]:
D = diag(L)
Matrix M
$M = D^{-1}L$
In [6]:
M = inv(D).dot(L)
The matrix M is adjoint to a symmetric matrix
$M_s = D^{1/2}MD^{-1/2}$
M and Ms share the same eigenvalues.
Since Ms is symmetric, it is diagonalizable and has a set of n real eigenvalues {$\lambda_{j=0}^{n-1}$} whose corresponding eigenvectors form an orthonormal basis of $\mathbf{R}^n$.
The left and right eigenvectors of M, denoted $\phi_j$ and $\psi_j$ are related to those of Ms.
In [7]:
Ms = diag(D, .5).dot(M).dot(diag(D,-.5))
Now we utilize the fact that by constrution M is a stochastic matrix
In [8]:
p0 = np.eye(n)
The stationary probability distribution $\Phi_0$
In [10]:
e = p0
for i in range(1000):
e = e.dot(M)
print e
In [20]:
p1 = p0.dot(M)
p1
Out[20]:
In [15]:
w, v = eig(M)
w = w.real
print w
print v
In [16]:
# sorting the eigenvalues and vectors
temp = {_:(w[_], v[:,_]) for _ in range(len(w))}
w = []
v = []
for _ in sorted(temp.items(), key=lambda x:x[1], reverse=True):
w.append(_[1][0])
v.append(_[1][1])
w = np.array(w)
v = np.array(v).T
print w
print v
In [17]:
psi = v / v[:,0]
print psi
In [25]:
diffmap = (w.reshape(-1,1) * psi.T).T[:,1:]
diffmap
Out[25]:
In [28]:
dt0 = pairwise_distances(diffmap)**2
dt0
Out[28]:
In [27]:
dt = []
for i in range(n):
_ = []
for j in range(n):
_.append(sum((p1[i]-p1[j])**2 / v[:,0]**2))
dt.append(_)
dt = np.array(dt)
dt
Out[27]:
In [124]:
(dt0 - dt)
Out[124]:
In [147]:
print M
M.sum(axis=1)
Out[147]:
In [153]:
w, v = eig(M)
w = w.real
print w
print v
In [189]:
p0*w[0]*v[:,0]**2 + p0*w[1]*v[:,1]**2 + p0*w[2]*v[:,2]**2
Out[189]: