In [1]:
from numpy.linalg import inv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from scipy.linalg import eig
from sklearn.metrics import pairwise_distances
from diffmaps_util import k, diag

In [2]:
X = np.array([1.0, 1.1, 1.0, 1.2, 1.2, 1.3]).reshape(3,2)
X


Out[2]:
array([[ 1. ,  1.1],
       [ 1. ,  1.2],
       [ 1.2,  1.3]])

In [5]:
X, y = make_blobs(n_samples=3, cluster_std=.1, centers=[[1,1]])
X


Out[5]:
array([[ 1.05250045,  1.10949366],
       [ 1.09966087,  1.10298056],
       [ 0.86643734,  0.85708148]])

In [6]:
L = k(X, .7)
D = diag(L)
M = inv(D).dot(L)
Ms = diag(D, .5).dot(M).dot(diag(D,-.5))

print 'D\n %s' % D
print 'M\n %s' % M
print 'Ms\n %s' % Ms


D
 [[ 2.86571411  0.          0.        ]
 [ 0.          2.84543714  0.        ]
 [ 0.          0.          2.71761657]]
M
 [[ 0.34895316  0.34782512  0.30322172]
 [ 0.35030376  0.35143985  0.29825639]
 [ 0.3197459   0.3122846   0.3679695 ]]
Ms
 [[ 0.34895316  0.34906224  0.31137422]
 [ 0.34906224  0.35143985  0.3051899 ]
 [ 0.31137422  0.3051899   0.3679695 ]]

In [7]:
p0 = np.array([1,0,0])

In [8]:
e = p0
for i in range(100):
    e = e.dot(M)
print e, sum(e)


[ 0.33999206  0.33758637  0.32242157] 1.0

Markov chain stationary distribution

$ \pi(y) = \frac{d(y)}{\sum_{z \in X} d(z)} $


In [9]:
d = np.diag(D/ np.diag(D).sum())
d


Out[9]:
array([ 0.33999206,  0.33758637,  0.32242157])

The chain is reversible following the detailed balanced condition

$ \pi(x) p(x,y) = \pi(y) p(y,x) $


In [10]:
print d[0] * p[0,1]
print d[1] * p[1,0]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-c4c7a464d479> in <module>()
----> 1 print d[0] * p[0,1]
      2 print d[1] * p[1,0]

NameError: name 'p' is not defined


In [11]:
e = p0
for i in range(1):
    e = e.dot(M)
print e, sum(e)


[ 0.34895316  0.34782512  0.30322172] 1.0

In [12]:
w, v = eig(Ms)
w = w.real

print 'phi\n %s' % (v * (d ** .5))
print 'psi\n %s' % (v * (d ** -.5))


phi
 [[-0.33999206 -0.42458984 -0.20155027]
 [-0.33878708  0.39537503 -0.25352945]
 [-0.33109028  0.03143923  0.46639241]]
psi
 [[-1.         -1.25772211 -0.62511409]
 [-0.99645586  1.17118184 -0.78632907]
 [-0.97381769  0.09312944  1.44652979]]

In [13]:
w, v = eig(Ms)
w = w.real
print w
print v


[  1.00000000e+00   8.52845667e-04   6.75096615e-02]
[[-0.58308838 -0.73076401 -0.35495339]
 [-0.58102183  0.68048222 -0.44649477]
 [-0.56782178  0.05411024  0.82137112]]

In [16]:
c = inv(v).dot(np.eye(3))
#c = inv(v).dot(np.array([1,0,0]))
c


Out[16]:
array([[-0.58308838, -0.58102183, -0.56782178],
       [-0.73076401,  0.68048222,  0.05411024],
       [-0.35495339, -0.44649477,  0.82137112]])

In [17]:
c.dot(p0)


Out[17]:
array([-0.58308838, -0.73076401, -0.35495339])

In [18]:
e


Out[18]:
array([ 0.34895316,  0.34782512,  0.30322172])

In [19]:
c[0]*v[:,0] + c[2]*w[2]*v[:,2] + c[1]*w[1]*v[:,1]


Out[19]:
array([ 0.34895316,  0.35143985,  0.3679695 ])

$ p(x,y) = \frac{k(x,y)}{d(x)} $


In [20]:
p = []
for x in range(3):
    _ = []
    for y in range(3):
        _.append(L[x,y]/d[x])
    p.append(_)
p = np.array(p)
print p


[[ 2.94124518  2.93173714  2.5557855 ]
 [ 2.9526291   2.96220489  2.51393383]
 [ 2.695064    2.63217438  3.10152945]]

$ a(x,y) = \frac{k(x,y)}{\sqrt{\pi(x)} \sqrt{\pi(y)}} $


In [21]:
a = []
for i in range(3):
    b = []
    for j in range(3):
        b.append(L[i,j]/(np.sqrt(d[i])*np.sqrt(d[j])))
    a.append(b)
a = np.array(a)
print a


[[ 2.94124518  2.94216458  2.624501  ]
 [ 2.94216458  2.96220489  2.57237482]
 [ 2.624501    2.57237482  3.10152945]]

$ a(x,y) = \frac{\sqrt{\pi(x)}}{\sqrt{\pi(y)}} p(x, y) $


In [22]:
a = []
for x in range(3):
    _ = []
    for y in range(3):
        _.append(np.sqrt(e[x])/np.sqrt(e[y])*p[x,y])
    a.append(_)
a = np.array(a)
print a


[[ 2.94124518  2.93648731  2.74174977]
 [ 2.94785282  2.96220489  2.69249036]
 [ 2.51226628  2.4576178   3.10152945]]

$ \phi_j = \mathbf{v}_j D^{1/2} $


In [23]:
phi = v * (d ** .5)
print phi


[[-0.33999206 -0.42458984 -0.20155027]
 [-0.33878708  0.39537503 -0.25352945]
 [-0.33109028  0.03143923  0.46639241]]

$ \phi_l(x) = \Phi_l(x) \times \pi(x) $


In [24]:
print v * v[:,0]


[[ 0.33999206  0.42458984  0.20155027]
 [ 0.33878708 -0.39537503  0.25352945]
 [ 0.33109028 -0.03143923 -0.46639241]]

$ \psi_j = \mathbf{v}_j D^{-1/2} $


In [25]:
psi = v * (d**-.5)
print psi


[[-1.         -1.25772211 -0.62511409]
 [-0.99645586  1.17118184 -0.78632907]
 [-0.97381769  0.09312944  1.44652979]]

$ \psi_l(x) = \Phi_l(x) / \pi(x) $


In [26]:
psi = v / v[:,0]
print psi


[[ 1.          1.25772211  0.62511409]
 [ 0.99645586 -1.17118184  0.78632907]
 [ 0.97381769 -0.09312944 -1.44652979]]

$ a(x,y) = \sum_{l \geq 0} \lambda_l \phi_l(x) \phi_l(y) $


In [27]:
a = []
for _ in range(3):
    a.append(w[_] * phi[:,0] * phi[:,0])
a = np.array(a)
a


Out[27]:
array([[  1.15594599e-01,   1.14776685e-01,   1.09620773e-01],
       [  9.85843528e-05,   9.78867983e-05,   9.34896016e-05],
       [  7.80375225e-03,   7.74853514e-03,   7.40046131e-03]])

$ p(t,y|x) = \phi_0(y) + \sum_{j \geq 1} a_j(x) \lambda_j^t \phi_j(y) $


In [28]:
e = p0
for i in range(1):
    e = e.dot(Ms)
print e


[ 0.34895316  0.34906224  0.31137422]

In [29]:
p = []
for _ in range(0,3):
    p.append(inv(v).dot(p0)[_] * w[_] * v[:,_])
p = np.array(p)
print p
print p.sum(axis=0)


[[  3.39992057e-01   3.38787079e-01   3.31090280e-01]
 [  4.55433258e-04  -4.24096195e-04  -3.37230658e-05]
 [  8.50567128e-03   1.06992574e-02  -1.96823382e-02]]
[ 0.34895316  0.34906224  0.31137422]

Diffusion map

$ \Psi_t(x) = (\lambda_1^t \psi_1(x), \lambda_2^t \psi_2(x), ..., \lambda_k^t \psi_k(x), ) $


In [30]:
diffmap = w * psi.T
diffmap


Out[30]:
array([[  1.00000000e+00,   8.49823065e-04,   6.57421027e-02],
       [  1.25772211e+00,  -9.98837355e-04,  -6.28713714e-03],
       [  6.25114088e-01,   6.70617337e-04,  -9.76547363e-02]])

The diffusion distance is equal to Euclidean distance in the diffusion map space with all (n-1) eigenvectors.


In [32]:
pairwise_distances(diffmap[:,1:])


Out[32]:
array([[ 0.        ,  0.07205296,  0.16339694],
       [ 0.07205296,  0.        ,  0.09138285],
       [ 0.16339694,  0.09138285,  0.        ]])

In [ ]: