A Method of Moments for Mixture Models and Hidden Markov Models
by A. Anandkumar, D. Hsu, and S.M. Kakade http://arxiv.org/abs/1203.0683
In [ ]:
import numpy as np
import numpy.random as npr
import scipy.linalg
In [ ]:
def pairwise_probabilities(X):
return X.T.dot(X)
def triplewise_probabilities(X):
# inefficient, will revisit later
return sum([np.einsum('i,j,k->ijk',x,x,x) for x in X])
def uniformly_sample_unit_sphere(k):
'''
Parameters
----------
k : int
Dimensionality of sphere
Returns
-------
theta : (k,), numpy.ndarray
'''
X = npr.rand(k)-0.5
norm = np.sqrt(np.sum(X**2))
theta = X / norm
return theta
def create_triples_operator(triples):
'''
Represent a third-order tensor of triple-wise probabilities as a linear operator
Parameters
----------
triples : (d,d,d), numpy.ndarray
Returns
-------
triples_operator : (d,k), numpy.ndarray
each column represents a topic by a vector of word probabilities,
'''
def triples_operator(eta):
''' '''
return sum([triples[:,:,i]*eta[i] for i in range(len(eta))])
return triples_operator
def algorithm_A(pairs,triples,k):
'''
Given low-order moments, recover mixture probabilities
Parameters
----------
pairs : (d,d), numpy.ndarray
triples : (d,d,d), numpy.ndarray
Returns
-------
M : (d,k), numpy.ndarray
each column represents a topic by a vector of word probabilities,
'''
d = len(pairs)
# truncated svd of pairs
U,s,V = np.linalg.svd(pairs)
U_hat = U[:,:k]
V_hat = V[:,:k]
# pick eta from range(U_hat) in R^d
theta = uniformly_sample_unit_sphere(k)
eta = U_hat.dot(theta)
# compute value of observable operator
triples_operator = create_triples_operator(triples)
oo_eta = np.dot( U_hat.T.dot(triples_operator(eta)).dot(V_hat), np.linalg.inv(U_hat.T.dot(pairs).dot(V_hat)))
# compute top-k right eigenvectors of oo_eta
# evals,evecs=np.linalg.eigh(oo_eta)
evals,evecs = scipy.linalg.eigh(oo_eta)
# compute mu's
mus = [U_hat.dot(evec) for evec in evecs.T]
mus = [mu/np.sum(mu) for mu in mus]
# concatenate and return M
M = np.vstack(mus).T
return M
In [ ]:
d=100
k=10
U_hat = npr.rand(d,k)
evecs = npr.rand(d,k)
evec = evecs[:,0]
U_hat.dot(evec).shape
In [ ]:
# computing low-order moments of temporally ordered data
# generic, e.g. continuous valued vectors or one-hot-encoded discrete variables
def onestep_probabilities(X):
return sum([np.einsum('i,j->ij',X[i],X[i+1]) for i in xrange(len(X)-1)])
def twostep_probabilities(X):
return sum([np.einsum('i,j,k->ijk',X[i],X[i+1],X[i+2]) for i in xrange(len(X)-2)])
# specific to lists of discrete trajectories
def discrete_onestep_probabilities(dtrajs):
'''
Parameters
----------
dtrajs : list of array-like
each element in dtrajs is a flat list/array of integers
Returns
-------
P_12 : (d,d), numpy.ndarray
'''
d = np.max(np.hstack(dtrajs))+1
P_12 = np.zeros((d,d))
for traj in dtrajs:
for i in xrange(len(traj)-1):
P_12[traj[i],traj[i+1]] += 1
return P_12 / len(np.hstack(dtrajs))
def discrete_twostep_probabilities(dtrajs):
'''
Parameters
----------
dtrajs : list of array-like
each element in dtrajs is a flat list/array of integers
Returns
-------
P_123 : (d,d,d), numpy.ndarray
'''
d = np.max(np.hstack(dtrajs))+1
P_123 = np.zeros((d,d,d))
for traj in dtrajs:
for i in xrange(len(traj)-2):
P_123[traj[i],traj[i+1],traj[i+2]] += 1
return P_123 / len(np.hstack(dtrajs))
Possible errors in method of moments paper:
In [ ]:
def sample_rotation_matrix(k):
raise NotImplementedError
In [ ]:
def algorithm_B(P_12, P_13, P_123, k):
'''
General method of moments estimator.
Parameters
----------
P_12 : (d,d), numpy.ndarray
Empirical average of tensor product of x_1 and x_2, (x_1 \otimes x_2)
P_13 : (d,d), numpy.ndarray
Empirical average of tensor product of x_1 and x_3, (x_1 \otimes x_3)
P_123 : (d,d,d), numpy.ndarray
Empirical average of tensor product of x_1, x_2, and x_3, (x_1 \otimes x_2 \otimes x_3)
k : int
number of latent mixture components
Returns
-------
M_3 : (d,k), numpy.ndarray
'''
# check inputs are compatible shapes
d = len(P_12)
assert(P_12.shape==(d,d))
assert(P_13.shape==(d,d))
assert(P_123.shape==(d,d,d))
assert(k<=d)
# compute top-k left and right singular vectors of P_12
U1,_,U2=np.linalg.svd(P_12)
U1 = U1[:,:k]
U2 = U2[:,:k]
# compute top-k right singular vectors of P_13
_,_,U3=np.linalg.svd(P_13)
U3 = U3[:,:k]
# pick invertible theta
theta = sample_rotation_matrix(k)
# form B_123(U3 theta[0])
B_123 = (U1.T.dot(P_123).dot(U3.dot(theta[0])).dot(U2)).dot(np.linalg.inv(U1.T.dot(P_12).dot(U_2)))
# compute R1 that diagonalizes B_123(U3 theta[0])
raise NotImplementedError
# form matrix L
raise NotImplementedError
# form and return M3
M3 = U3.dot(np.linalg.inv(theta)).dot(L)
return M3
In [ ]:
# generate a random instance
npr.seed(0)
k = 10 # number of topics
d = 100 # number of words
# distribution over topics
w = npr.rand(k)
w /= np.sum(w)
# conditional distributions over words, given topics
M = npr.rand(d,k)
M /= M.sum(0)
In [ ]:
%%time
d=100
k=10
U = npr.rand(d,k)
V = npr.rand(d,k)
pairs = npr.rand(d,d)
triples = npr.rand(d,d,d)
eta = npr.rand(d)
def triples_operator(triples,eta):
return sum([triples[:,:,i]*eta[i] for i in range(len(eta))])
triples_eta = triples_operator(triples,eta)
oo = np.dot( U.T.dot(triples_eta).dot(V), np.linalg.inv(U.T.dot(pairs).dot(V)))