In [1]:
from __future__ import division
import numpy as np
from stochmatrix import StochMatrix, stationary_dists, gth_solve
The routine mc_compute_stationary in the current mc_tools is not very efficient, in that it calls a routine for the general eigenvalue problem even though we know that the matrix under consideration has an eigenvalue 1 and we are looking for an eigenvector associated with it. The routine gth_solve directly solves for a nontrivial solution to the linear system $x (P - I) = 0$ for an irreducible stochastic matrix $P$, using the Grassmann-Taksar-Heyman (GTH) algorithm, a numerically stable variant of Gaussian elimination. The algorithm involves no subtraction, and it turns out to be accurate enough to give almost correct stationary distributions even for KMR matrices.
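For reference, here is a minimal sketch of the GTH elimination for an irreducible input (the name gth_solve_sketch is hypothetical, and the actual gth_solve handles edge cases that this sketch omits):

def gth_solve_sketch(A):
    # Solve x A = 0 for an irreducible A with nonnegative
    # off-diagonal entries and rows summing to zero; only the
    # off-diagonal entries of A are actually referenced.
    A = np.array(A, dtype=float)  # work on a copy
    n = A.shape[0]
    x = np.zeros(n)
    for k in range(n-1):
        # The pivot is the sum of the remaining off-diagonal
        # entries of row k, so no subtraction (and hence no
        # cancellation of significant digits) ever occurs
        scale = np.sum(A[k, k+1:])
        A[k+1:, k] /= scale
        A[k+1:, k+1:] += np.outer(A[k+1:, k], A[k, k+1:])
    # Backward substitution, then normalization
    x[n-1] = 1
    for k in range(n-2, -1, -1):
        x[k] = np.dot(x[k+1:], A[k+1:, k])
    return x / np.sum(x)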
In [2]:
from test_stochmatrix import KMR_Markov_matrix_sequential
In [3]:
P = KMR_Markov_matrix_sequential(N=27, p=1./3, epsilon=1e-2)
In [4]:
x_P = gth_solve(P-np.identity(28))
In [5]:
print x_P
In [6]:
print np.dot(x_P, P)
In [7]:
np.linalg.norm(np.dot(x_P, P)-x_P, ord=np.inf)
Out[7]:
In [8]:
Q = KMR_Markov_matrix_sequential(N=3, p=1./3, epsilon=1e-14)
In [9]:
print Q
In [10]:
x_Q = gth_solve(Q-np.identity(4))
In [11]:
print x_Q
In [12]:
print np.dot(x_Q, Q)
In [13]:
np.dot(x_Q, Q) == x_Q
Out[13]:
In fact, in solving $x A = 0$, where $A = P - I$, the algorithm only uses the values of the off-diagonal entries of $A$, under the assumption that $a_{ii} = -\sum_{j \neq i} a_{ij}$. It is therefore irrelevant whether one passes $P - I$ or $P$ to gth_solve:
In [14]:
gth_solve(np.array([[0.4, 0.6], [0.2, 0.8]])-np.identity(2))
Out[14]:
In [15]:
gth_solve(np.array([[0.4, 0.6], [0.2, 0.8]]))
Out[15]:
In [16]:
np.all(gth_solve(P) == gth_solve(P-np.identity(28)))
Out[16]:
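Indeed, since the diagonal entries are never referenced, a matrix with its diagonal zeroed out should give the same result as well; a quick check of this claim:

B = np.array([[0.4, 0.6], [0.2, 0.8]])
print gth_solve(B - np.diag(np.diag(B)))
print gth_solve(B)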
The routine gth_solve above is designed to deal only with irreducible matrices. To find all stationary distributions of a reducible stochastic matrix, we need to decompose it into its irreducible components. This is what the class StochMatrix does. Implemented as a subclass of numpy.ndarray, it endows the input matrix with the structure of a directed graph, from which it computes the communication classes (strongly connected components) and the recurrent classes (closed communication classes). Internally, it calls scipy.sparse.csgraph.connected_components; see also Strongly Connected Components | timl.blog.
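To illustrate the underlying call, the strongly connected components of a small matrix can be obtained directly from scipy.sparse.csgraph as follows (a sketch only; StochMatrix adds further logic on top of this, such as identifying which classes are closed):

from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

P_ex = np.array([[0.5, 0.5, 0.0],
                 [0.5, 0.5, 0.0],
                 [0.3, 0.3, 0.4]])  # state 2 is transient
n_classes, labels = connected_components(csr_matrix(P_ex),
                                         connection='strong')
print n_classes  # number of communication classes
print labels     # communication class label of each state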
In [17]:
A = StochMatrix([[1, 0], [0, 1]])
In [18]:
A
Out[18]:
In [19]:
print A
In [20]:
StochMatrix.__bases__
Out[20]:
In [21]:
A.is_irreducible
Out[21]:
In [22]:
A.num_comm_classes
Out[22]:
In [23]:
A.comm_classes()
Out[23]:
In [24]:
A.num_rec_classes
Out[24]:
In [25]:
A.rec_classes()
Out[25]:
Nontrivial example:
In [26]:
# Taken from www.math.wustl.edu/~feres/Math450Lect04.pdf
P = np.zeros((10, 10))
P[[0, 0], [0, 2]] = 1./2
P[1, 1], P[1, 6] = 1./3, 2./3
P[2, 0] = 1
P[3, 4] = 1
P[[4, 4, 4], [3, 4, 8]] = 1./3
P[5, 5] = 1
P[6, 6], P[6, 8] = 1./4, 3./4
P[[7, 7, 7, 7], [2, 3, 7, 9]] = 1./4
P[8, 1] = 1
P[[9, 9, 9], [1, 4, 9]] = 1./3
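As a quick sanity check that P is indeed a stochastic matrix, each row should sum to one:

print np.allclose(P.sum(axis=1), 1)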
In [27]:
np.set_printoptions(precision=3)
In [28]:
print P
In [29]:
P = StochMatrix(P)
In [30]:
print P
Whether P is irreducible:
In [31]:
P.is_irreducible
Out[31]:
Communication classes of P:
In [32]:
P.num_comm_classes
Out[32]:
In [33]:
P.comm_classes()
Out[33]:
Recurrent classes of P:
In [34]:
P.num_rec_classes
Out[34]:
In [35]:
P.rec_classes()
Out[35]:
Recurrent states of P:
In [36]:
rec_states = [i for rec_class in P.rec_classes() for i in rec_class]
print rec_states
Transient states of P:
In [37]:
trans_states = [i for i in range(10) if i not in rec_states]
print trans_states
Canonical form of P, with the recurrent classes grouped first and the transient states placed last:
In [38]:
permutation = rec_states + trans_states
print permutation
In [39]:
print P[permutation, :][:, permutation]
Decompose P into its irreducible submatrices:
In [40]:
for i, rec_class in enumerate(P.rec_classes()):
    print 'P{0} ='.format(i)
    print P[rec_class, :][:, rec_class], '(indices = {0})'.format(rec_class)
Now the routine stationary_dists, converting the input matrix to a StochMatrix if it is not one already, passes the irreducible submatrices of P to gth_solve to obtain all the stationary distributions (as rows of the output array).
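A minimal sketch of this logic, assuming the interfaces shown above (illustrative only; the name stationary_dists_sketch is hypothetical and input handling is omitted):

def stationary_dists_sketch(P):
    P = StochMatrix(P)
    dists = np.zeros((P.num_rec_classes, P.shape[0]))
    for i, rec_class in enumerate(P.rec_classes()):
        # Each recurrent class supports exactly one stationary
        # distribution; solve the irreducible submatrix and embed
        # the solution into the full state space
        dists[i, rec_class] = gth_solve(P[rec_class, :][:, rec_class])
    return dists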
In [41]:
stationary_dists([[1, 0], [0, 1]])
Out[41]:
In [42]:
stationary_dists([[0.4, 0.6], [0.2, 0.8]])
Out[42]:
In [43]:
print P
In [44]:
dists = stationary_dists(P)
print dists
In [45]:
print np.dot(dists, P)
In [46]:
np.linalg.norm(np.dot(dists, P)-dists, ord=np.inf)
Out[46]:
Let us compare the performance of stationary_dists with that of the current mc_tools, which uses scipy.linalg.eig.
In [47]:
import scipy.linalg as la
def stationary_dists_by_scipy_eig(P):
    # Find the left eigenvectors associated with eigenvalue 1
    eigvals, eigvecs = la.eig(P, left=True, right=False)
    index = np.where(abs(eigvals - 1.) < 1e-12)[0]
    uniteigvecs = eigvecs[:, index]
    # Normalize each eigenvector so that its entries sum to one
    return uniteigvecs/np.sum(uniteigvecs, axis=0)
In [48]:
stationary_dists_by_scipy_eig(np.array([[0.4, 0.6], [0.2, 0.8]]))
Out[48]:
Reducible matrix:
In [49]:
print P
In [50]:
dists0 = stationary_dists(P)
print dists0
In [51]:
dists1 = stationary_dists_by_scipy_eig(P).T
print dists1
In [52]:
np.linalg.norm(dists0-dists1, ord=np.inf)
Out[52]:
In [53]:
np.linalg.norm(np.dot(dists0, P)-dists0, ord=np.inf)
Out[53]:
In [54]:
np.linalg.norm(np.dot(dists1, P)-dists1, ord=np.inf)
Out[54]:
In [55]:
%timeit stationary_dists(P)
%timeit stationary_dists_by_scipy_eig(P)
Larger matrices:
In [56]:
sizes = [10, 100, 1000]
for n in sizes:
    print 'identity({0})'.format(n)
    %timeit stationary_dists(np.identity(n))
    %timeit stationary_dists_by_scipy_eig(np.identity(n))
Irreducible matrices:
In [57]:
sizes = [10, 50, 100, 1000]
rand_matrices = []
for n in sizes:
    Q = np.random.rand(n, n)
    Q /= np.sum(Q, axis=1, keepdims=True)
    rand_matrices.append(Q)
In [58]:
for i, Q in enumerate(rand_matrices):
    dists0 = stationary_dists(Q)
    dists1 = stationary_dists_by_scipy_eig(Q).T
    print 'rand_matrices[{0}] ({1} x {2})'.format(i, Q.shape[0], Q.shape[1])
    for dists in [dists0, dists1]:
        print np.linalg.norm(np.dot(dists, Q)-dists, ord=np.inf)
In [59]:
for i, Q in enumerate(rand_matrices):
    print 'rand_matrices[{0}] ({1} x {2})'.format(i, Q.shape[0], Q.shape[1])
    %timeit gth_solve(Q)
    %timeit stationary_dists(Q)
    %timeit stationary_dists_by_scipy_eig(Q)
stationary_dists seems to perform better than scipy.linalg.eig for large matrices, which is natural given that gth_solve computes only the single eigenvector of interest, while scipy.linalg.eig computes the entire spectrum.
In [60]:
import platform
platform.platform()
Out[60]:
In [61]:
import sys
print sys.version
In [62]:
np.__version__
Out[62]:
In [63]:
import scipy
scipy.__version__
Out[63]: