In [1]:
import numpy as np
import numpy.random as npr
import pylab as pl
from scipy import stats
import cython
%load_ext cython
%matplotlib inline
In [2]:
# Walker alias method implementation
# adapted from https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
def alias_setup(probs):
K = len(probs)
q = np.zeros(K)
J = np.zeros(K, dtype=np.int)
# Sort the data into the outcomes with probabilities
# that are larger and smaller than 1/K.
smaller = []
larger = []
for kk, prob in enumerate(probs):
q[kk] = K*prob
if q[kk] < 1.0:
smaller.append(kk)
else:
larger.append(kk)
# Loop though and create little binary mixtures that
# appropriately allocate the larger outcomes over the
# overall uniform mixture.
while len(smaller) > 0 and len(larger) > 0:
small = smaller.pop()
large = larger.pop()
J[small] = large
q[large] = q[large] - (1.0 - q[small])
if q[large] < 1.0:
smaller.append(large)
else:
larger.append(large)
return (J, q)
In [28]:
%%cython
def c_alias_draw(J, q, K, rand1, rand2):
# Draw from the overall uniform mixture.
kk = int(rand1*K)
# Draw from the binary mixture, either keeping the
# small one, or choosing the associated larger one.
if rand2 < q[kk]:
return kk
else:
return J[kk]
In [29]:
class Alias():
def __init__(self,probs):
self.probs = probs
self.J,self.q = alias_setup(probs)
self.K = len(probs)
self.rvs = stats.
def draw(self):
return c_alias_draw(self.J,self.q,self.K,rand1=npr.rand(),rand2=npr.rand())
def __repr__(self):
return "Alias(" + str(self.probs / sum(self.probs)) + ")"
In [94]:
class MSM():
def __init__(self,T=None,size=None):
if type(T) == type(None):
T = self.gen_stoch_matrix(size)
else:
assert(T.shape[0] == T.shape[1])
self.T = T
self.sampler = np.empty(len(T),dtype=object)
for i in range(len(T)):
self.sampler[i] = Alias(T[i])
def draw(self,num_samples=1000):
samples = np.zeros(num_samples,dtype=int)
samples[0] = npr.randint(len(self.T))
for i in range(1,num_samples):
samples[i] = self.sampler[samples[i-1]].draw()
return samples
def gen_stoch_matrix(self,size=5,num_iter=20):
" Generates a random doubly-stochastic matrix "
T = npr.rand(size,size)
for i in range(len(T)):
T[i] /= sum(T[i])
#for _ in range(num_iter):
# for i in range(len(T)):
# T[i] /= sum(T[i])
# for i in range(len(T)):
# T[:,i] /= sum(T[:,i])
return T
def gen_double_stoch_matrix(self,size=5,num_iter=20):
" Generates a random doubly-stochastic matrix "
T = npr.rand(size,size)
for _ in range(num_iter):
for i in range(len(T)):
T[i] /= sum(T[i])
for i in range(len(T)):
T[:,i] /= sum(T[:,i])
return T
def compute_stationary_prob(self):
" CURRENTLY BROKEN "
return -np.linalg.eigh(self.T)[1][-1]
def __repr__(self):
return str(len(self.T)) + "-state MSM"
In [33]:
size = 50
m=MSM(size=size)
m
Out[33]:
In [34]:
samples = m.draw(num_samples=10000)
pl.hist(samples,bins=size,normed=True);
In [8]:
n = 50
pl.scatter(range(n),samples[-n:]+npr.rand(n)*0.2-0.1,alpha=0.5)
pl.plot(samples[-n:],alpha=0.1)
pl.xlabel('Time')
pl.ylabel('State')
Out[8]:
In [165]:
m.T
Out[165]:
In [156]:
m.T
Out[156]:
In [133]:
m.sampler
Out[133]:
In [134]:
np.linalg.eigh(m.T)#[1][-1]
Out[134]:
In [16]:
#for i in range(len(m.T)):
# pl.figure()
pl.bar(range(len(m.T)),np.linalg.eigh(m.T.T)[1][-1])
Out[16]:
In [19]:
np.linalg.eigh(m.T.T)[1][-1]
Out[19]:
In [136]:
pl.bar(range(len(m.T)),m.compute_stationary_prob())
Out[136]:
In [137]:
samples = m.draw(num_samples=10000)
pl.hist(samples,bins=5,normed=True);
In [138]:
In [138]:
In [24]:
m.sampler[0].probs
Out[24]:
In [28]:
pl.bar(range(len(m.T)),m.sampler[0].probs)
Out[28]:
In [29]:
h = pl.hist([m.sampler[0].draw() for i in range(100000)],bins=5);
In [31]:
h[0] / sum(h[0])
Out[31]:
In [32]:
m.T[0]
Out[32]:
In [61]:
a = Alias(np.ones(1000))
In [62]:
a_results = [a.draw() for _ in xrange(1000000)]
In [64]:
a;
In [65]:
pl.hist(a_results,bins=100);
In [48]:
max(a_results)
Out[48]:
In [74]:
def transition_count_matrix(sequence):
n = len(set(sequence)) # number of states
C = np.zeros((n,n),dtype=int)
for t in range(1,len(sequence)):
i,j = sequence[t-1:t+1]
C[i,j] += 1
return C
In [77]:
example_T = np.array([[0.86207, 0.12931, 0.00862],[0.15625, 0.83333, 0.01041],[0.00199, 0.00199, 0.99602]]) # from eq. 5.38 of MSM book
In [95]:
m = MSM(example_T)
In [96]:
m_samples = m.draw(1000000)
In [102]:
C = np.array(transition_count_matrix(m_samples),dtype=float)
In [103]:
C.max()
Out[103]:
In [104]:
C.min()
Out[104]:
In [105]:
for i in range(len(C)):
C[i]/= sum(C[i])
In [106]:
C.max()
Out[106]:
In [107]:
C.min()
Out[107]:
In [108]:
C
Out[108]:
In [110]:
example_T
Out[110]:
In [111]:
((C-example_T)**2).sum()
Out[111]:
In [ ]: