In [1]:
import numpy as np
import numpy.random as npr
import pylab as pl
from scipy import stats
import cython

%load_ext cython
%matplotlib inline

In [2]:
# Walker alias method implementation
# adapted from https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/

def alias_setup(probs):
    K       = len(probs)
    q       = np.zeros(K)
    J       = np.zeros(K, dtype=np.int)
 
    # Sort the data into the outcomes with probabilities
    # that are larger and smaller than 1/K.
    smaller = []
    larger  = []
    for kk, prob in enumerate(probs):
        q[kk] = K*prob
        if q[kk] < 1.0:
            smaller.append(kk)
        else:
            larger.append(kk)
 
    # Loop though and create little binary mixtures that
    # appropriately allocate the larger outcomes over the
    # overall uniform mixture.
    while len(smaller) > 0 and len(larger) > 0:
        small = smaller.pop()
        large = larger.pop()
 
        J[small] = large
        q[large] = q[large] - (1.0 - q[small])
 
        if q[large] < 1.0:
            smaller.append(large)
        else:
            larger.append(large)
 
    return (J, q)

In [28]:
%%cython
def c_alias_draw(J, q, K, rand1, rand2):
 
    # Draw from the overall uniform mixture.
    kk = int(rand1*K)
 
    # Draw from the binary mixture, either keeping the
    # small one, or choosing the associated larger one.
    if rand2 < q[kk]:
        return kk
    else:
        return J[kk]

In [29]:
class Alias():
    def __init__(self,probs):
        self.probs = probs
        self.J,self.q = alias_setup(probs)
        self.K = len(probs)
        self.rvs = stats.
    
    def draw(self):
        return c_alias_draw(self.J,self.q,self.K,rand1=npr.rand(),rand2=npr.rand())
    
    def __repr__(self):
        return "Alias(" + str(self.probs / sum(self.probs)) + ")"

In [94]:
class MSM():
    def __init__(self,T=None,size=None):
        if type(T) == type(None):
            T = self.gen_stoch_matrix(size)
        else:
            assert(T.shape[0] == T.shape[1])
        self.T = T
        self.sampler = np.empty(len(T),dtype=object)
        for i in range(len(T)):
            self.sampler[i] = Alias(T[i])
    
    def draw(self,num_samples=1000):
        samples = np.zeros(num_samples,dtype=int)
        samples[0] = npr.randint(len(self.T))
        for i in range(1,num_samples):
            samples[i] = self.sampler[samples[i-1]].draw()
        return samples
    
    def gen_stoch_matrix(self,size=5,num_iter=20):
        " Generates a random doubly-stochastic matrix "
        T = npr.rand(size,size)
        for i in range(len(T)):
                T[i] /= sum(T[i])
        
        #for _ in range(num_iter):
        #    for i in range(len(T)):
        #        T[i] /= sum(T[i])
        #    for i in range(len(T)):
        #        T[:,i] /= sum(T[:,i])
        return T
    
    def gen_double_stoch_matrix(self,size=5,num_iter=20):
        " Generates a random doubly-stochastic matrix "
        T = npr.rand(size,size)
        
        for _ in range(num_iter):
            for i in range(len(T)):
                T[i] /= sum(T[i])
            for i in range(len(T)):
                T[:,i] /= sum(T[:,i])
        return T
    
    def compute_stationary_prob(self):
        " CURRENTLY BROKEN "
        return -np.linalg.eigh(self.T)[1][-1]
    
    def __repr__(self):
        return str(len(self.T)) + "-state MSM"

In [33]:
size = 50
m=MSM(size=size)
m


Out[33]:
50-state MSM

In [34]:
samples = m.draw(num_samples=10000)
pl.hist(samples,bins=size,normed=True);



In [8]:
n = 50
pl.scatter(range(n),samples[-n:]+npr.rand(n)*0.2-0.1,alpha=0.5)
pl.plot(samples[-n:],alpha=0.1)
pl.xlabel('Time')
pl.ylabel('State')


Out[8]:
<matplotlib.text.Text at 0x10fdac750>

In [165]:
m.T


Out[165]:
array([[ 0.06579527,  0.22500948,  0.22847765,  0.29126046,  0.18945715],
       [ 0.29213069,  0.11412216,  0.08825712,  0.26801864,  0.23747139],
       [ 0.28028382,  0.13166307,  0.26402081,  0.18071704,  0.14331526],
       [ 0.29635928,  0.04007357,  0.18683545,  0.28003945,  0.19669226],
       [ 0.1712578 ,  0.29857132,  0.16530479,  0.13383697,  0.23102912]])

In [156]:
m.T


Out[156]:
array([[  6.09300173e-01,   5.35064121e-02,   9.55452149e-01,
          6.38156531e-02,   5.85711541e-01,   4.79035379e-01,
          3.06468447e-01,   5.09662441e-01,   3.43994847e-01,
          9.31313678e-01],
       [  4.76607149e-01,   7.46623433e-01,   9.31175289e-01,
          7.00204506e-01,   1.35893979e-01,   2.86944487e-01,
          3.67215252e-01,   1.50956516e-02,   4.01244776e-01,
          2.19200758e-01],
       [  3.24859460e-01,   4.46195639e-01,   5.50952779e-01,
          2.09950648e-01,   1.63638529e-01,   7.35155867e-01,
          6.99929483e-02,   2.74283423e-01,   2.65954141e-01,
          4.65480248e-01],
       [  6.08174526e-01,   1.43992115e-01,   3.72393190e-02,
          4.06659691e-01,   5.48883361e-01,   2.49035102e-01,
          1.28722393e-01,   4.60222721e-02,   7.98983179e-01,
          9.08080782e-01],
       [  7.82716209e-01,   8.80577670e-01,   2.21033417e-01,
          8.74032110e-01,   3.99887346e-01,   3.87627388e-01,
          5.45637474e-01,   9.86262223e-01,   4.50504652e-01,
          3.90136105e-01],
       [  6.84061683e-01,   7.63385129e-01,   6.18015848e-01,
          9.22745669e-01,   3.33173653e-01,   2.06099962e-01,
          2.29477667e-01,   9.25350949e-01,   8.92460851e-01,
          6.66084401e-01],
       [  2.40989238e-01,   5.29908945e-01,   9.04216309e-04,
          4.76114132e-02,   5.83659089e-01,   6.22385295e-01,
          2.32674959e-01,   8.03563193e-01,   9.15353596e-01,
          1.04670646e-01],
       [  9.16895930e-01,   6.51769088e-01,   7.20133460e-01,
          3.05244130e-01,   4.12600271e-01,   2.21839400e-01,
          8.71524465e-01,   6.71450706e-01,   3.22481610e-01,
          8.74070350e-01],
       [  1.29515769e-01,   3.86158564e-01,   2.48030791e-01,
          2.31366679e-01,   8.78695544e-01,   2.79007731e-01,
          1.75996447e-01,   5.30830477e-01,   8.77455458e-01,
          2.31082600e-01],
       [  4.34739045e-01,   8.86596860e-02,   6.32646576e-01,
          3.43277425e-01,   5.26509546e-01,   4.57769106e-02,
          8.76394438e-02,   7.30869286e-01,   1.17684384e-01,
          6.21311107e-01]])

In [133]:
m.sampler


Out[133]:
array([Alias([ 0.34878554  0.27896958  0.02964633  0.23807262  0.10452593]),
       Alias([ 0.14397713  0.29403461  0.33644248  0.07236608  0.15317971]),
       Alias([ 0.13049957  0.11436246  0.21722283  0.23618502  0.30173013]),
       Alias([ 0.18720485  0.03952384  0.29867679  0.26674322  0.20785131]),
       Alias([ 0.18953291  0.27310951  0.11801158  0.18663306  0.23271293])], dtype=object)

In [134]:
np.linalg.eigh(m.T)#[1][-1]


Out[134]:
(array([-0.11136979,  0.01597526,  0.1770128 ,  0.32912822,  0.94875263]),
 array([[-0.04203304,  0.18551268,  0.85618901, -0.02053496, -0.47993433],
        [ 0.4042168 ,  0.39508642, -0.30665904, -0.65190594, -0.40186364],
        [-0.51358777,  0.51291144, -0.35844475,  0.41623375, -0.4140245 ],
        [ 0.60665791, -0.31341516, -0.14986687,  0.54328776, -0.46488218],
        [-0.45059492, -0.66946587, -0.14818196, -0.32586015, -0.46972005]]))

In [16]:
#for i in range(len(m.T)):
#    pl.figure()
pl.bar(range(len(m.T)),np.linalg.eigh(m.T.T)[1][-1])


Out[16]:
<Container object of 5 artists>

In [19]:
np.linalg.eigh(m.T.T)[1][-1]


Out[19]:
array([ 0.70116926,  0.41597399, -0.26345487, -0.22990918, -0.46158488])

In [136]:
pl.bar(range(len(m.T)),m.compute_stationary_prob())


Out[136]:
<Container object of 5 artists>

In [137]:
samples = m.draw(num_samples=10000)
pl.hist(samples,bins=5,normed=True);

In [138]:


In [138]:


In [24]:
m.sampler[0].probs


Out[24]:
array([ 0.22307062,  0.21244712,  0.14354619,  0.18207363,  0.23886244])

In [28]:
pl.bar(range(len(m.T)),m.sampler[0].probs)


Out[28]:
<Container object of 5 artists>

In [29]:
h = pl.hist([m.sampler[0].draw() for i in range(100000)],bins=5);



In [31]:
h[0] / sum(h[0])


Out[31]:
array([ 0.28095,  0.21996,  0.18003,  0.2273 ,  0.09176])

In [32]:
m.T[0]


Out[32]:
array([ 0.22307062,  0.21244712,  0.14354619,  0.18207363,  0.23886244])

In [61]:
a = Alias(np.ones(1000))

In [62]:
a_results = [a.draw() for _ in xrange(1000000)]

In [64]:
a;

In [65]:
pl.hist(a_results,bins=100);



In [48]:
max(a_results)


Out[48]:
9

In [74]:
def transition_count_matrix(sequence):
    n = len(set(sequence)) # number of states
    C = np.zeros((n,n),dtype=int)
    for t in range(1,len(sequence)):
        i,j = sequence[t-1:t+1]
        C[i,j] += 1
    return C

In [77]:
example_T = np.array([[0.86207, 0.12931, 0.00862],[0.15625, 0.83333, 0.01041],[0.00199, 0.00199, 0.99602]]) # from eq. 5.38 of MSM book

In [95]:
m = MSM(example_T)

In [96]:
m_samples = m.draw(1000000)

In [102]:
C = np.array(transition_count_matrix(m_samples),dtype=float)

In [103]:
C.max()


Out[103]:
700761.0

In [104]:
C.min()


Out[104]:
1363.0

In [105]:
for i in range(len(C)):
    C[i]/= sum(C[i])

In [106]:
C.max()


Out[106]:
0.99604431279351402

In [107]:
C.min()


Out[107]:
0.0019415985354149848

In [108]:
C


Out[108]:
array([[ 0.86204213,  0.12918648,  0.00877139],
       [ 0.15544161,  0.83442946,  0.01012893],
       [ 0.00201409,  0.0019416 ,  0.99604431]])

In [110]:
example_T


Out[110]:
array([[ 0.86207,  0.12931,  0.00862],
       [ 0.15625,  0.83333,  0.01041],
       [ 0.00199,  0.00199,  0.99602]])

In [111]:
((C-example_T)**2).sum()


Out[111]:
1.9837684627936391e-06

In [ ]: