In [1]:
from __future__ import division

import numpy as np
import quantecon as qe

In [2]:
import warnings

numba_installed = True
try:
    from numba import jit
except ImportError:
    numba_installed = False
    numba_warning_message = "Numba import failed.  Falling back to non-optimized routine."
    warnings.warn(numba_warning_message, UserWarning)

In [3]:
def mc_sample_path(P, init, sample_size):
    # CDFs, one for each row of P
    cdfs = np.cumsum(P, axis=-1)
    
    # Random values, uniformly sampled from [0, 1)
    u = np.random.random(size=sample_size)
    
    # === set up array to store output === #
    X = np.empty(sample_size, dtype=int)
    if isinstance(init, int):
        X[0] = init
    else:
        cdf0 = np.cumsum(init)
        X[0] = cdf0.searchsorted(u[0], side='right')

    # === generate the sample path === #
    n = len(cdfs)
    for t in range(sample_size-1):
        lo = -1
        hi = n - 1
        while(lo < hi-1):
            m = (lo + hi) // 2
            if u[t+1] < cdfs[X[t], m]:
                hi = m
            else:
                lo = m
        X[t+1] = hi

    return X


def mc_sample_path_numpy(P, init, sample_size):
    # CDFs, one for each row of P
    cdfs = np.cumsum(P, axis=-1)
    
    # Random values, uniformly sampled from [0, 1)
    u = np.random.random(size=sample_size)
    
    # === set up array to store output === #
    X = np.empty(sample_size, dtype=int)
    if isinstance(init, int):
        X[0] = init
    else:
        cdf0 = np.cumsum(init)
        X[0] = cdf0.searchsorted(u[0], side='right')

    # === generate the sample path === #
    for t in range(sample_size-1):
        X[t+1] = cdfs[X[t]].searchsorted(u[t+1], side='right')

    return X


if numba_installed:
    mc_sample_path = jit(mc_sample_path)
else:
    mc_sample_path = mc_sample_path_numpy

In [4]:
P = np.array([[0.4, 0.6], [0.2, 0.8]])
init = (0.25, 0.75)
sample_size = 10

In [5]:
qe.mc_sample_path(P, init=init, sample_size=sample_size)


Out[5]:
array([1, 0, 1, 0, 1, 1, 1, 1, 1, 0])

In [6]:
mc_sample_path_numpy(P, init=init, sample_size=sample_size)


Out[6]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [7]:
mc_sample_path(P, init=init, sample_size=sample_size)


Out[7]:
array([1, 1, 1, 1, 1, 1, 1, 0, 0, 0])

In [8]:
mc_sample_path(P, init, sample_size)


Out[8]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0])

In [9]:
sample_size = 10**4
for func in [qe.mc_sample_path, mc_sample_path_numpy, mc_sample_path]:
    X = func(P, init, sample_size)
    print X.sum() / sample_size  # Should be close to 0.75


0.7476
0.754
0.7483

In [10]:
P = np.array([[0.4, 0.6], [0.2, 0.8]])
init = (0.25, 0.75)
sample_size = 10**5 * 2
for func in [qe.mc_sample_path, mc_sample_path_numpy, mc_sample_path]:
    %timeit func(P, init, sample_size)


1 loops, best of 3: 871 ms per loop
1 loops, best of 3: 351 ms per loop
100 loops, best of 3: 2.77 ms per loop

In [11]:
def random_probvec(k, m):
    """
    Create probability vectors.
    Parameters
    ----------
    k : scalar(int)
        Dimension of each probability vectors.
    m : scalar(int)
        Number of probability vectors.
    Returns
    -------
    ndarray(float, ndim=2)
        Array of shape (m, k) containing probability vectors as rows.
    """
    x = np.empty((m, k+1))
    r = np.random.rand(m, k-1)
    r.sort(axis=-1)
    x[:, 0], x[:, 1:k], x[:, k] = 0, r, 1
    return np.diff(x, axis=-1)

def random_stochmat(k):
    return random_probvec(k, k)

In [12]:
sizes = [10, 100, 1000, 3000]
rand_matrices = []

for n in sizes:
    Q = random_stochmat(n)
    rand_matrices.append(Q)

In [13]:
sample_size = 10**5 * 2
init = 0
for i, Q in enumerate(rand_matrices):
    print 'rand_matrices[{0}] ({1} x {2})'.format(i, Q.shape[0], Q.shape[1])
    for func in [qe.mc_sample_path, mc_sample_path_numpy, mc_sample_path]:
        %timeit func(Q, init, sample_size)


rand_matrices[0] (10 x 10)
1 loops, best of 3: 881 ms per loop
1 loops, best of 3: 360 ms per loop
1 loops, best of 3: 5.69 ms per loop
rand_matrices[1] (100 x 100)
1 loops, best of 3: 903 ms per loop
1 loops, best of 3: 356 ms per loop
100 loops, best of 3: 11.1 ms per loop
rand_matrices[2] (1000 x 1000)
1 loops, best of 3: 912 ms per loop
1 loops, best of 3: 385 ms per loop
10 loops, best of 3: 29.6 ms per loop
rand_matrices[3] (3000 x 3000)
1 loops, best of 3: 1.01 s per loop
1 loops, best of 3: 450 ms per loop
10 loops, best of 3: 93.8 ms per loop

In [14]:
sample_sizes = [10**i for i in range(3, 6)]
Q = rand_matrices[0]
init = 0
for sample_size in sample_sizes:
    print 'sample_size = {0}'.format(sample_size)
    for func in [qe.mc_sample_path, mc_sample_path_numpy, mc_sample_path]:
        %timeit func(Q, init, sample_size)


sample_size = 1000
100 loops, best of 3: 4.42 ms per loop
1000 loops, best of 3: 1.82 ms per loop
10000 loops, best of 3: 39.4 µs per loop
sample_size = 10000
10 loops, best of 3: 43.4 ms per loop
100 loops, best of 3: 18 ms per loop
1000 loops, best of 3: 291 µs per loop
sample_size = 100000
1 loops, best of 3: 428 ms per loop
10 loops, best of 3: 181 ms per loop
100 loops, best of 3: 2.91 ms per loop

In [15]:
def mc_sample_path_2(P, init, sample_size):
    # CDFs, one for each row of P
    cdfs = np.cumsum(P, axis=-1)
    
    # Random values, uniformly sampled from [0, 1)
    u = np.random.random(size=sample_size)
    
    # === set up array to store output === #
    X = np.empty(sample_size, dtype=int)
    if isinstance(init, int):
        X[0] = init
    else:
        cdf0 = np.cumsum(init)
        X[0] = cdf0.searchsorted(u[0], side='right')

    # === generate the sample path === #
    if numba_installed:
        _mc_sample_path_jit(cdfs, u, X)
        return X
    
    # if not numba_installed
    for t in range(sample_size-1):
        X[t+1] = cdfs[X[t]].searchsorted(u[t+1], side='right')

    return X


if numba_installed:
    @jit(nopython=True)
    def _mc_sample_path_jit(cdfs, u, out):
        n = len(cdfs)
        sample_size = len(u)

        for t in range(sample_size-1):
            lo = -1
            hi = n - 1
            while(lo < hi-1):
                m = (lo + hi) // 2
                if u[t+1] < cdfs[out[t], m]:
                    hi = m
                else:
                    lo = m
            out[t+1] = hi

In [16]:
P = np.array([[0.4, 0.6], [0.2, 0.8]])
sample_size = 10**4

X = mc_sample_path_2(P, init=init, sample_size=sample_size)
print X.sum() / sample_size  # Should be close to 0.75


0.7526

In [17]:
sample_sizes = [10**i for i in range(3, 6)]
Q = rand_matrices[0]
init = 0
for sample_size in sample_sizes:
    print 'sample_size = {0}'.format(sample_size)
    %timeit mc_sample_path_2(Q, init=init, sample_size=sample_size)


sample_size = 1000
10000 loops, best of 3: 39 µs per loop
sample_size = 10000
1000 loops, best of 3: 312 µs per loop
sample_size = 100000
100 loops, best of 3: 2.96 ms per loop

In [18]:
import platform
print platform.platform()


Darwin-13.4.0-x86_64-i386-64bit

In [19]:
import sys
print sys.version


2.7.8 (default, Jul  2 2014, 10:14:46) 
[GCC 4.2.1 Compatible Apple LLVM 5.1 (clang-503.0.40)]

In [20]:
print np.__version__


1.9.0

In [21]:
import numba
print numba.__version__


0.17.0

In [21]: