Import libraries

Standard libraries


In [22]:
#you have to make sure that you have all of these installed
import cProfile
import re
import math
import numpy as np
import scipy as sp
from scipy import stats
from scipy import optimize as opt
import pandas as pd
import random as rnd
from matplotlib import pyplot as plt
import time
import numpy.random
import warnings
warnings.filterwarnings('ignore')
import multiprocessing as mp

Custom build


In [2]:
import chen_utils as ch
import user_simulation_utils as sim

Function to generate observed data


In [3]:
rate_input = 0.3
share_observed = 0.7
observations_total = 7
print(rate_input, share_observed, observations_total)


0.3 0.7 7

In [4]:
type(sim.simulate_data()[0])


Out[4]:
numpy.ndarray

Set of functions for Chen Model, including

  • Custom gamma function
  • Chen pdf and cdf
  • Chen pdf summing over all possible k's
  • Function accounting for right censoring
  • Likelihood function accounting for all IPT observations
  • Solver function (performing grid search)
  • Plot function
  • Pipeline function executing all functions in the respective order

In [5]:
x = sim.simulate_data()
p = 0.5
beta = 0.5
#runtime estimate
x


Out[5]:
[array([ 10.89137374,   1.20842404,   6.45357491,  14.88010635,
          4.36227465,   4.25281117,  16.64814006,  18.2935035 ,
          2.76672231,   5.40805486,   4.96089087,   0.75862208,
          9.18711358,   6.13814847,  21.1624289 ,   8.59584475,
         12.72168178,   8.22119479,   7.81925312,  16.61838199]), 0]

In [6]:
#check function call
ch._gamma_calc(x[1],2 * (np.array(range(101))+1),0.5).shape


Out[6]:
(1, 101)

In [9]:
np.random.seed(1234)
true_param = [0.5,0.5]
x = sim.simulate_data(true_param[0], true_param[1], 100)
ch.maximum_likelihood_estimate(x)


Out[9]:
[[0.52500000000000002, 0.47500000000000003], 3.3544381804858493e-133]

In [10]:
ch.total_pipeline(x, true_param)


True SOW 0.5
Chen SOW 0.525
True beta 0.5
Chen beta 0.475
Naive beta 0.246483402217

Metropolis Simulation

so from here onwards we are simulating parameter pairs using the Metropolis-Hastings algorithm


In [11]:
p = 0.6
beta = 0.4
x = sim.simulate_data(beta,p, 60)
traj = ch.metropolis(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.2)
print(traj)


[[ 0.42697008  0.57276997]
 [ 0.42697008  0.57276997]
 [ 0.42697008  0.57276997]
 ..., 
 [ 0.35929237  0.65031639]
 [ 0.35929237  0.65031639]
 [ 0.35929237  0.65031639]]

In [10]:
x


Out[10]:
[array([ 24.71062137,   6.99326157,   6.50827662,   1.9969418 ,
         23.77869001,  11.15403862,   3.37967397,   5.90454673,
         11.43553058,   5.52678083,  24.08431148,   2.72254472,
          8.21647734,   9.1819978 ,   7.10399645,   2.64806385,
          2.7460171 ,   4.3089925 ,   7.1743395 ,   7.14308843,
          3.8961428 ,   7.24042932,   1.20979013,   7.42027775,
          6.57617247,   1.09111557,   0.61863018,   7.20347059,
         42.5941273 ,   6.06569325,   5.37880198,   2.7763459 ,
          8.09561917,   8.77648786,  10.98405096,   7.63436722,
          7.55571838,  20.62609736,  12.57206735,   3.36336209,
          2.92946916,   5.62146593,   9.19075908,  18.89254115,
          0.43744348,  18.27340094,  16.47262318,  10.12417296,
         10.1960705 ,   2.06710341,  19.4184893 ,   5.47342536,
         11.71397339,   4.33197612,   2.79820984,  14.70799679,
          7.5835689 ,   6.29638479,   1.84747889,   2.55871926]),
 4.1593177726786656]

In [11]:
print(np.mean(traj[:,0]), np.mean(traj[:,1]))


0.474804857128 0.577202839161

In [12]:
traj_new = ch.metropolis_new(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.2)

In [13]:
np.mean(traj_new, axis = 0)


Out[13]:
array([ 0.45335536,  0.57649189])

In [14]:
x


Out[14]:
[array([ 24.71062137,   6.99326157,   6.50827662,   1.9969418 ,
         23.77869001,  11.15403862,   3.37967397,   5.90454673,
         11.43553058,   5.52678083,  24.08431148,   2.72254472,
          8.21647734,   9.1819978 ,   7.10399645,   2.64806385,
          2.7460171 ,   4.3089925 ,   7.1743395 ,   7.14308843,
          3.8961428 ,   7.24042932,   1.20979013,   7.42027775,
          6.57617247,   1.09111557,   0.61863018,   7.20347059,
         42.5941273 ,   6.06569325,   5.37880198,   2.7763459 ,
          8.09561917,   8.77648786,  10.98405096,   7.63436722,
          7.55571838,  20.62609736,  12.57206735,   3.36336209,
          2.92946916,   5.62146593,   9.19075908,  18.89254115,
          0.43744348,  18.27340094,  16.47262318,  10.12417296,
         10.1960705 ,   2.06710341,  19.4184893 ,   5.47342536,
         11.71397339,   4.33197612,   2.79820984,  14.70799679,
          7.5835689 ,   6.29638479,   1.84747889,   2.55871926]),
 4.1593177726786656]

In [29]:
cProfile.run('ch.metropolis_new(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.1)')


         5673274 function calls (5662449 primitive calls) in 12.673 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:989(_handle_fromlist)
        1    0.000    0.000   12.735   12.735 <string>:1(<module>)
    10825    0.004    0.000    0.004    0.000 <string>:2(_parse_args)
        1    0.000    0.000    0.000    0.000 <string>:5(_parse_args_rvs)
    10825    0.147    0.000    0.147    0.000 _continuous_distns.py:1978(_cdf)
    21644    0.058    0.000    0.495    0.000 _continuous_distns.py:379(_pdf)
    21644    0.437    0.000    0.437    0.000 _continuous_distns.py:382(_logpdf)
        1    0.000    0.000    0.000    0.000 _continuous_distns.py:4893(_rvs)
    21650    0.991    0.000    5.707    0.000 _distn_infrastructure.py:1619(pdf)
    10825    0.554    0.000    2.275    0.000 _distn_infrastructure.py:1702(cdf)
    32469    0.226    0.000    3.129    0.000 _distn_infrastructure.py:520(argsreduce)
    32469    0.508    0.000    2.412    0.000 _distn_infrastructure.py:545(<listcomp>)
        1    0.000    0.000    0.000    0.000 _distn_infrastructure.py:785(_argcheck_rvs)
        2    0.000    0.000    0.000    0.000 _distn_infrastructure.py:799(squeeze_left)
        1    0.000    0.000    0.000    0.000 _distn_infrastructure.py:815(<listcomp>)
        1    0.000    0.000    0.000    0.000 _distn_infrastructure.py:844(<listcomp>)
    32476    0.404    0.000    0.461    0.000 _distn_infrastructure.py:859(_argcheck)
    21650    0.085    0.000    0.085    0.000 _distn_infrastructure.py:871(_support_mask)
    10825    0.047    0.000    0.047    0.000 _distn_infrastructure.py:874(_open_support_mask)
        1    0.000    0.000    0.001    0.001 _distn_infrastructure.py:905(rvs)
    21650    0.015    0.000    0.155    0.000 _methods.py:31(_sum)
    32476    0.017    0.000    0.193    0.000 _methods.py:37(_any)
        6    0.000    0.000    0.000    0.000 _methods.py:40(_all)
        1    0.000    0.000    0.000    0.000 _multivariate.py:201(_get_random_state)
        1    0.000    0.000    0.000    0.000 _multivariate.py:351(_process_parameters)
        1    0.000    0.000    0.000    0.000 _multivariate.py:36(_squeeze_output)
        1    0.000    0.000    0.002    0.002 _multivariate.py:508(rvs)
        1    0.000    0.000    0.000    0.000 _util.py:182(_asarray_validated)
        1    0.000    0.000    0.000    0.000 base.py:1081(isspmatrix)
        1    0.000    0.000    0.000    0.000 blas.py:177(find_best_blas_type)
        1    0.000    0.000    0.000    0.000 blas.py:206(<listcomp>)
        1    0.000    0.000    0.000    0.000 blas.py:226(_get_funcs)
    10825    0.519    0.000    1.130    0.000 chen_utils.py:376(_gamma_logcalc)
    10825    0.843    0.000    2.029    0.000 chen_utils.py:395(_dchen_sum)
    10825    0.096    0.000    2.326    0.000 chen_utils.py:419(_loglikelihood)
    10825    0.318    0.000    2.593    0.000 chen_utils.py:435(_pchen)
    10825    0.117    0.000    4.115    0.000 chen_utils.py:443(_right_censoring)
    10825    0.013    0.000    2.606    0.000 chen_utils.py:453(<lambda>)
    10825    0.185    0.000    5.891    0.001 chen_utils.py:531(_prior)
    11110    0.226    0.000   12.659    0.001 chen_utils.py:564(log_posterior_new)
    22220    0.024    0.000    0.024    0.000 chen_utils.py:568(<genexpr>)
        1    0.074    0.074   12.735   12.735 chen_utils.py:671(metropolis_new)
        1    0.000    0.000    0.000    0.000 core.py:6192(isMaskedArray)
        1    0.000    0.000    0.000    0.000 decomp_svd.py:16(svd)
   216452    0.257    0.000    0.847    0.000 fromnumeric.py:1380(ravel)
   108226    0.068    0.000    0.280    0.000 fromnumeric.py:1487(nonzero)
    32475    0.020    0.000    0.020    0.000 fromnumeric.py:1565(shape)
    21650    0.086    0.000    0.251    0.000 fromnumeric.py:1730(sum)
    32476    0.092    0.000    0.406    0.000 fromnumeric.py:1886(any)
        5    0.000    0.000    0.000    0.000 fromnumeric.py:1973(all)
    21650    0.031    0.000    0.081    0.000 fromnumeric.py:504(transpose)
   238102    0.171    0.000    0.599    0.000 fromnumeric.py:55(_wrapfunc)
   108226    0.112    0.000    0.448    0.000 fromnumeric.py:70(take)
        1    0.000    0.000    0.000    0.000 function_base.py:1152(asarray_chkfinite)
   108226    0.329    0.000    1.904    0.000 function_base.py:2278(extract)
    54119    0.059    0.000    0.180    0.000 function_base.py:2329(place)
        1    0.000    0.000    0.000    0.000 getlimits.py:507(__init__)
        1    0.000    0.000    0.000    0.000 getlimits.py:532(max)
    10825    0.126    0.000    0.594    0.000 index_tricks.py:566(__init__)
    10825    0.003    0.000    0.003    0.000 index_tricks.py:574(__iter__)
    21650    0.028    0.000    0.040    0.000 index_tricks.py:585(__next__)
        1    0.000    0.000    0.000    0.000 lapack.py:417(get_lapack_funcs)
        1    0.000    0.000    0.000    0.000 lapack.py:459(_compute_lwork)
        1    0.000    0.000    0.000    0.000 lapack.py:478(<listcomp>)
        1    0.000    0.000    0.000    0.000 misc.py:169(_datacopied)
    10825    0.300    0.000    0.336    0.000 numeric.py:1076(outer)
        1    0.000    0.000    0.000    0.000 numeric.py:2135(isscalar)
        1    0.000    0.000    0.000    0.000 numeric.py:2397(allclose)
        1    0.000    0.000    0.000    0.000 numeric.py:2463(isclose)
        1    0.000    0.000    0.000    0.000 numeric.py:2522(within_tol)
        2    0.000    0.000    0.000    0.000 numeric.py:2667(seterr)
        2    0.000    0.000    0.000    0.000 numeric.py:2767(geterr)
        1    0.000    0.000    0.000    0.000 numeric.py:3060(__init__)
        1    0.000    0.000    0.000    0.000 numeric.py:3064(__enter__)
        1    0.000    0.000    0.000    0.000 numeric.py:3069(__exit__)
   270629    0.107    0.000    0.564    0.000 numeric.py:463(asarray)
   378810    0.144    0.000    0.627    0.000 numeric.py:534(asanyarray)
    10825    0.019    0.000    0.124    0.000 numeric.py:87(zeros_like)
    32476    0.044    0.000    0.044    0.000 numerictypes.py:1015(<listcomp>)
    32476    0.007    0.000    0.007    0.000 numerictypes.py:1016(<listcomp>)
    64952    0.616    0.000    0.829    0.000 numerictypes.py:942(_can_coerce_all)
   454650    0.139    0.000    0.139    0.000 numerictypes.py:951(<listcomp>)
    32476    0.093    0.000    0.972    0.000 numerictypes.py:964(find_common_type)
    21650    0.046    0.000    0.087    0.000 shape_base.py:109(<genexpr>)
    32470    0.173    0.000    0.483    0.000 shape_base.py:11(atleast_1d)
    10825    0.308    0.000    3.884    0.000 shape_base.py:23(apply_along_axis)
        1    0.000    0.000    0.000    0.000 stride_tricks.py:176(_broadcast_shape)
        1    0.000    0.000    0.000    0.000 stride_tricks.py:195(broadcast_arrays)
    10825    0.013    0.000    0.013    0.000 stride_tricks.py:20(__init__)
        1    0.000    0.000    0.000    0.000 stride_tricks.py:247(<listcomp>)
    10825    0.009    0.000    0.009    0.000 stride_tricks.py:25(_maybe_view_as_subclass)
        3    0.000    0.000    0.000    0.000 stride_tricks.py:251(<genexpr>)
    10825    0.178    0.000    0.300    0.000 stride_tricks.py:38(as_strided)
        2    0.000    0.000    0.000    0.000 {built-in method builtins.abs}
    22222    0.022    0.000    0.101    0.000 {built-in method builtins.all}
        1    0.000    0.000   12.735   12.735 {built-in method builtins.exec}
   238107    0.062    0.000    0.062    0.000 {built-in method builtins.getattr}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.hasattr}
   357170    0.131    0.000    0.131    0.000 {built-in method builtins.isinstance}
   562901    0.082    0.000    0.082    0.000 {built-in method builtins.len}
32475/21650    0.019    0.000    0.054    0.000 {built-in method builtins.next}
        2    0.000    0.000    0.000    0.000 {built-in method math.ceil}
    54119    0.102    0.000    0.102    0.000 {built-in method numpy.core.multiarray._insert}
    10825    0.051    0.000    0.051    0.000 {built-in method numpy.core.multiarray.arange}
   703570    0.983    0.000    0.983    0.000 {built-in method numpy.core.multiarray.array}
    10825    0.026    0.000    0.026    0.000 {built-in method numpy.core.multiarray.copyto}
    10825    0.054    0.000    0.054    0.000 {built-in method numpy.core.multiarray.empty_like}
    10825    0.007    0.000    0.007    0.000 {built-in method numpy.core.multiarray.normalize_axis_index}
    21650    0.052    0.000    0.052    0.000 {built-in method numpy.core.multiarray.putmask}
        1    0.000    0.000    0.000    0.000 {built-in method numpy.core.multiarray.result_type}
    64953    0.119    0.000    0.119    0.000 {built-in method numpy.core.multiarray.zeros}
        4    0.000    0.000    0.000    0.000 {built-in method numpy.core.umath.geterrobj}
        2    0.000    0.000    0.000    0.000 {built-in method numpy.core.umath.seterrobj}
    10825    0.003    0.000    0.003    0.000 {method '__array_prepare__' of 'numpy.ndarray' objects}
    10825    0.002    0.000    0.002    0.000 {method '__array_wrap__' of 'numpy.ndarray' objects}
        6    0.000    0.000    0.000    0.000 {method 'all' of 'numpy.ndarray' objects}
    32476    0.049    0.000    0.242    0.000 {method 'any' of 'numpy.ndarray' objects}
   108229    0.025    0.000    0.025    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'astype' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    10825    0.056    0.000    0.056    0.000 {method 'dot' of 'numpy.ndarray' objects}
        4    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'index' of 'list' objects}
        1    0.001    0.001    0.002    0.002 {method 'multivariate_normal' of 'mtrand.RandomState' objects}
   108226    0.108    0.000    0.108    0.000 {method 'nonzero' of 'numpy.ndarray' objects}
    10825    0.249    0.000    0.249    0.000 {method 'outer' of 'numpy.ufunc' objects}
        4    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
   238102    0.215    0.000    0.215    0.000 {method 'ravel' of 'numpy.ndarray' objects}
    54132    0.315    0.000    0.315    0.000 {method 'reduce' of 'numpy.ufunc' objects}
    97402    0.192    0.000    0.192    0.000 {method 'reshape' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'squeeze' of 'numpy.ndarray' objects}
   108226    0.233    0.000    0.233    0.000 {method 'take' of 'numpy.ndarray' objects}
    21650    0.025    0.000    0.025    0.000 {method 'transpose' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'uniform' of 'mtrand.RandomState' objects}



In [30]:
%timeit ch.metropolis_new(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.1)


1 loop, best of 3: 8.79 s per loop

In [15]:
np.max(traj_new[:,1])


Out[15]:
1.8161243307592783

In [16]:
ch.plot_trajectory(traj)



In [17]:
ch.plot_trajectory(traj_new)



In [18]:
ch.plot_region(traj, x, 80.0)


[-188.26867712 -188.26867712 -188.26867712 ..., -186.01082084 -186.01082084
 -186.01082084]

In [19]:
p = 0.6
beta = 0.3
x = sim.simulate_data(beta,p, 60)
time0 = time.time()
traj = ch.metropolis(x, starting_point = [0.5,0.5],chain_length = 100000, burn_in = 0.2)
print(time.time()-time0)
print(traj)


181.61224484443665
[[ 0.09646851  1.95949828]
 [ 0.09646851  1.95949828]
 [ 0.09646851  1.95949828]
 ..., 
 [ 0.29242333  0.65080326]
 [ 0.29242333  0.65080326]
 [ 0.22866486  0.68407161]]

In [20]:
ch.plot_trajectory(traj)
ch.plot_region(traj, x, 90.0)


[-207.19229174 -207.19229174 -207.19229174 ..., -203.1138449  -203.1138449
 -203.62981182]

In [56]:
acceptedTraj = np.column_stack((sim.logit(traj[:,0]), np.log(traj[:,1])))

# Display the sampled points
"""Consider square shaped plot"""
# par( pty="s" ) # makes plots in square axes.
XY = np.transpose(acceptedTraj)
plt.plot(XY[0], XY[1], 'b')
plt.xlim([-3.0, 3.0])
plt.ylim([-3.0, 3.0])
plt.xlabel('P')
plt.ylabel(r'$\beta$')
# Display means and rejected/accepted ratio in plot.
if (meanTraj[0] > .5):
    xpos, xadj = 0.01, 0.01
else:
    xpos, xadj = 0.95, 0.95
if (meanTraj[1] > .5):
    ypos, yadj = 0.01, 0.01
else:
    ypos, yadj = 0.95, 0.95
plt.show()