Import libraries

Standard libraries



In [22]:

    
#you have to make sure that you have all of these installed
import cProfile
import re
import math
import numpy as np
import scipy as sp
from scipy import stats
from scipy import optimize as opt
import pandas as pd
import random as rnd
from matplotlib import pyplot as plt
import time
import numpy.random
import warnings
warnings.filterwarnings('ignore')
import multiprocessing as mp

Custom build



In [2]:

    
import chen_utils as ch
import user_simulation_utils as sim

Function to generate observed data



In [3]:

    
rate_input = 0.3
share_observed = 0.7
observations_total = 7
print(rate_input, share_observed, observations_total)



In [4]:

    
type(sim.simulate_data()[0])









    Out[4]:





numpy.ndarray

Set of functions for Chen Model, including

Custom gamma function
Chen pdf and cdf
Chen pdf summing over all possible k's
Function accounting for right censoring
Likelihood function accounting for all IPT observations
Solver function (performing grid search)
Plot function
Pipeline function executing all functions in the respective order



In [5]:

    
x = sim.simulate_data()
p = 0.5
beta = 0.5
#runtime estimate
x









    Out[5]:





[array([ 10.89137374,   1.20842404,   6.45357491,  14.88010635,
          4.36227465,   4.25281117,  16.64814006,  18.2935035 ,
          2.76672231,   5.40805486,   4.96089087,   0.75862208,
          9.18711358,   6.13814847,  21.1624289 ,   8.59584475,
         12.72168178,   8.22119479,   7.81925312,  16.61838199]), 0]



In [6]:

    
#check function call
ch._gamma_calc(x[1],2 * (np.array(range(101))+1),0.5).shape









    Out[6]:





(1, 101)



In [9]:

    
np.random.seed(1234)
true_param = [0.5,0.5]
x = sim.simulate_data(true_param[0], true_param[1], 100)
ch.maximum_likelihood_estimate(x)









    Out[9]:





[[0.52500000000000002, 0.47500000000000003], 3.3544381804858493e-133]



In [10]:

    
ch.total_pipeline(x, true_param)









    












    



True SOW 0.5
Chen SOW 0.525
True beta 0.5
Chen beta 0.475
Naive beta 0.246483402217

Metropolis Simulation

so from here onwards we are simulating parameter pairs using the Metropolis-Hastings algorithm



In [11]:

    
p = 0.6
beta = 0.4
x = sim.simulate_data(beta,p, 60)
traj = ch.metropolis(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.2)
print(traj)









    



[[ 0.42697008  0.57276997]
 [ 0.42697008  0.57276997]
 [ 0.42697008  0.57276997]
 ..., 
 [ 0.35929237  0.65031639]
 [ 0.35929237  0.65031639]
 [ 0.35929237  0.65031639]]



In [10]:

    
x









    Out[10]:





[array([ 24.71062137,   6.99326157,   6.50827662,   1.9969418 ,
         23.77869001,  11.15403862,   3.37967397,   5.90454673,
         11.43553058,   5.52678083,  24.08431148,   2.72254472,
          8.21647734,   9.1819978 ,   7.10399645,   2.64806385,
          2.7460171 ,   4.3089925 ,   7.1743395 ,   7.14308843,
          3.8961428 ,   7.24042932,   1.20979013,   7.42027775,
          6.57617247,   1.09111557,   0.61863018,   7.20347059,
         42.5941273 ,   6.06569325,   5.37880198,   2.7763459 ,
          8.09561917,   8.77648786,  10.98405096,   7.63436722,
          7.55571838,  20.62609736,  12.57206735,   3.36336209,
          2.92946916,   5.62146593,   9.19075908,  18.89254115,
          0.43744348,  18.27340094,  16.47262318,  10.12417296,
         10.1960705 ,   2.06710341,  19.4184893 ,   5.47342536,
         11.71397339,   4.33197612,   2.79820984,  14.70799679,
          7.5835689 ,   6.29638479,   1.84747889,   2.55871926]),
 4.1593177726786656]



In [11]:

    
print(np.mean(traj[:,0]), np.mean(traj[:,1]))









    



0.474804857128 0.577202839161



In [12]:

    
traj_new = ch.metropolis_new(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.2)



In [13]:

    
np.mean(traj_new, axis = 0)









    Out[13]:





array([ 0.45335536,  0.57649189])



In [14]:

    
x









    Out[14]:





[array([ 24.71062137,   6.99326157,   6.50827662,   1.9969418 ,
         23.77869001,  11.15403862,   3.37967397,   5.90454673,
         11.43553058,   5.52678083,  24.08431148,   2.72254472,
          8.21647734,   9.1819978 ,   7.10399645,   2.64806385,
          2.7460171 ,   4.3089925 ,   7.1743395 ,   7.14308843,
          3.8961428 ,   7.24042932,   1.20979013,   7.42027775,
          6.57617247,   1.09111557,   0.61863018,   7.20347059,
         42.5941273 ,   6.06569325,   5.37880198,   2.7763459 ,
          8.09561917,   8.77648786,  10.98405096,   7.63436722,
          7.55571838,  20.62609736,  12.57206735,   3.36336209,
          2.92946916,   5.62146593,   9.19075908,  18.89254115,
          0.43744348,  18.27340094,  16.47262318,  10.12417296,
         10.1960705 ,   2.06710341,  19.4184893 ,   5.47342536,
         11.71397339,   4.33197612,   2.79820984,  14.70799679,
          7.5835689 ,   6.29638479,   1.84747889,   2.55871926]),
 4.1593177726786656]



In [29]:

    
cProfile.run('ch.metropolis_new(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.1)')









    



         5673274 function calls (5662449 primitive calls) in 12.673 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:989(_handle_fromlist)
        1    0.000    0.000   12.735   12.735 <string>:1(<module>)
    10825    0.004    0.000    0.004    0.000 <string>:2(_parse_args)
        1    0.000    0.000    0.000    0.000 <string>:5(_parse_args_rvs)
    10825    0.147    0.000    0.147    0.000 _continuous_distns.py:1978(_cdf)
    21644    0.058    0.000    0.495    0.000 _continuous_distns.py:379(_pdf)
    21644    0.437    0.000    0.437    0.000 _continuous_distns.py:382(_logpdf)
        1    0.000    0.000    0.000    0.000 _continuous_distns.py:4893(_rvs)
    21650    0.991    0.000    5.707    0.000 _distn_infrastructure.py:1619(pdf)
    10825    0.554    0.000    2.275    0.000 _distn_infrastructure.py:1702(cdf)
    32469    0.226    0.000    3.129    0.000 _distn_infrastructure.py:520(argsreduce)
    32469    0.508    0.000    2.412    0.000 _distn_infrastructure.py:545(<listcomp>)
        1    0.000    0.000    0.000    0.000 _distn_infrastructure.py:785(_argcheck_rvs)
        2    0.000    0.000    0.000    0.000 _distn_infrastructure.py:799(squeeze_left)
        1    0.000    0.000    0.000    0.000 _distn_infrastructure.py:815(<listcomp>)
        1    0.000    0.000    0.000    0.000 _distn_infrastructure.py:844(<listcomp>)
    32476    0.404    0.000    0.461    0.000 _distn_infrastructure.py:859(_argcheck)
    21650    0.085    0.000    0.085    0.000 _distn_infrastructure.py:871(_support_mask)
    10825    0.047    0.000    0.047    0.000 _distn_infrastructure.py:874(_open_support_mask)
        1    0.000    0.000    0.001    0.001 _distn_infrastructure.py:905(rvs)
    21650    0.015    0.000    0.155    0.000 _methods.py:31(_sum)
    32476    0.017    0.000    0.193    0.000 _methods.py:37(_any)
        6    0.000    0.000    0.000    0.000 _methods.py:40(_all)
        1    0.000    0.000    0.000    0.000 _multivariate.py:201(_get_random_state)
        1    0.000    0.000    0.000    0.000 _multivariate.py:351(_process_parameters)
        1    0.000    0.000    0.000    0.000 _multivariate.py:36(_squeeze_output)
        1    0.000    0.000    0.002    0.002 _multivariate.py:508(rvs)
        1    0.000    0.000    0.000    0.000 _util.py:182(_asarray_validated)
        1    0.000    0.000    0.000    0.000 base.py:1081(isspmatrix)
        1    0.000    0.000    0.000    0.000 blas.py:177(find_best_blas_type)
        1    0.000    0.000    0.000    0.000 blas.py:206(<listcomp>)
        1    0.000    0.000    0.000    0.000 blas.py:226(_get_funcs)
    10825    0.519    0.000    1.130    0.000 chen_utils.py:376(_gamma_logcalc)
    10825    0.843    0.000    2.029    0.000 chen_utils.py:395(_dchen_sum)
    10825    0.096    0.000    2.326    0.000 chen_utils.py:419(_loglikelihood)
    10825    0.318    0.000    2.593    0.000 chen_utils.py:435(_pchen)
    10825    0.117    0.000    4.115    0.000 chen_utils.py:443(_right_censoring)
    10825    0.013    0.000    2.606    0.000 chen_utils.py:453(<lambda>)
    10825    0.185    0.000    5.891    0.001 chen_utils.py:531(_prior)
    11110    0.226    0.000   12.659    0.001 chen_utils.py:564(log_posterior_new)
    22220    0.024    0.000    0.024    0.000 chen_utils.py:568(<genexpr>)
        1    0.074    0.074   12.735   12.735 chen_utils.py:671(metropolis_new)
        1    0.000    0.000    0.000    0.000 core.py:6192(isMaskedArray)
        1    0.000    0.000    0.000    0.000 decomp_svd.py:16(svd)
   216452    0.257    0.000    0.847    0.000 fromnumeric.py:1380(ravel)
   108226    0.068    0.000    0.280    0.000 fromnumeric.py:1487(nonzero)
    32475    0.020    0.000    0.020    0.000 fromnumeric.py:1565(shape)
    21650    0.086    0.000    0.251    0.000 fromnumeric.py:1730(sum)
    32476    0.092    0.000    0.406    0.000 fromnumeric.py:1886(any)
        5    0.000    0.000    0.000    0.000 fromnumeric.py:1973(all)
    21650    0.031    0.000    0.081    0.000 fromnumeric.py:504(transpose)
   238102    0.171    0.000    0.599    0.000 fromnumeric.py:55(_wrapfunc)
   108226    0.112    0.000    0.448    0.000 fromnumeric.py:70(take)
        1    0.000    0.000    0.000    0.000 function_base.py:1152(asarray_chkfinite)
   108226    0.329    0.000    1.904    0.000 function_base.py:2278(extract)
    54119    0.059    0.000    0.180    0.000 function_base.py:2329(place)
        1    0.000    0.000    0.000    0.000 getlimits.py:507(__init__)
        1    0.000    0.000    0.000    0.000 getlimits.py:532(max)
    10825    0.126    0.000    0.594    0.000 index_tricks.py:566(__init__)
    10825    0.003    0.000    0.003    0.000 index_tricks.py:574(__iter__)
    21650    0.028    0.000    0.040    0.000 index_tricks.py:585(__next__)
        1    0.000    0.000    0.000    0.000 lapack.py:417(get_lapack_funcs)
        1    0.000    0.000    0.000    0.000 lapack.py:459(_compute_lwork)
        1    0.000    0.000    0.000    0.000 lapack.py:478(<listcomp>)
        1    0.000    0.000    0.000    0.000 misc.py:169(_datacopied)
    10825    0.300    0.000    0.336    0.000 numeric.py:1076(outer)
        1    0.000    0.000    0.000    0.000 numeric.py:2135(isscalar)
        1    0.000    0.000    0.000    0.000 numeric.py:2397(allclose)
        1    0.000    0.000    0.000    0.000 numeric.py:2463(isclose)
        1    0.000    0.000    0.000    0.000 numeric.py:2522(within_tol)
        2    0.000    0.000    0.000    0.000 numeric.py:2667(seterr)
        2    0.000    0.000    0.000    0.000 numeric.py:2767(geterr)
        1    0.000    0.000    0.000    0.000 numeric.py:3060(__init__)
        1    0.000    0.000    0.000    0.000 numeric.py:3064(__enter__)
        1    0.000    0.000    0.000    0.000 numeric.py:3069(__exit__)
   270629    0.107    0.000    0.564    0.000 numeric.py:463(asarray)
   378810    0.144    0.000    0.627    0.000 numeric.py:534(asanyarray)
    10825    0.019    0.000    0.124    0.000 numeric.py:87(zeros_like)
    32476    0.044    0.000    0.044    0.000 numerictypes.py:1015(<listcomp>)
    32476    0.007    0.000    0.007    0.000 numerictypes.py:1016(<listcomp>)
    64952    0.616    0.000    0.829    0.000 numerictypes.py:942(_can_coerce_all)
   454650    0.139    0.000    0.139    0.000 numerictypes.py:951(<listcomp>)
    32476    0.093    0.000    0.972    0.000 numerictypes.py:964(find_common_type)
    21650    0.046    0.000    0.087    0.000 shape_base.py:109(<genexpr>)
    32470    0.173    0.000    0.483    0.000 shape_base.py:11(atleast_1d)
    10825    0.308    0.000    3.884    0.000 shape_base.py:23(apply_along_axis)
        1    0.000    0.000    0.000    0.000 stride_tricks.py:176(_broadcast_shape)
        1    0.000    0.000    0.000    0.000 stride_tricks.py:195(broadcast_arrays)
    10825    0.013    0.000    0.013    0.000 stride_tricks.py:20(__init__)
        1    0.000    0.000    0.000    0.000 stride_tricks.py:247(<listcomp>)
    10825    0.009    0.000    0.009    0.000 stride_tricks.py:25(_maybe_view_as_subclass)
        3    0.000    0.000    0.000    0.000 stride_tricks.py:251(<genexpr>)
    10825    0.178    0.000    0.300    0.000 stride_tricks.py:38(as_strided)
        2    0.000    0.000    0.000    0.000 {built-in method builtins.abs}
    22222    0.022    0.000    0.101    0.000 {built-in method builtins.all}
        1    0.000    0.000   12.735   12.735 {built-in method builtins.exec}
   238107    0.062    0.000    0.062    0.000 {built-in method builtins.getattr}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.hasattr}
   357170    0.131    0.000    0.131    0.000 {built-in method builtins.isinstance}
   562901    0.082    0.000    0.082    0.000 {built-in method builtins.len}
32475/21650    0.019    0.000    0.054    0.000 {built-in method builtins.next}
        2    0.000    0.000    0.000    0.000 {built-in method math.ceil}
    54119    0.102    0.000    0.102    0.000 {built-in method numpy.core.multiarray._insert}
    10825    0.051    0.000    0.051    0.000 {built-in method numpy.core.multiarray.arange}
   703570    0.983    0.000    0.983    0.000 {built-in method numpy.core.multiarray.array}
    10825    0.026    0.000    0.026    0.000 {built-in method numpy.core.multiarray.copyto}
    10825    0.054    0.000    0.054    0.000 {built-in method numpy.core.multiarray.empty_like}
    10825    0.007    0.000    0.007    0.000 {built-in method numpy.core.multiarray.normalize_axis_index}
    21650    0.052    0.000    0.052    0.000 {built-in method numpy.core.multiarray.putmask}
        1    0.000    0.000    0.000    0.000 {built-in method numpy.core.multiarray.result_type}
    64953    0.119    0.000    0.119    0.000 {built-in method numpy.core.multiarray.zeros}
        4    0.000    0.000    0.000    0.000 {built-in method numpy.core.umath.geterrobj}
        2    0.000    0.000    0.000    0.000 {built-in method numpy.core.umath.seterrobj}
    10825    0.003    0.000    0.003    0.000 {method '__array_prepare__' of 'numpy.ndarray' objects}
    10825    0.002    0.000    0.002    0.000 {method '__array_wrap__' of 'numpy.ndarray' objects}
        6    0.000    0.000    0.000    0.000 {method 'all' of 'numpy.ndarray' objects}
    32476    0.049    0.000    0.242    0.000 {method 'any' of 'numpy.ndarray' objects}
   108229    0.025    0.000    0.025    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'astype' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    10825    0.056    0.000    0.056    0.000 {method 'dot' of 'numpy.ndarray' objects}
        4    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'index' of 'list' objects}
        1    0.001    0.001    0.002    0.002 {method 'multivariate_normal' of 'mtrand.RandomState' objects}
   108226    0.108    0.000    0.108    0.000 {method 'nonzero' of 'numpy.ndarray' objects}
    10825    0.249    0.000    0.249    0.000 {method 'outer' of 'numpy.ufunc' objects}
        4    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
   238102    0.215    0.000    0.215    0.000 {method 'ravel' of 'numpy.ndarray' objects}
    54132    0.315    0.000    0.315    0.000 {method 'reduce' of 'numpy.ufunc' objects}
    97402    0.192    0.000    0.192    0.000 {method 'reshape' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'squeeze' of 'numpy.ndarray' objects}
   108226    0.233    0.000    0.233    0.000 {method 'take' of 'numpy.ndarray' objects}
    21650    0.025    0.000    0.025    0.000 {method 'transpose' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'uniform' of 'mtrand.RandomState' objects}



In [30]:

    
%timeit ch.metropolis_new(x, starting_point = [0.5,0.5],chain_length = 5000, burn_in = 0.1)









    



1 loop, best of 3: 8.79 s per loop



In [15]:

    
np.max(traj_new[:,1])









    Out[15]:





1.8161243307592783



In [16]:

    
ch.plot_trajectory(traj)



In [17]:

    
ch.plot_trajectory(traj_new)



In [18]:

    
ch.plot_region(traj, x, 80.0)









    



[-188.26867712 -188.26867712 -188.26867712 ..., -186.01082084 -186.01082084
 -186.01082084]



In [19]:

    
p = 0.6
beta = 0.3
x = sim.simulate_data(beta,p, 60)
time0 = time.time()
traj = ch.metropolis(x, starting_point = [0.5,0.5],chain_length = 100000, burn_in = 0.2)
print(time.time()-time0)
print(traj)









    



181.61224484443665
[[ 0.09646851  1.95949828]
 [ 0.09646851  1.95949828]
 [ 0.09646851  1.95949828]
 ..., 
 [ 0.29242333  0.65080326]
 [ 0.29242333  0.65080326]
 [ 0.22866486  0.68407161]]



In [20]:

    
ch.plot_trajectory(traj)
ch.plot_region(traj, x, 90.0)









    












    



[-207.19229174 -207.19229174 -207.19229174 ..., -203.1138449  -203.1138449
 -203.62981182]



In [56]:

    
acceptedTraj = np.column_stack((sim.logit(traj[:,0]), np.log(traj[:,1])))

# Display the sampled points
"""Consider square shaped plot"""
# par( pty="s" ) # makes plots in square axes.
XY = np.transpose(acceptedTraj)
plt.plot(XY[0], XY[1], 'b')
plt.xlim([-3.0, 3.0])
plt.ylim([-3.0, 3.0])
plt.xlabel('P')
plt.ylabel(r'$\beta$')
# Display means and rejected/accepted ratio in plot.
if (meanTraj[0] > .5):
    xpos, xadj = 0.01, 0.01
else:
    xpos, xadj = 0.95, 0.95
if (meanTraj[1] > .5):
    ypos, yadj = 0.01, 0.01
else:
    ypos, yadj = 0.95, 0.95
plt.show()