Defines the complete model in David's paper

Simulate single trajectory up to X level


In [3]:
import pymc3 as pm
import numpy as np
N = 100
M = 6
K = 10
D = 721
Dd = 80

min_obs = 10
max_obs = 30

np.random.seed(1729)
T = np.random.choice(np.arange(min_obs-1,max_obs), N) + 1

step_sizes = [1,2,4]

import rpy2
%load_ext rpy2.ipython


The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython

In [4]:
%Rpush T N M

In [5]:
%%R
library(smfsb)

#initial state distribution
pi <- c(0.147026,0.102571,0.239819,0.188710,0.267137,0.054738)
#pi <- c(1,0,0,0,0,0)

#transition rate matrix
Q <- matrix(c(-0.631921,0.631921,0.000000,0.000000,0.000000,0.000000,
    0.000000,-0.229485,0.229485,0.000000,0.000000,0.000000,
    0.000000,0.000000,-0.450538,0.450538,0.000000,0.000000,
    0.000000,0.000000,0.000000,-0.206042,0.206042,0.000000,
    0.000000,0.000000,0.000000,0.000000,-0.609582,0.609582,
    0.000000,0.000000,0.000000,0.000000,0.00001,-0.00001), nrow=M, ncol=M,byrow=TRUE)

rcfmc_wrapper <- function(dummy){
  return(rcfmc(M, Q, pi))
}
set.seed(1729)
sampled_trajectories <- lapply(1:N, rcfmc_wrapper)

In [6]:
#make 1 the most likely step size to happen because
#this is what David's data is like
step_sizes = [1,1,1,1,1,1,1,1,2,4]
np.random.seed(1729)
  
obs_jumps = np.zeros((N,max_obs-1), dtype=np.int) # Per user observed jumps in stages, sampled and zero-padded
for user_num, Tn in zip(range(N),T):
    obs_jumps[user_num,:(Tn-1)] = np.random.choice(step_sizes, Tn-1)

obs_times = np.insert(np.cumsum(obs_jumps, axis=1),0,0, axis=1)
%Rpush obs_times

In [7]:
%%R
#zipped up index and matrix to use apply function
index_and_obs_times <- cbind(1:100, obs_times)
getS <- function(index_and_obs_times){
  n_obs <- length(index_and_obs_times)
  S <- sampled_trajectories[[index_and_obs_times[1]]](index_and_obs_times[2:n_obs])
  return(S)
}
S <- apply(index_and_obs_times, 1, getS)
S <- t(S)

In [21]:
%Rpull S
S = S.astype(np.int32) - 1

if np.any(np.diff(S)<0):
    raise Exception('S goes down a stage at some point!')

In [22]:
import itertools

B = np.array([
[0.000001,0.760000,0.720000,0.570000,0.700000,0.610000],
[0.000001,0.460000,0.390000,0.220000,0.200000,0.140000],
[0.000001,0.620000,0.620000,0.440000,0.390000,0.240000],
[0.000001,0.270000,0.210000,0.170000,0.190000,0.070000],
[0.000001,0.490000,0.340000,0.220000,0.160000,0.090000],
[0.000001,0.620000,0.340000,0.320000,0.240000,0.120000],
[0.000001,0.550000,0.390000,0.320000,0.290000,0.150000],
[0.000001,0.420000,0.240000,0.170000,0.170000,0.110000],
[0.000001,0.310000,0.300000,0.230000,0.190000,0.110000],
[0.000001,0.470000,0.340000,0.190000,0.190000,0.110000]])

B0 = np.array([
[0.410412,0.410412,0.418293,0.418293,0.429890,0.429890],
[0.240983,0.240983,0.240983,0.240983,0.240983,0.240983],
[0.339714,0.339714,0.339714,0.339714,0.339714,0.339714],
[0.130415,0.130415,0.130415,0.130415,0.130415,0.130415],
[0.143260,0.143260,0.143260,0.143260,0.143260,0.143260],
[0.211465,0.211465,0.211465,0.211465,0.211465,0.211465],
[0.194187,0.194187,0.194187,0.194187,0.194187,0.194187],
[0.185422,0.185422,0.185422,0.185422,0.185422,0.185422],
[0.171973,0.171973,0.171973,0.171973,0.171973,0.171973],
[0.152277,0.152277,0.152277,0.152277,0.152277,0.152277]])

X = np.zeros((K, max_obs, N), dtype=np.int8)
for n, k in itertools.product(range(N), range(K)):
    # n: user number
    # T[n]: time of observation for user
    # k: comorbidity number
    
    # Initialize comorbidities with initial comborbidity distribution (B0)
    X[k, 0, n] = np.random.binomial(n=1, p=B0[k,S[n,0]])
    
    for time_step in range(1,max_obs):
        state_not_changed = (S[n,time_step] == S[n,time_step-1])
        comorb_already_present = (X[k,time_step-1,n]==1)
        if state_not_changed or comorb_already_present:
            X[k,time_step,n] = X[k,time_step-1,n]
        else:
            # If not, chance of comorbidity is a function of current state
            X[k,time_step,n] = np.random.binomial(n=1, p=B[k,S[n,time_step]])

In [23]:
Z = np.loadtxt('../data/synthetic/Z.txt')
L = np.loadtxt('../data/synthetic/L.txt')
#X = load(open('../data/X_layer_100_patients/X.pkl', 'rb'))

O = np.zeros((Dd, max_obs,N), dtype=np.int) - 1
pO = np.zeros((D,max_obs,N))
pOp = np.zeros((D,max_obs,N))
Xp = np.copy(X)
Xp[0,:,5] = 1
for n in range(N):
    for time_step in range(0,T[n]):
        pO[:,time_step,n] = 1 - (1-L)*np.prod(1-(X[:,time_step,n]*Z.T), axis=1)
        pOp[:,time_step,n] = 1 - (1-L)*np.prod(1-(Xp[:,time_step,n]*Z.T), axis=1)
        codes = np.where(np.random.binomial(n=1, p=pO[:,time_step,n]))[0]
        O[0:len(codes),time_step,n] = codes

In [33]:
#from pickle import load
#test_O= load(open('../data/X_layer_100_patients/O_input.pkl', 'rb'))

In [35]:
from pickle import dump
dump(O, open('../data/X_layer_100_patients/O_input.pkl', 'wb'))

In [341]:
idx = np.in1d(np.arange(D), O[:,0,0])
Z_factor = np.prod(1-Z[0,np.logical_not(idx)])
print Z_factor
off = 4.45961100e-07
on = 2.23648853e-06*Z_factor
off_n = off / (on+off)
on_n = on / (on+off)
print off_n, on_n
np.mean((1-Z[0,np.logical_not(idx)]) > 0.99)


0.00368068207167
0.981875975654 0.0181240243457
Out[341]:
0.30659025787965616

In [239]:
from pickle import load
O = load(open('../data/X_layer_100_patients/O_input.pkl', 'rb'))

print X[:,0,5]
print Xp[:,0,5]
idx = np.in1d(np.arange(D), O[:,0,5])
print sum(idx)

pO = np.zeros(D)
pOp = np.zeros(D)
pO = 1 - (1-L)*np.prod(1-(X[:,22,0]*Z.T), axis=1)
pOp = 1 - (1-L)*np.prod(1-(Xp[:,22,0]*Z.T), axis=1)

off = np.prod(pO[idx]) * np.prod(1-pO[~idx])
on = np.prod(pOp[idx]) * np.prod(1-pOp[~idx])
print on/off

other_k = np.prod(1-X[1:10,0,5]*Z[1:10,idx].T, axis=1)
print other_k
on = np.prod(1-Z[0,~idx])*np.prod(1-(1-L[idx])*(1-Z[0,idx])*other_k)
off = np.prod(1-(1-L[idx])*other_k)
print on
print on/off


[0 1 0 0 0 0 0 0 0 0]
[1 1 0 0 0 0 0 0 0 0]
13
1.0
[ 0.99  0.99  0.99  0.99  0.99  0.99  0.99  0.99  0.99  0.99  0.99  0.99
  0.99]
4.85022258224e-23
0.631933932521

In [155]:
temp = S[0:6,0:4].T
print temp
print temp.T
idx_temp = np.array([[0,1],[4,-1],[3,-1]],dtype=np.int)
m = np.in1d(idx_temp,np.arange(6)).reshape((3,2))
print m
#idx_temp = np.array([[True,True,False,False],[True,False,True,True]])
print idx_temp
print (temp.T[idx_temp])


[[3 0 0 4 1 0]
 [3 1 0 4 1 1]
 [3 1 0 4 1 1]
 [3 1 0 4 2 1]]
[[3 3 3 3]
 [0 1 1 1]
 [0 0 0 0]
 [4 4 4 4]
 [1 1 1 2]
 [0 1 1 1]]
[[ True  True]
 [ True False]
 [ True False]]
[[ 0  1]
 [ 4 -1]
 [ 3 -1]]
[[[3 3 3 3]
  [0 1 1 1]]

 [[1 1 1 2]
  [0 1 1 1]]

 [[4 4 4 4]
  [0 1 1 1]]]

In [270]:
O[:,20,0]


Out[270]:
array([  3,  23,  34,  43,  47,  56,  74,  84,  94, 102, 120, 132, 141,
       148, 177, 181, 186, 230, 236, 243, 267, 271, 292, 307, 333, 335,
       338, 390, 393, 413, 440, 442, 469, 471, 490, 510, 520, 578, 617,
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
        -1,  -1])

In [211]:
O4=O[:,0:3,0:2]
print O4.shape
#print O4[0:10,:,:]
Z_pos = Z.T[O4]
print Z_pos.shape
Xt = X[:,0:3,0:2].T
#print X[:,0:3,0]
#print X[:,0:3,1]
#print Xt
print Xt.shape
#print Xt
XZ_t = (Xt*Z_pos)
print XZ_t.shape
#XZ_t[:,0,1]


(80, 3, 2)
(80, 3, 2, 10)
(2, 3, 10)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-211-d92a31aecb0e> in <module>()
     10 print Xt.shape
     11 #print Xt
---> 12 XZ_t = (Xt*Z_pos)
     13 print XZ_t.shape
     14 #XZ_t[:,0,1]

ValueError: operands could not be broadcast together with shapes (2,3,10) (80,3,2,10) 

In [130]:
off = np.prod(pO[idx]) * np.prod(1-pO[~idx])
on = np.prod(pOp[idx]) * np.prod(1-pOp[~idx])
off_n = off /(off+on)
on_n = on /(off+on)
print off_n, on_n


0.85090434092 0.14909565908

In [262]:
1e-06


Out[262]:
1e-06

In [ ]:


In [259]:
X.dtype


Out[259]:
dtype('int8')

In [251]:



Out[251]:
(80, 30, 2000)

In [ ]:


In [248]:
from pickle import dump

dump(T, open('../data/synthetic2000/T.pkl', 'wb'))
dump(obs_jumps, open('../data/synthetic2000/obs_jumps.pkl', 'wb'))
dump(S, open('../data/synthetic2000/S.pkl', 'wb'))
dump(X, open('../data/synthetic2000/X.pkl', 'wb'))
dump(Z, open('../data/synthetic2000/Z.pkl', 'wb'))
dump(L, open('../data/synthetic2000/L.pkl', 'wb'))
dump(O, open('../data/synthetic2000/O_input.pkl', 'wb'))

In [ ]:


In [ ]:


In [ ]:


In [407]:
from pickle import dump

dump(T, open('../data/X_layer_100_patients/T.pkl', 'wb'))
dump(obs_jumps, open('../data/X_layer_100_patients/obs_jumps.pkl', 'wb'))
dump(S, open('../data/X_layer_100_patients/S.pkl', 'wb'))
dump(X, open('../data/X_layer_100_patients/X.pkl', 'wb'))
dump(Z, open('../data/X_layer_100_patients/Z.pkl', 'wb'))
dump(L, open('../data/X_layer_100_patients/L.pkl', 'wb'))
dump(O, open('../data/X_layer_100_patients/O_input.pkl', 'wb'))

Compute C Testing


In [8]:
obs_jumps[0,:]


Out[8]:
array([1, 1, 1, 1, 1, 4, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0])

In [9]:
#pad the rest of the matrix with -1
obs_jumps = np.insert(obs_jumps, max_obs-1, -1, axis=1)
for n in range(N):
    obs_jumps[n,T[n]-1:] = -1

In [10]:
#convert observed jumps to their appropriate array index
step_sizes = np.sort(np.unique(obs_jumps))
step_sizes = step_sizes[step_sizes > 0]
obs_jump_ind = obs_jumps.copy()
for index, step in enumerate(step_sizes):
    obs_jump_ind[obs_jumps == step] = index

In [11]:
obs_jump_ind


Out[11]:
array([[ 0,  0,  0, ..., -1, -1, -1],
       [ 0,  0,  0, ..., -1, -1, -1],
       [ 0,  0,  0, ...,  0, -1, -1],
       ..., 
       [ 0,  0,  0, ..., -1, -1, -1],
       [ 0,  1,  0, ..., -1, -1, -1],
       [ 0,  2,  0, ...,  0, -1, -1]])

In [16]:
j = np.vstack((obs_jump_ind.flatten()[:-1]*M**2, S.flatten()[:-1]*M, S.flatten()[1:]))
q= np.sum(j[:,j[0,:] >= 0], axis=0)

In [21]:
np.bincount(q, minlength=3*M*M).reshape((3,M,M))


Out[21]:
array([[[  16,   14,    1,    1,    0,    0],
        [   0,   64,   13,    2,    0,    0],
        [   0,    0,   51,   26,    3,    1],
        [   0,    0,    0,  151,   20,   12],
        [   0,    0,    0,    0,   47,   42],
        [   0,    0,    0,    0,    0, 1070]],

       [[   1,    4,    0,    0,    0,    0],
        [   0,    6,    0,    2,    0,    0],
        [   0,    0,    2,    3,    1,    0],
        [   0,    0,    0,    6,    5,    5],
        [   0,    0,    0,    0,    0,    5],
        [   0,    0,    0,    0,    0,  110]],

       [[   1,    1,    0,    0,    0,    0],
        [   0,    6,    9,    5,    0,    1],
        [   0,    0,    1,    4,    3,    1],
        [   0,    0,    0,    7,    3,   10],
        [   0,    0,    0,    0,    1,    8],
        [   0,    0,    0,    0,    0,  150]]])

In [23]:
import theano.tensor as T
from theano import function
import theano
from theano.tensor.extra_ops import bincount
theano.config.compute_test_value = 'ignore'
theano.config.exception_verbosity = 'high'

obs_jump_ind_T = T.as_tensor_variable(obs_jump_ind, 'obs_jump_ind_T')
out0 = T.flatten(obs_jump_ind_T)[:-1]*M*M
keep = (out0 >= 0).nonzero()

S_T = T.lmatrix('S_T')


out1 = T.flatten(S_T)[:-1]*M
out2 = T.flatten(S_T)[1:]

indices=out0+out1+out2
k = indices[keep]
f = function([S_T], bincount(k,minlength=3*M*M).reshape(shape=np.array([3,M,M])))
C=f(S)
C


Out[23]:
array([[[  16,   14,    1,    1,    0,    0],
        [   0,   64,   13,    2,    0,    0],
        [   0,    0,   51,   26,    3,    1],
        [   0,    0,    0,  151,   20,   12],
        [   0,    0,    0,    0,   47,   42],
        [   0,    0,    0,    0,    0, 1070]],

       [[   1,    4,    0,    0,    0,    0],
        [   0,    6,    0,    2,    0,    0],
        [   0,    0,    2,    3,    1,    0],
        [   0,    0,    0,    6,    5,    5],
        [   0,    0,    0,    0,    0,    5],
        [   0,    0,    0,    0,    0,  110]],

       [[   1,    1,    0,    0,    0,    0],
        [   0,    6,    9,    5,    0,    1],
        [   0,    0,    1,    4,    3,    1],
        [   0,    0,    0,    7,    3,   10],
        [   0,    0,    0,    0,    1,    8],
        [   0,    0,    0,    0,    0,  150]]])

In [24]:
C[0,:,:]


Out[24]:
array([[  16,   14,    1,    1,    0,    0],
       [   0,   64,   13,    2,    0,    0],
       [   0,    0,   51,   26,    3,    1],
       [   0,    0,    0,  151,   20,   12],
       [   0,    0,    0,    0,   47,   42],
       [   0,    0,    0,    0,    0, 1070]])

Auxilary Distributions


In [8]:
from ContinuousTimeMarkovModel.distributions import *

In [9]:
from pymc3 import Metropolis, Continuous, Beta
from pymc3.distributions.discrete import Binomial

class Observations(Continuous):
    pass

Auxilary Samplers


In [10]:
from ContinuousTimeMarkovModel.forwardS import *

In [11]:
from pymc3.core import *
from pymc3.step_methods.arraystep import ArrayStepShared
from numpy import array, max, exp, cumsum, nested_iters, empty, searchsorted, ones
from numpy.random import uniform

from theano import theano
from theano.gof.graph import inputs
from theano.tensor import add 
from pymc3.theanof import make_shared_replacements

from pymc3.distributions import transforms

class sampleX(ArrayStepShared):
    """
    Use forward sampling (equation 10) to sample a realization of S_t, t=1,...,T_n
    given Q, B, and X constant.
    """
    def __init__(self, vars, model=None):
        model = modelcontext(model)
        #self.sh = ones(vars.dshape, vars.dtype)
        self.vars = vars
        shared = make_shared_replacements(vars, model)
        super(sampleX, self).__init__(vars, shared)

    def astep(self, q, logp):
        return q
        #p = array([logp(v * self.sh) for v in self.values])
        #return categorical(p, self.var.dshape)

Main


In [12]:
model = Model()

with model:
    pi = Dirichlet('pi', a = as_tensor_variable([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), shape=M)
    Q = DiscreteObsMJP_unif_prior('Q', M=M, shape=(M,M))
    
    S = DiscreteObsMJP('S', pi=pi, Q=Q, Tn=Tn, observed_jumps=obs_jumps, shape=(Tn))

    B0 = Beta('B0', alpha = 1, beta = 1, shape=(K,M))
    B = Beta('B', alpha = 1, beta = 1, shape=(K,M))

    Xobs = Comorbidities('Xobs', S=S, B0=B0,B=B, shape=(K, max_obs+1, N), observed = X_input)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-75e405eb411a> in <module>()
     16 with model:
     17     step1 = Metropolis(vars=[Q])
---> 18     step2 = forwardS(vars=[S], X=X_input)
     19     step3 = Metropolis(vars=[B])

/Users/Arya/Achievemint/ContinuousTimeMarkovModel/src/forwardS.py in __init__(self, vars, X, model)
     13         vars = inputvars(vars)
     14         shared = make_shared_replacements(vars, model)
---> 15         self.sampleS = sampleS()
     16 
     17         super(forwardS, self).__init__(vars, shared)

/Users/Arya/Achievemint/ContinuousTimeMarkovModel/src/forwardS.py in sampleS()
     33 
     34 def sampleS():
---> 35     B_printed = theano.printing.Print('B:')(B)
     36     f = theano.function([B], B_printed)
     37     return f

NameError: global name 'B' is not defined

In [ ]: