In [3]:
import pymc3 as pm
import numpy as np
N = 100        # number of patients
M = 6          # number of disease stages
K = 10         # number of comorbidities
D = 721        # size of the claim-code vocabulary
Dd = 80        # maximum number of codes recorded per observation
min_obs = 10   # minimum observations per patient
max_obs = 30   # maximum observations per patient
np.random.seed(1729)
T = np.random.choice(np.arange(min_obs-1, max_obs), N) + 1  # observations per patient, in [min_obs, max_obs]
step_sizes = [1,2,4]
import rpy2
%load_ext rpy2.ipython
In [4]:
%Rpush T N M
In [5]:
%%R
library(smfsb)
#initial state distribution
pi <- c(0.147026,0.102571,0.239819,0.188710,0.267137,0.054738)
#pi <- c(1,0,0,0,0,0)
#transition rate matrix (upper bidiagonal: stages only advance,
#and the last stage is nearly absorbing)
Q <- matrix(c(-0.631921, 0.631921, 0.000000, 0.000000, 0.000000, 0.000000,
              0.000000,-0.229485, 0.229485, 0.000000, 0.000000, 0.000000,
              0.000000, 0.000000,-0.450538, 0.450538, 0.000000, 0.000000,
              0.000000, 0.000000, 0.000000,-0.206042, 0.206042, 0.000000,
              0.000000, 0.000000, 0.000000, 0.000000,-0.609582, 0.609582,
              0.000000, 0.000000, 0.000000, 0.000000, 0.000010,-0.000010),
            nrow=M, ncol=M, byrow=TRUE)
#rcfmc simulates a continuous-time Markov chain and returns the sampled
#trajectory as a step function of time
rcfmc_wrapper <- function(dummy){
    return(rcfmc(M, Q, pi))
}
set.seed(1729)
sampled_trajectories <- lapply(1:N, rcfmc_wrapper)
In [6]:
#weight the step sizes so that 1 is by far the most likely,
#mirroring the observation spacing in David's data
step_sizes = [1,1,1,1,1,1,1,1,2,4]
np.random.seed(1729)
obs_jumps = np.zeros((N, max_obs-1), dtype=int)  # per-patient jumps between observations, sampled and zero-padded
for user_num, Tn in zip(range(N), T):
    obs_jumps[user_num, :(Tn-1)] = np.random.choice(step_sizes, Tn-1)
obs_times = np.insert(np.cumsum(obs_jumps, axis=1), 0, 0, axis=1)  # prepend t=0 for the first observation
%Rpush obs_times
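As a quick sanity check (illustrative; not one of the original cells), each patient's cumulative observation times should be strictly increasing inside the observed window and flat across the zero padding:
# Illustrative sanity check of obs_times
for n in range(N):
    assert np.all(np.diff(obs_times[n, :T[n]]) > 0)
    assert np.all(np.diff(obs_times[n, T[n]-1:]) == 0)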
In [7]:
%%R
#pair each patient index with its observation times so apply() can
#evaluate the corresponding sampled trajectory
index_and_obs_times <- cbind(1:N, obs_times)
getS <- function(index_and_obs_times){
    n_obs <- length(index_and_obs_times)
    S <- sampled_trajectories[[index_and_obs_times[1]]](index_and_obs_times[2:n_obs])
    return(S)
}
S <- apply(index_and_obs_times, 1, getS)
S <- t(S)
In [21]:
%Rpull S
S = S.astype(np.int32) - 1  # shift from R's 1-based states to 0-based
if np.any(np.diff(S) < 0):
    raise Exception('S goes down a stage at some point!')
In [22]:
import itertools
# B[k, s]: probability that comorbidity k switches on when a patient
# enters stage s
B = np.array([
[0.000001,0.760000,0.720000,0.570000,0.700000,0.610000],
[0.000001,0.460000,0.390000,0.220000,0.200000,0.140000],
[0.000001,0.620000,0.620000,0.440000,0.390000,0.240000],
[0.000001,0.270000,0.210000,0.170000,0.190000,0.070000],
[0.000001,0.490000,0.340000,0.220000,0.160000,0.090000],
[0.000001,0.620000,0.340000,0.320000,0.240000,0.120000],
[0.000001,0.550000,0.390000,0.320000,0.290000,0.150000],
[0.000001,0.420000,0.240000,0.170000,0.170000,0.110000],
[0.000001,0.310000,0.300000,0.230000,0.190000,0.110000],
[0.000001,0.470000,0.340000,0.190000,0.190000,0.110000]])
# B0[k, s]: probability that comorbidity k is already present at the
# first observation, given initial stage s
B0 = np.array([
[0.410412,0.410412,0.418293,0.418293,0.429890,0.429890],
[0.240983,0.240983,0.240983,0.240983,0.240983,0.240983],
[0.339714,0.339714,0.339714,0.339714,0.339714,0.339714],
[0.130415,0.130415,0.130415,0.130415,0.130415,0.130415],
[0.143260,0.143260,0.143260,0.143260,0.143260,0.143260],
[0.211465,0.211465,0.211465,0.211465,0.211465,0.211465],
[0.194187,0.194187,0.194187,0.194187,0.194187,0.194187],
[0.185422,0.185422,0.185422,0.185422,0.185422,0.185422],
[0.171973,0.171973,0.171973,0.171973,0.171973,0.171973],
[0.152277,0.152277,0.152277,0.152277,0.152277,0.152277]])
X = np.zeros((K, max_obs, N), dtype=np.int8)
for n, k in itertools.product(range(N), range(K)):
    # n: patient number
    # T[n]: number of observations for that patient
    # k: comorbidity number
    # Initialize comorbidities from the initial comorbidity distribution (B0)
    X[k, 0, n] = np.random.binomial(n=1, p=B0[k, S[n, 0]])
    for time_step in range(1, max_obs):
        state_not_changed = (S[n, time_step] == S[n, time_step-1])
        comorb_already_present = (X[k, time_step-1, n] == 1)
        if state_not_changed or comorb_already_present:
            # comorbidities persist, and onsets only happen at stage changes
            X[k, time_step, n] = X[k, time_step-1, n]
        else:
            # on a stage change, the chance of onset depends on the new stage
            X[k, time_step, n] = np.random.binomial(n=1, p=B[k, S[n, time_step]])
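Because a comorbidity never switches off once present, each patient's X[k, :, n] is nondecreasing, so prevalence should rise over time. An illustrative summary (not an original cell; the variable name is arbitrary):
prevalence = X.mean(axis=2)  # K x max_obs: fraction of patients with comorbidity k
print(prevalence[:, 0])      # roughly the B0 rows, mixed over initial stages
print(prevalence[:, -1])     # should be noticeably higher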
In [23]:
Z = np.loadtxt('../data/synthetic/Z.txt')  # K x D comorbidity-to-code weights
L = np.loadtxt('../data/synthetic/L.txt')  # per-code leak probabilities
#X = load(open('../data/X_layer_100_patients/X.pkl', 'rb'))
O = np.zeros((Dd, max_obs, N), dtype=int) - 1  # up to Dd observed codes per visit, padded with -1
pO = np.zeros((D, max_obs, N))
pOp = np.zeros((D, max_obs, N))
Xp = np.copy(X)
Xp[0, :, 5] = 1  # perturbed copy: force comorbidity 0 on for patient 5 (used in the checks below)
for n in range(N):
    for time_step in range(0, T[n]):
        # noisy-or probability of each claim code given the active comorbidities
        pO[:, time_step, n] = 1 - (1-L)*np.prod(1 - (X[:, time_step, n]*Z.T), axis=1)
        pOp[:, time_step, n] = 1 - (1-L)*np.prod(1 - (Xp[:, time_step, n]*Z.T), axis=1)
        codes = np.where(np.random.binomial(n=1, p=pO[:, time_step, n]))[0]
        O[0:len(codes), time_step, n] = codes
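The code model is a noisy-or: p(O_d = 1 | X) = 1 - (1 - L_d) * prod_k (1 - X_k * Z_{k,d}), with L_d the leak probability for code d. An explicit-loop spot check of the vectorized form above (illustrative only; n_chk, t_chk, and the range of codes are arbitrary choices):
n_chk, t_chk = 0, 0
for d in range(5):
    p = 1 - (1 - L[d]) * np.prod([1 - X[k, t_chk, n_chk]*Z[k, d] for k in range(K)])
    assert np.isclose(p, pO[d, t_chk, n_chk])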
In [33]:
#from pickle import load
#test_O= load(open('../data/X_layer_100_patients/O_input.pkl', 'rb'))
In [35]:
from pickle import dump
dump(O, open('../data/X_layer_100_patients/O_input.pkl', 'wb'))
In [341]:
idx = np.in1d(np.arange(D), O[:, 0, 0])  # which codes were observed for patient 0 at t=0
Z_factor = np.prod(1 - Z[0, np.logical_not(idx)])
print(Z_factor)
off = 4.45961100e-07
on = 2.23648853e-06*Z_factor
off_n = off / (on+off)
on_n = on / (on+off)
print(off_n, on_n)
np.mean((1 - Z[0, np.logical_not(idx)]) > 0.99)
Out[341]:
In [239]:
from pickle import load
O = load(open('../data/X_layer_100_patients/O_input.pkl', 'rb'))
print(X[:, 0, 5])
print(Xp[:, 0, 5])
idx = np.in1d(np.arange(D), O[:, 0, 5])
print(sum(idx))
pO = np.zeros(D)
pOp = np.zeros(D)
pO = 1 - (1-L)*np.prod(1 - (X[:, 22, 0]*Z.T), axis=1)
pOp = 1 - (1-L)*np.prod(1 - (Xp[:, 22, 0]*Z.T), axis=1)
# compare likelihoods of the observed codes with comorbidity 0 off (X) vs. forced on (Xp)
off = np.prod(pO[idx]) * np.prod(1 - pO[~idx])
on = np.prod(pOp[idx]) * np.prod(1 - pOp[~idx])
print(on/off)
other_k = np.prod(1 - X[1:10, 0, 5]*Z[1:10, idx].T, axis=1)
print(other_k)
on = np.prod(1 - Z[0, ~idx])*np.prod(1 - (1-L[idx])*(1-Z[0, idx])*other_k)
off = np.prod(1 - (1-L[idx])*other_k)
print(on)
print(on/off)
In [155]:
temp = S[0:6, 0:4].T
print(temp)
print(temp.T)
idx_temp = np.array([[0, 1], [4, -1], [3, -1]], dtype=int)
m = np.in1d(idx_temp, np.arange(6)).reshape((3, 2))
print(m)
#idx_temp = np.array([[True,True,False,False],[True,False,True,True]])
print(idx_temp)
print(temp.T[idx_temp])
In [270]:
O[:,20,0]
Out[270]:
In [211]:
O4 = O[:, 0:3, 0:2]
print(O4.shape)
#print(O4[0:10,:,:])
Z_pos = Z.T[O4]
print(Z_pos.shape)
Xt = X[:, 0:3, 0:2].T
#print(X[:,0:3,0])
#print(X[:,0:3,1])
#print(Xt)
print(Xt.shape)
XZ_t = (Xt*Z_pos)
print(XZ_t.shape)
#XZ_t[:,0,1]
#XZ_t[:,0,1]
In [130]:
off = np.prod(pO[idx]) * np.prod(1 - pO[~idx])
on = np.prod(pOp[idx]) * np.prod(1 - pOp[~idx])
off_n = off / (off+on)
on_n = on / (off+on)
print(off_n, on_n)
In [259]:
X.dtype
Out[259]:
In [248]:
from pickle import dump
dump(T, open('../data/synthetic2000/T.pkl', 'wb'))
dump(obs_jumps, open('../data/synthetic2000/obs_jumps.pkl', 'wb'))
dump(S, open('../data/synthetic2000/S.pkl', 'wb'))
dump(X, open('../data/synthetic2000/X.pkl', 'wb'))
dump(Z, open('../data/synthetic2000/Z.pkl', 'wb'))
dump(L, open('../data/synthetic2000/L.pkl', 'wb'))
dump(O, open('../data/synthetic2000/O_input.pkl', 'wb'))
In [407]:
from pickle import dump
dump(T, open('../data/X_layer_100_patients/T.pkl', 'wb'))
dump(obs_jumps, open('../data/X_layer_100_patients/obs_jumps.pkl', 'wb'))
dump(S, open('../data/X_layer_100_patients/S.pkl', 'wb'))
dump(X, open('../data/X_layer_100_patients/X.pkl', 'wb'))
dump(Z, open('../data/X_layer_100_patients/Z.pkl', 'wb'))
dump(L, open('../data/X_layer_100_patients/L.pkl', 'wb'))
dump(O, open('../data/X_layer_100_patients/O_input.pkl', 'wb'))
In [8]:
obs_jumps[0,:]
Out[8]:
In [9]:
#pad the rest of each row with -1 so all patients share the same length
obs_jumps = np.insert(obs_jumps, max_obs-1, -1, axis=1)
for n in range(N):
    obs_jumps[n, T[n]-1:] = -1
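A small illustrative check (not an original cell) that the padding leaves exactly T[n]-1 valid jumps per patient:
assert obs_jumps.shape == (N, max_obs)
assert np.all((obs_jumps > 0).sum(axis=1) == T - 1)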
In [10]:
#convert observed jump sizes to their index in the sorted list of step sizes
step_sizes = np.sort(np.unique(obs_jumps))
step_sizes = step_sizes[step_sizes > 0]  # drop the -1 padding value
obs_jump_ind = obs_jumps.copy()
for index, step in enumerate(step_sizes):
    obs_jump_ind[obs_jumps == step] = index
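Illustrative check that the conversion preserves the -1 padding and maps the step sizes [1, 2, 4] onto indices [0, 1, 2]:
assert np.all(obs_jump_ind[obs_jumps == -1] == -1)
assert set(np.unique(obs_jump_ind[obs_jumps > 0])) <= {0, 1, 2}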
In [11]:
obs_jump_ind
Out[11]:
In [16]:
# Encode each consecutive pair of observations as a flat index
# jump_index*M*M + from_stage*M + to_stage; pairs involving -1 padding
# (including the artificial pairs across patient boundaries) are dropped.
j = np.vstack((obs_jump_ind.flatten()[:-1]*M**2, S.flatten()[:-1]*M, S.flatten()[1:]))
q = np.sum(j[:, j[0, :] >= 0], axis=0)
In [21]:
np.bincount(q, minlength=3*M*M).reshape((3,M,M))
Out[21]:
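The bincount result can be cross-checked against an explicit loop over patients (illustrative; C_np and C_loop are names introduced here, not from the original notebook):
C_np = np.bincount(q, minlength=3*M*M).reshape((3, M, M))
C_loop = np.zeros((3, M, M), dtype=int)
for n in range(N):
    for t in range(T[n] - 1):
        C_loop[obs_jump_ind[n, t], S[n, t], S[n, t+1]] += 1
assert np.array_equal(C_np, C_loop)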
In [23]:
import theano
import theano.tensor as tt  # aliased as tt so the observation-count array T is not shadowed
from theano import function
from theano.tensor.extra_ops import bincount
theano.config.compute_test_value = 'ignore'
theano.config.exception_verbosity = 'high'
obs_jump_ind_T = tt.as_tensor_variable(obs_jump_ind, 'obs_jump_ind_T')
out0 = tt.flatten(obs_jump_ind_T)[:-1]*M*M
keep = (out0 >= 0).nonzero()  # drop pairs involving the -1 padding
S_T = tt.lmatrix('S_T')
out1 = tt.flatten(S_T)[:-1]*M
out2 = tt.flatten(S_T)[1:]
indices = out0 + out1 + out2
k = indices[keep]
f = function([S_T], bincount(k, minlength=3*M*M).reshape(shape=np.array([3, M, M])))
C = f(S)
C
Out[23]:
In [24]:
C[0,:,:]
Out[24]:
In [8]:
from ContinuousTimeMarkovModel.distributions import *
In [9]:
from pymc3 import Metropolis, Continuous, Beta
from pymc3.distributions.discrete import Binomial

class Observations(Continuous):
    pass
In [10]:
from ContinuousTimeMarkovModel.forwardS import *
In [11]:
from pymc3.core import *
from pymc3.step_methods.arraystep import ArrayStepShared
from numpy import array, max, exp, cumsum, nested_iters, empty, searchsorted, ones
from numpy.random import uniform
import theano
from theano.gof.graph import inputs
from theano.tensor import add
from pymc3.theanof import make_shared_replacements
from pymc3.distributions import transforms

class sampleX(ArrayStepShared):
    """
    Use forward sampling (equation 10) to sample a realization of S_t, t=1,...,T_n
    given Q, B, and X held constant.
    """
    def __init__(self, vars, model=None):
        model = modelcontext(model)
        #self.sh = ones(vars.dshape, vars.dtype)
        self.vars = vars
        shared = make_shared_replacements(vars, model)
        super(sampleX, self).__init__(vars, shared)

    def astep(self, q, logp):
        # placeholder: return the current point unchanged
        return q
        #p = array([logp(v * self.sh) for v in self.values])
        #return categorical(p, self.var.dshape)
In [12]:
model = Model()
with model:
    pi = Dirichlet('pi', a=as_tensor_variable([0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), shape=M)
    Q = DiscreteObsMJP_unif_prior('Q', M=M, shape=(M, M))
    S = DiscreteObsMJP('S', pi=pi, Q=Q, Tn=Tn, observed_jumps=obs_jumps, shape=(Tn))
    B0 = Beta('B0', alpha=1, beta=1, shape=(K, M))
    B = Beta('B', alpha=1, beta=1, shape=(K, M))
    Xobs = Comorbidities('Xobs', S=S, B0=B0, B=B, shape=(K, max_obs+1, N), observed=X_input)
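A hedged sketch of how sampling might then proceed. Plain Metropolis stands in for the custom steppers (the forwardS import above and the sampleX stub), whose constructors are not shown in this notebook, so treat this purely as illustration:
with model:
    # hypothetical: the custom forward-sampling steps would replace Metropolis here
    step = Metropolis(vars=[pi, Q, B0, B])
    trace = pm.sample(1000, step=step)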