In [1]:
#simulation to perform many runs of the healthy vs. unhealthy dynamics

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
import sys
from tf_funs import *
from threechoice_dynamics import *

#tf.set_random_seed(101)
pi = math.pi
#np.random.seed(101)

sess = tf.Session()

In [2]:
#define the gradients and parameters of the simulation
xdim = 2 #state dimension
udim = 2 #control dimension

#tensors for state, control, observation, covariance
X_est = tf.placeholder(shape=xdim,dtype=tf.float32,name='X_est') #the state estimate
PI_est = tf.placeholder(shape = (xdim,xdim),dtype=tf.float32, name = 'PI_est') #estimated covariance
Y_tp1 = tf.placeholder(shape= xdim,dtype=tf.float32, name = 'Y_tp1') #the most recent observation
Control = tf.placeholder(shape = udim, dtype=tf.float32, name='Control')

#define the noise for the system
dt = 1.0e-4
gamma = 1.0e-4
sigsstate = (1./dt)*(1e-9)#state noise
sigsobs = 1e-6 #observation noise 

Q = sigsstate*np.eye(xdim)
Q_tf = tf.constant(Q,dtype=tf.float32, name = 'Q') #state noise covariance
R = sigsobs*np.eye(xdim)
R_tf = tf.constant(R,dtype=tf.float32, name = 'R') #observation noise covariance  

#graphs for updating state and observation
true_model_est = grad_threechoice_tf(X_est[0],X_est[1],dt,Control)
true_model_est_null = grad_threechoice_tf(X_est[0],X_est[1],dt,[0.,0.]) #state est. gradient null control
target_model_est = grad_threechoice_healthy_tf(X_est[0],X_est[1],dt) #state est. target dynamics
#the non-tensorflow anonymous functions, for generalizations
true_nontf = lambda x,c: grad_threechoice(x[0],x[1],dt,c)
target_nontf = lambda x: grad_threechoice_healthy(x[0],x[1],dt)   

X_plus,PI_plus = EKF(X_est,Y_tp1,PI_est,true_model_est,true_model_est_null,Q_tf,R_tf,xdim,dt)

Cnew = myopicController_meanonly(
    X_est,PI_est,Control,gamma,true_model_est,
    true_model_est_null,target_model_est,xdim,udim)

#covariance prediction update graph
Ak = dynamics_linearized(X_est,true_model_est_null,xdim)

#the full loss function, not just loss of mean values
loss_tf = loss_full(X_est,PI_est,true_model_est,target_model_est)

In [4]:
T = 1000
Tsstart = 0 #starting point for control on
tf.set_random_seed(152)
np.random.seed(152)
ns = 1 #number of samples

#------- the main simulation loop. I might make this a function if i use it a lot
statenoise = np.random.normal(0,sigsstate**0.5,[xdim,T,ns])
obsnoise = np.random.normal(0,sigsobs**0.5,[xdim,T,ns])
G = dt**(0.5)*np.eye(xdim) #system noise matrix, for covariance prediction

x_estvec = np.zeros((xdim,T,ns))
xvec = np.zeros((xdim,T,ns))
yvec = np.zeros((xdim,T,ns))
x_targvec = np.zeros((xdim,T,ns))
PI_estvec = np.zeros((xdim,xdim,T,ns))
contall = np.zeros((udim,T,ns))

loss = np.zeros((T,ns,4))
loss_nocont = np.zeros((T,ns,4))
target_dynvec = np.zeros((2,T,ns))#the value that x would have taken under target dynamics

lag = 1 #how many steps in the past will we recieve observations. 

init = tf.global_variables_initializer()
sess.run(init)
initvals = np.zeros((xdim,ns))

for m in range(ns):
    x_init = np.random.uniform(0.,1.,(2,))
    initvals[:,m] = x_init
    print(x_init)
    PI_init = [[0.,0.],[0.,0.]] #initial covariance
    c_init = [0.,0.]

    xest_k = x_init
    pi_k = PI_init
    c_k = c_init
    x_k = x_init
    x_targ_k = x_init
    ykp1 = np.array(x_init)

    x_estvec[:,0,m] = x_init
    xvec[:,0,m] = x_init
    x_targvec[:,0,m] = x_init
    PI_estvec[:,:,0,m] = PI_init

    print(m)

    #go ahead and propagate lag-steps ahead before starting state estimation and such
    for k in range(1,lag):
        #update actual dynamics
        grad_cont = true_nontf(xvec[:,k-1,m],c_init)
        grad_targ = target_nontf(x_targvec[:,k-1,m])

        xvec[:,k,m] = xvec[:,k-1,m] + grad_cont + statenoise[:,k,m]
        x_targvec[:,k,m] = x_targvec[:,k-1,m] + grad_targ + statenoise[:,k,m]
        yvec[:,k,m] = xvec[:,k,m] + obsnoise[:,k,m]

        #set estimates in beginning lags to initial state
        x_estvec[:,k,m] = x_init
        PI_estvec[:,:,k,m] = PI_init
        print('updating trajectory index ' + str(k))
        
    print('end pre loop')

    for k in range(max(1,lag),T): 
        #update actual dynamics
        grad_cont = true_nontf(xvec[:,k-1,m],contall[:,k-1,m])
        grad_targ = target_nontf(x_targvec[:,k-1,m])
        xvec[:,k,m] = xvec[:,k-1,m] + grad_cont + statenoise[:,k,m]
        x_targvec[:,k,m] = x_targvec[:,k-1,m] + grad_targ + statenoise[:,k,m]
        yvec[:,k,m] = xvec[:,k,m] + obsnoise[:,k,m]
        #print('begin loop')
        #print('updating trajectory index ' + str(k))
        
        #old indexing. likely wrong
        #test = sess.run([X_plus,PI_plus],
        #                {X_est:x_estvec[:,k-lag-1,m], PI_est:PI_estvec[:,:,k-lag-1,m],
        #                 Control:contall[:,k-lag-1,m], Y_tp1:yvec[:,k-lag,m]})
        #x_estvec[:,k-lag,m] = test[0]
        #PI_estvec[:,:,k-lag,m] = test[1]
        
        #new indexing
        test = sess.run([X_plus,PI_plus],
                        {X_est:x_estvec[:,k-lag,m], PI_est:PI_estvec[:,:,k-lag,m],
                         Control:contall[:,k-lag,m], Y_tp1:yvec[:,k-lag+1,m]})
        x_estvec[:,k-lag+1,m] = test[0]
        PI_estvec[:,:,k-lag+1,m] = test[1]
        #print('updating state estimate index ' + str(k-lag+1))

        
        #predit lag states in the future to calculate control
        
        #old indexing
        #x_est_n = x_estvec[:,k-lag,m]
        #PI_est_n = PI_estvec[:,:,k-lag,m]
        #new indexing
        x_est_n = x_estvec[:,k-lag+1,m]
        PI_est_n = PI_estvec[:,:,k-lag+1,m]
        
        for n in range(1,lag):
            #state prediction step
            grad_cont = true_nontf(x_est_n,contall[:,k-lag+1+n,m])

            #covariance prediction step. calculate jacobian. expand about which control pt?
            Ak_n= sess.run(Ak,
                        {X_est: x_est_n, PI_est: PI_est_n,
                         Control: contall[:,0,m], Y_tp1:yvec[:,0,m]})

            x_est_n = x_est_n + grad_cont
            PI_est_n = np.matmul(Ak_n,PI_est_n) + np.matmul(PI_est_n,np.transpose(Ak_n)) + np.matmul(
                np.matmul(G,Q),np.transpose(G))

            #print('propagate state estimate' + str(k-lag+1+n))
        #run myopic controller using predicted state estimated. cov, doesnt matter
        #find control for time k
        
        if k > Tsstart:
            c_k = sess.run(Cnew,{X_est:x_est_n, PI_est:PI_est_n,
                             Control:contall[:,k-1,m], Y_tp1:yvec[:,k,m]})
            contall[:,k,m] = c_k
        #print('update control index ' + str(k))
        
        #old indexing
        
        #loss[k-lag,m] = np.linalg.norm(true_nontf(x_estvec[:,k-lag,m],contall[:,k-lag,m])-
        #                               target_nontf(x_estvec[:,k-lag,m]))
        #ltest = sess.run(loss_tf,{X_est:x_estvec[:,k-lag,m],
        #                                       PI_est:PI_estvec[:,:,k-lag,m],
        #                                       Control:contall[:,k-lag,m]
        #                                      })
        #loss[k-lag,m,:] = ltest
       # 
    #
    #    ltest = sess.run(loss_tf,{X_est:x_estvec[:,k-lag,m],
    #                                           PI_est:PI_estvec[:,:,k-lag,m],
    #                                           Control:np.array([0.,0.])
    #                                          })
    #    loss_nocont[k-lag,m,:] = ltest
    #    
    #    #value of target dynamics at that timestep
    #    target_dynvec[:,k-lag,m] = target_nontf(x_estvec[:,k-lag,m])
    #    
    
        #new indexing

        ltest = sess.run(loss_tf,{X_est:x_estvec[:,k-lag+1,m],
                                               PI_est:PI_estvec[:,:,k-lag+1,m],
                                               Control:contall[:,k-lag+1,m]
                                              })
        loss[k-lag+1,m,:] = ltest
        
        ltest = sess.run(loss_tf,{X_est:x_estvec[:,k-lag+1,m],
                                               PI_est:PI_estvec[:,:,k-lag+1,m],
                                               Control:np.array([0.,0.])
                                              })
        loss_nocont[k-lag+1,m,:] = ltest
        
        #value of target dynamics at that timestep
        target_dynvec[:,k-lag+1,m] = target_nontf(x_estvec[:,k-lag+1,m])
        
        
    #set final lag estimate values to esimate. changed indexing
    print('post loop')
    for k in range(lag-1):
        #print('update state estimate ' + str(T-lag+1+k))
        x_targvec[:,T-lag+1+k,m] = x_targvec[:,T-lag,m]
        x_estvec[:,T-lag+1+k,m] = x_estvec[:,T-lag,m]
        PI_estvec[:,:,T-lag+1+k,m] = PI_estvec[:,:,T-lag,m]


[ 0.31828541  0.95536256]
0
end pre loop
post loop

In [8]:
#------------ save
import pickle
fname = 'threechoice_debug_1en6_lag5_fixed_index.p'
#fname = sys.argv[1]
index = ['x_estvec','x_targvec','PI_estvec','contall','loss','loss_nocont','target_dynvec']
alldata = [index,x_estvec,x_targvec,PI_estvec,contall,loss,loss_nocont,target_dynvec]
pickle.dump( alldata, open( fname, "wb" ) )

In [17]:
print(x_estvec[:,-4,0])


[ 0.  0.]

In [5]:
#true state
tind = 0
plt.plot(x_estvec[0,:,tind],x_estvec[1,:,tind],label='controlled dynamics')
plt.plot(x_targvec[0,:,tind],x_targvec[1,:,tind],label = 'target dynamics')
plt.plot(x_targvec[0,0,tind],x_targvec[1,0,tind],'kx',markersize=10)
plt.legend()
plt.xlabel('S_1')
plt.ylabel('S_2')
plt.title('controlled trajecotory')
plt.show()

tind = 0
plt.plot(np.trace(PI_estvec[:,:,3:30,tind],0,0,1),label='covariance')
plt.legend()
plt.xlabel('timesteps')
plt.ylabel('Tr[Sigma]')
plt.title('trace of covariance')
plt.show()

#plt.plot(test[2][:,0]+ (test[3][:,0,0])**0.5)
#plt.plot(test[2][:,0]- (test[3][:,0,0])**0.5)
#plt.show()
plt.plot(contall[0,:,tind],label='control on S_1, full myopic')
plt.plot(contall[1,:,tind],label='control on S_2, full myopic')
plt.ylabel('C(t)')
plt.title('control signals')
plt.xlabel('timesteps')
plt.legend()
plt.show()

#expected state
fullloss = plt.plot((loss[:,tind,0])**0.5,label='full loss')
fullloss = plt.plot((loss[:,tind,1])**0.5,label='mean exp loss')
#nocontloss = plt.plot(loss_nocont[0,:,tind]),label='no control')
#nolagloss = plt.plot(np.cumsum(loss_nolag[:,tind]),label='no lag,full myopic')
#loss_mo = plt.plot(loss[:,tind],label='lagged, mean-only myopic')
plt.xlabel('time')
plt.ylabel('loss')
plt.title('cumulative loss over time, sum_i(loss_i)')
plt.legend()
plt.show()
#standard deviation


tind = 0
plt.plot(x_estvec[0,:,tind],label='controlled dynamics, x')
plt.plot(x_targvec[0,:,tind],label='controlled dynamics, x')
plt.plot(xvec[0,:,tind],label='controlled dynamics, x')
plt.legend()
plt.title('controlled x 1 position')
plt.show()



In [ ]: