In [ ]:
import numpy as np
import pylab as pb
import GPy
import scipy.io
import matplotlib.pyplot as plt
%matplotlib inline
Reading the data: Y is the gene expression matrix (n, t) with n = 45038 genes and t = 64 samples; t is the time vector of shape (t,), t = 64; and S is the connectivity matrix between genes and transcription factors (TFs), shape (n, q) with n = 45038 and q = 69.

In [2]:
Y=np.load("/users/suraalrashid/expression.npy")
t = np.asarray([30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120,30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120,30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120,30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120])
t = t[:, None]
Y=Y[0:45038,:]
Y=Y[0:100,:]



def con_mat_True_False():
    X=np.load("/users/suraalrashid/connectivity matrix.npy")
    transcription_factors = X[0,1:70]
    annotations = X[1:45039,0]
    #X = X[transcription_factors]
    
    X=X[1:45039,1:70]
    return  (annotations, X,  transcription_factors)


annotations,X,transcription_Factors= con_mat_True_False()
# set S[i,j] = 1 where the connectivity matrix marks a relationship between
# gene i and transcription factor j ('1' entries), and 0 otherwise
#S = data['X'].T<1e-3
S = (X == '1').astype(int)   # shape (45038, 69)
S=S[0:100,0:7]
TF=transcription_Factors[0:7]
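
A quick shape check of the truncated arrays (a small sanity-check cell, assuming the loads above ran as written):

In [ ]:
print Y.shape   # expected (100, 64): 100 genes kept, 64 arrays/time points
print S.shape   # expected (100, 7): 100 genes by 7 transcription factors
print len(TF)   # expected 7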
Compute the SVD of S: R is (n, n), Lambda1 is the vector of singular values (q,), Lambda is the (q, q) diagonal matrix built from Lambda1, and V is (q, q).

In [3]:
# step 1, find the SVD of S.
n, q = S.shape
T = Y.shape[1]

R, Lambda1, V = np.linalg.svd(S)
# Extract first q columns for Q
Q = R[:, :q]
# remaining columns for U
U = R[:, q:]
print n,q,T
Lambda = np.diag(Lambda1)   # (q, q) diagonal matrix of the singular values


100 7 64
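
As a sanity check, S should be recovered (up to numerical error) from its thin SVD factors; numpy's svd returns the right factor already transposed, so the reconstruction is Q times Lambda times V (a small check cell, assuming the arrays from the cell above):

In [ ]:
print np.allclose(S, np.dot(Q, np.dot(Lambda, V)))   # expected True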
To compute sigma2, Y_u must first be computed by projecting Y onto the U matrix (the columns of R outside the principal subspace).

In [4]:
# Find sigma2 by looking at variance of y_u
Y_u = np.dot(U.T, Y)
sigma2 = 1./(T*(n-q))*(Y_u*Y_u).sum()
print "sigma2 found as", sigma2


sigma2 found as 0.245147250992
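
Equivalently, sigma2 is just the mean squared entry of Y_u, i.e. the empirical noise variance in the (n-q)-dimensional complement of the principal subspace (a small check, assuming Y_u from the cell above):

In [ ]:
print np.mean(Y_u**2)   # should match sigma2 above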

To build X and y: X is assembled from three columns, the first is the time input t, the second is the coregionalization index over the two strains and two mutations (four combinations), and the third is the coregionalization index over the q transcription factors, so X has shape (t*q, 3).

y is built by flattening Y_q, giving shape (q*t, 1).


In [5]:
# Prepare the data for processing in GPy
Y_q = np.dot(Q.T, Y) # project data onto the principal subspace of S

# Generate the input associated with each Y, the TF and the time point.
s_m=np.asarray([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3])
x0, x1= np.asarray(   np.meshgrid(  t.flatten(),np.arange(q)  )    )
x2,_= np.asarray(   np.meshgrid( s_m.flatten(),np.arange(q)  )    )
#print x0.shape,x1.shape, x2.shape
#print x0,x1,x2
Xt=np.hstack([t.flatten()[:, None],s_m.flatten()[:,None]])
#np.save("/home/sura/GPy/TF/Xt.npy",Xt)

X = np.hstack([x0.flatten()[:, None], x2.flatten()[:, None],x1.flatten()[:,None]])
#np.save("/home/sura/GPy/TF/Xt.npy",X)
np.save("Xt.npy",X)

#print X.shape, X
y = Y_q.flatten()[:, None]
#print Y_q.flatten()[:, None].shape
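
A quick check of the assembled shapes (a small sketch; the expected sizes assume q = 7 TFs and 64 time points as above):

In [ ]:
print X.shape, y.shape   # expected (448, 3) (448, 1): one row per (TF, time point) pair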
Save V, Lambda, and Q so that the mean and covariance can be computed later.

In [6]:
np.save("V.npy", V)
np.save("Lambda.npy", Lambda)
np.save("Q.npy", Q)

In [7]:
# Kernel: RBF over time (input dim 0) x coregionalization over the 4
# strain/mutation combinations (dim 1) x coregionalization over the q TFs (dim 2).
kern = GPy.kern.RBF(1, active_dims=[0])*GPy.kern.Coregionalize(1,4, rank=4,active_dims=[1])*GPy.kern.Coregionalize(1,q, rank=q,active_dims=[2])
#kern = GPy.kern.RBF(1, active_dims=[0])*GPy.kern.Coregionalize(1,q, rank=q,active_dims=[1])*GPy.kern.Coregionalize(1,4, rank=4,active_dims=[2])

m = GPy.models.GPRegression(X, y, kern)
#m.mul.rbf.lengthscale = 50
m.mul.mul.rbf.lengthscale=50
m.Gaussian_noise.variance = sigma2
#m.optimize(messages=True)

In [8]:
#import GPy.inference.latent_function_inference.inference_method
print m
K1= m.kern.mul.coregion.K(Xt)#self.X)
print K1.shape


  GP_regression.           |      Value       |  Constraint  |  Prior  |  Tied to
  mul.mul.rbf.variance     |             1.0  |     +ve      |         |         
  mul.mul.rbf.lengthscale  |            50.0  |     +ve      |         |         
  mul.mul.coregion.W       |          (4, 4)  |              |         |         
  mul.mul.coregion.kappa   |            (4,)  |     +ve      |         |         
  mul.coregion.W           |          (7, 7)  |              |         |         
  mul.coregion.kappa       |            (7,)  |     +ve      |         |         
  Gaussian_noise.variance  |  0.245147250992  |     +ve      |         |         
(64, 64)

In [9]:
si=np.dot(m.mul.coregion.W,m.mul.coregion.W.T)+np.diag(m.mul.coregion.kappa)
#we have si , V, Lambda 
#np.save("/home/sura/GPy/TF/Si.npy", si)
np.save("Si.npy", si)
print si.shape


(7, 7)

In [ ]:
for i in range(q):
    m.plot(fixed_inputs=[(1, 0),(2,i),],linecol='Red',fillcol='lightBlue',which_data_rows=range(0,16*q))   #,(1,3)])#,(1,0),(1,0),(1,0),(1,1)])
    m.plot(fixed_inputs=[(1, 1),(2,i)],linecol='Blue',which_data_rows=range(16*q,32*q),newfig=False)   #,(1,0)])#,(1,0),(1,0),(1,0),(1,2)])
    m.plot(fixed_inputs=[(1, 2),(2,i)],linecol='Green',which_data_rows=range(32*q,48*q),newfig=False)   #,(1,0)])#,(1,0),(1,0),(1,0),(1,3)])
    m.plot(fixed_inputs=[(1, 3),(2,i)],linecol='Yellow',which_data_rows=range(48*q,64*q),newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,4)])
    pb.xlabel("Time series")
    pb.ylabel("connectivity info. between \n Transcription Factor {} \n and group of genes".format(transcription_Factors[i]))
The code below shows how the posterior mean and covariance are computed and how the model is plotted; execution starts from the loop at the end of the cell.
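
For reference, the prediction functions below implement the standard GP posterior, with the training covariance assembled as a Kronecker product of the coregionalization kernel over the inputs and the TF covariance mapped back through the saved SVD factors (this is a reading of the code below; the woodbury_* names follow GPy's conventions):

$$\mu_* = K_{*}\,(K + \sigma^2 I)^{-1}\,\mathbf{y}, \qquad
\Sigma_* = K_{**} - K_{*}\,(K + \sigma^2 I)^{-1}\,K_{*}^{\top}, \qquad
K = K_{\mathrm{coreg}} \otimes \left(\Lambda V^{\top}\, \Sigma_{\mathrm{TF}}\, V \Lambda\right)$$

where $\Sigma_{\mathrm{TF}} = W W^{\top} + \mathrm{diag}(\kappa)$ is the TF coregionalization covariance saved as Si.npy, $\Lambda$ and $V$ come from the SVD of S, and $K_*$, $K_{**}$ are the corresponding cross- and test-covariance blocks (built with the $\Sigma_{\mathrm{TF}} V \Lambda$ and $\Sigma_{\mathrm{TF}}$ factors, so the prediction is of the latent TF activities rather than of Y_q itself).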

In [11]:
X.shape
G = np.zeros(200)   # seed column; 200-point blocks of predicted means get stacked onto this
print G.shape

import sys
import warnings
from GPy import kern
from GPy import likelihoods
from GPy.util import diag
from GPy.util.linalg import dtrtrs, pdinv, dpotrs, dpotri, symmetrify, jitchol, tdot
# pdinv_post is not in stock GPy; it appears to come from a locally patched GPy.util.linalg
from GPy.util.linalg import pdinv_post
from GPy.likelihoods.gaussian import Gaussian
from GPy.plotting.matplot_dep.base_plots import gpplot, x_frame1D, x_frame2D
from GPy.plotting.matplot_dep import Tango   # used in the posterior-samples branch of plot_postt
from GPy.util.misc import param_to_array
# needed by plot_postt below
from GPy.models.gp_coregionalized_regression import GPCoregionalizedRegression
from GPy.models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression

def _raw_predict_post(self, _Xnew, full_cov=False, kern=None):

    YYT_factor = self.Y
    
    si=np.load("Si.npy")
    Vv=np.load("V.npy")
    Lambda=np.load("Lambda.npy")
    Q=np.load("Q.npy")
    t=np.load("Xt.npy")
    t=t[:64,:3]
    t[:,0]=self.X[0:64,0]
    t[:,1]=_Xnew[0,1]
    t[:,2]=_Xnew[0,2]

    #K1= self.kern.mul.rbf.K(t)#self.X)
    K1=self.mul.mul.coregion.K(t)
    #print 'K.shape',K.shape  


    #np.save("/Users/suraAlrashid/GPy/TF/K.npy",K)


    te=np.dot(np.dot(np.dot(Lambda,Vv.T), si),np.dot(Vv,Lambda))
    K=np.kron(K1,te)

    Ky = K.copy()
    diag.add(Ky, self.likelihood.gaussian_variance(self.Y_metadata))

    LW = pdinv_post(Ky)
    
    YYT_factor=YYT_factor.reshape(YYT_factor.shape[0],1)

    #print 'YYT_factor',YYT_factor.shape
    alpha, _ = dpotrs(LW, YYT_factor, lower=1)


    """K = kern.K(X)"""
    self.woodbury_chol=LW 
    self.woodbury_vector=alpha 
    self.woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)

    symmetrify(self.woodbury_inv)

    te1=si
    te2=np.dot( si,np.dot(Vv,Lambda))
    te3=np.dot(np.dot(Lambda,Vv.T), si)
    te=np.dot(np.dot(np.dot(Lambda,Vv.T), si),np.dot(Vv,Lambda))



    """mu=self.posterior.mean(self.likelihood)
    var=self.posterior.covariance(self.likelihood)"""

    #Kx = self.kern.mul.rbf.K(t,_Xnew)
    Kx=self.mul.mul.coregion.K(t,_Xnew)
    #print 'Kx',Kx

    mu  = np.dot(np.kron(Kx.T,te2), self.woodbury_vector)
    #mu  = np.dot(np.kron(K1,te2), self.woodbury_vector)
   #print 'mu.',mu

    #Kxx = self.kern.mul.rbf.K(_Xnew)
    Kxx=self.mul.mul.coregion.K(_Xnew)
    var = np.atleast_3d(np.kron(Kxx,te1)) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, np.kron(Kx,te2)), np.kron(Kx,te3), [1,0]).T
    #var = np.atleast_3d(np.kron(K1,te1)) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, np.kron(K1,te2)), np.kron(K1,te3), [1,0]).T

    #print 'var',var.shape
    #K = self.kern.mul.rbf.K(t)

    #pb.figure()
    #full_K = np.vstack([np.hstack([K, Kx]), np.hstack([Kx.T, Kxx])])

    #pb.imshow(full_K)
    #pb.colorbar
    return mu, var


def predict_post(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
    #predict the latent function values
    mu, var = _raw_predict_post(self,Xnew, full_cov=full_cov, kern=kern)
    #print mu.shape, var.shape
    # now push through likelihood
    mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
    return mean, var


def predict_quantiles_post(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
    m, v = _raw_predict_post(self,X,  full_cov=False)
    #print 'predict_quantiles_post, sura '
    return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)


def plot_postt(model, newfig=True,plot_limits=None, which_data_rows='all',
        which_data_ycols='all', fixed_inputs=[],
        levels=20, samples=0, fignum=None, ax=None, resolution=None,
        plot_raw=False,
        linecol='darkBlue',fillcol='lightBlue', Y_metadata=None, data_symbol='kx'):
    #deal with optional arguments
    if which_data_rows == 'all':
        which_data_rows = slice(None)
    if which_data_ycols == 'all':
        which_data_ycols = np.arange(model.output_dim)
    #if len(which_data_ycols)==0:
        #raise ValueError('No data selected for plotting')
    if ax is None:
        if newfig:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        else :
            
            fig = pb.gcf()
            ax = fig.add_subplot(111)

    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
        X = model.X.mean
        X_variance = param_to_array(model.X.variance)
    else:
        X = model.X
    X, Y = param_to_array(X, model.Y)
    if hasattr(model, 'Z'): Z = param_to_array(model.Z)

    #work out what the inputs are for plotting (1D or 2D)
    fixed_dims = np.array([i for i,v in fixed_inputs])
    free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
    plots = {}
    #one dimensional plotting
    #print len(free_dims)
    if len(free_dims) == 1:

        #define the frame on which to plot
        
        t=np.load("Xt.npy")
        t=t[:64,:3]

        Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution or 200)
        #print Xnew.shape,'XNew'
        Xgrid = np.empty((Xnew.shape[0],model.input_dim))
        Xgrid[:,free_dims] = Xnew
        for i,vz in fixed_inputs:
            Xgrid[:,i] = vz
        #print 'Xgrid',Xgrid
        #print 'xgrid',Xgrid.shape
        #make a prediction on the frame and plot it
        ii= [v for i,v in fixed_inputs]
        ii =ii[1]
        if plot_raw:
            m, v = _raw_predict_post(model,Xgrid)
            np.save("m.npy",m[200*ii:200*(ii+1), 0])
           

            lower = m - 2*np.sqrt(v)
            upper = m + 2*np.sqrt(v)
        else:
            if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
                meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
            else:
                meta = None
            m, v = predict_post(model,Xgrid, full_cov=False, Y_metadata=meta)
            #print 'meta',meta
            lower, upper = predict_quantiles_post(model,Xgrid, Y_metadata=meta)

            np.save("m.npy",m[200*ii:200*(ii+1), 0])
            #print 'm[200*ii:200*(ii+1)',m[200*ii:200*(ii+1),0], ii
            #rr=m[200*ii:200*(ii+1),0]-m[200*(ii+1):200*(ii+1+1),0]
            #print rr
        #print m.shape, Xnew.shape
        #if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
        
            # the end block
        

        
        for d in which_data_ycols:
            
            #print 'm[:d]',m[:,d].shape, 'Xnew',Xnew.shape, 'lower[:,d] ,upper[: ,d]', lower[:,d].shape ,upper[: ,d].shape
            plots['gpplot'] = gpplot(Xnew, m[200*ii:200*(ii+1), d], lower[200*ii:200*(ii+1), d], upper[200*ii:200*(ii+1), d], ax=ax, edgecol=linecol, fillcol=fillcol)
            
            
            #print 'X[which_data_rows,free_dims]',X[which_data_rows,free_dims].shape,X[which_data_rows,free_dims]

            #print 'Y[which_data_rows, d]',Y[which_data_rows, d].shape,Y[which_data_rows, d]

            # label the raw data by strain/mutation combination (input dim 1)
            if (1, 0) in fixed_inputs:
                if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol, mew=1, label="129 Ntg")
            elif (1, 1) in fixed_inputs:
                if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol, mew=1, label="129 SOD1")
            elif (1, 2) in fixed_inputs:
                if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol, mew=1, label="C57 Ntg")
            elif (1, 3) in fixed_inputs:
                if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol, mew=1, label="C57 SOD1")
            else:
                if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol, mew=1)
            
        
            ax.legend(bbox_to_anchor=(1,1.4)) 
        #optionally plot some samples
        if samples: #NOTE not tested with fixed_inputs
            Ysim = model.posterior_samples(Xgrid, samples)
            for yi in Ysim.T:
                plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
                #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.


        #add error bars for uncertain (if input uncertainty is being modelled)
        if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
            plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
                        xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
                        ecolor='k', fmt=None, elinewidth=.5, alpha=.5)


        #set the limits of the plot to some sensible values
        ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
        #print ymin,ymax
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(-3,3)
        #ax.set_ylim(ymin, ymax)

        #add inducing inputs (if a sparse model is used)
        if hasattr(model,"Z"):
            #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
            if isinstance(model,SparseGPCoregionalizedRegression):
                Z = Z[Z[:,-1] == Y_metadata['output_index'],:]
            Zu = Z[:,free_dims]
            z_height = ax.get_ylim()[0]
            plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
    else:
        raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
    return plots
#










for i in range(q):
    plot_postt(m,fixed_inputs=[(1, 0),(2,i)],linecol='Red',which_data_rows=range(0,16*q))#,1)])
    plot_postt(m,fixed_inputs=[(1, 1),(2,i)],linecol='Blue',which_data_rows=range(16*q,32*q))#,newfig=False)   #,(1,0)])#,(1,0),(1,0),(1,0),(1,2)])
    plot_postt(m,fixed_inputs=[(1, 2),(2,i)],linecol='Green',which_data_rows=range(32*q,48*q))#,newfig=False)   #,(1,0)])#,(1,0),(1,0),(1,0),(1,3)])
    plot_postt(m,fixed_inputs=[(1, 3),(2,i)],linecol='Yellow',which_data_rows=range(48*q,64*q))#,newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,4)])
       
    mm = np.load("m.npy")      # 200-point block of predicted means saved by plot_postt
    G = np.hstack((G, mm))     # append this block to the accumulator


(200,)

In [ ]:
#G= np.hstack((G,mm))

print G.shape

In [ ]:
print G.shape
G=G.reshape(-1,q+1)
print G.shape

In [ ]:
G1 = G[:,1:q+1]
G1.shape
F=np.dot(G1,np.dot(V.T,Lambda))
F.shape
pb.plot(F[:,0])

In [ ]:
print G1.shape
