In [ ]:
import numpy as np
import pylab as pb
import GPy
import scipy.io
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
Y=np.load("/users/suraalrashid/expression.npy")
t = np.asarray([30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120,30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120,30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120,30,30,30,30,60,60,60,60,90,90,90,90,120,120,120,120])
t = t[:, None]
Y=Y[0:45038,:]
Y=Y[0:100,:]
def con_mat_True_False():
X=np.load("/users/suraalrashid/connectivity matrix.npy")
transcription_factors = X[0,1:70]
annotations = X[1:45039,0]
#X = X[transcription_factors]
X=X[1:45039,1:70]
return (annotations, X, transcription_factors)
annotations,X,transcription_Factors= con_mat_True_False()
# set S to find relationships where p-value is less than 1e-3
#S = data['X'].T<1e-3
S=np.repeat(0,45038*69).reshape(45038,69)
for i in range(45038):
for j in range(69):
S[i,j]=1 if X[i,j] =='1' else 0
S=S[0:100,0:7]
TF=transcription_Factors[0:7]
In [3]:
# step 1, find the SVD of S.
n, q = S.shape
T = Y.shape[1]
R, Lambda1, V = np.linalg.svd(S)
# Extract first q columns for Q
Q = R[:, :q]
# remaining columns for U
U = R[:, q:]
print n,q,T
Lambda=np.zeros(q*q).reshape(q,q)
for i in range(q):
Lambda[i,i]=Lambda1[i]
In [4]:
# Find sigma2 by looking at variance of y_u
Y_u = np.dot(U.T, Y)
sigma2 = 1./(T*(n-q))*(Y_u*Y_u).sum()
print "sigma2 found as", sigma2
To getting X and Y, X make from 3 matrix the first matrix is t (time series), the second matrix for coregionalization for 2 strains and 2 mutations, and the third matrix for coregionalization for transcription factors (q), X is (t*q,3 ),
and Y is made from Q where it's shape is (q*t,)
In [5]:
# Prepare the data for processing in GPy
Y_q = np.dot(Q.T, Y) # project data onto the principal subspace of X
# Generate the input associated with each Y, the TF and the time point.
s_m=np.asarray([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3])
x0, x1= np.asarray( np.meshgrid( t.flatten(),np.arange(q) ) )
x2,_= np.asarray( np.meshgrid( s_m.flatten(),np.arange(q) ) )
#print x0.shape,x1.shape, x2.shape
#print x0,x1,x2
Xt=np.hstack([t.flatten()[:, None],s_m.flatten()[:,None]])
#np.save("/home/sura/GPy/TF/Xt.npy",Xt)
X = np.hstack([x0.flatten()[:, None], x2.flatten()[:, None],x1.flatten()[:,None]])
#np.save("/home/sura/GPy/TF/Xt.npy",X)
np.save("Xt.npy",X)
#print X.shape, X
y = Y_q.flatten()[:, None]
#print Y_q.flatten()[:, None].shape
In [6]:
np.save("V.npy", V)
np.save("Lambda.npy", Lambda)
np.save("Q.npy", Q)
In [7]:
kern = GPy.kern.RBF(1, active_dims=[0])*GPy.kern.Coregionalize(1,4, rank=4,active_dims=[1])*GPy.kern.Coregionalize(1,q, rank=q,active_dims=[2])
#kern = GPy.kern.RBF(1, active_dims=[0])*GPy.kern.Coregionalize(1,q, rank=q,active_dims=[1])*GPy.kern.Coregionalize(1,4, rank=4,active_dims=[2])
m = GPy.models.GPRegression(X, y, kern)
#m.mul.rbf.lengthscale = 50
m.mul.mul.rbf.lengthscale=50
m.Gaussian_noise.variance = sigma2
#m.optimize(messages=True)
In [8]:
#import GPy.inference.latent_function_inference.inference_method
print m
K1= m.kern.mul.coregion.K(Xt)#self.X)
print K1.shape
In [9]:
si=np.dot(m.mul.coregion.W,m.mul.coregion.W.T)+np.diag(m.mul.coregion.kappa)
#we have si , V, Lambda
#np.save("/home/sura/GPy/TF/Si.npy", si)
np.save("Si.npy", si)
print si.shape
In [ ]:
for i in range(q):
m.plot(fixed_inputs=[(1, 0),(2,i),],linecol='Red',fillcol='lightBlue',which_data_rows=range(0,16*q)) #,(1,3)])#,(1,0),(1,0),(1,0),(1,1)])
m.plot(fixed_inputs=[(1, 1),(2,i)],linecol='Blue',which_data_rows=range(16*q,32*q),newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,2)])
m.plot(fixed_inputs=[(1, 2),(2,i)],linecol='Green',which_data_rows=range(32*q,48*q),newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,3)])
m.plot(fixed_inputs=[(1, 3),(2,i)],linecol='Yellow',which_data_rows=range(48*q,64*q),newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,4)])
pb.xlabel("Time series")
pb.ylabel("connectivity info. between \n Transcripton Factor {} \n and group from genes".format (transcription_Factors[i]))
In [11]:
X.shape
G=np.zeros(200).reshape(200)
print G.shape
import sys
import warnings
from GPy import kern
from GPy.util.linalg import dtrtrs
#from GPy.model import Model
#from parameterization import ObsAr
from GPy import likelihoods
from GPy.util import diag
from GPy.util.linalg import pdinv, pdinv_post, dpotrs, tdot
from GPy.util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
from GPy.likelihoods.gaussian import Gaussian
#from GPy.inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
#from parameterization.variational import VariationalPosterior
#from GPy.plotting.matplot_dep.base_plots import x_frame1D
#import Tango
#from base_plots import gpplot, x_frame1D, x_frame2D
from GPy.plotting.matplot_dep.base_plots import gpplot, x_frame1D, x_frame2D
#from ...util.misc import param_to_array
from GPy.util.misc import param_to_array
#from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
#from GPy.models.gp_coregionalized_regression import GPCoregionalizedRegression
#from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
#from GPy.models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
def _raw_predict_post(self, _Xnew, full_cov=False, kern=None):
YYT_factor = self.Y
si=np.load("Si.npy")
Vv=np.load("V.npy")
Lambda=np.load("Lambda.npy")
Q=np.load("Q.npy")
t=np.load("Xt.npy")
t=t[:64,:3]
t[:,0]=self.X[0:64,0]
t[:,1]=_Xnew[0,1]
t[:,2]=_Xnew[0,2]
#K1= self.kern.mul.rbf.K(t)#self.X)
K1=self.mul.mul.coregion.K(t)
#print 'K.shape',K.shape
#np.save("/Users/suraAlrashid/GPy/TF/K.npy",K)
te=np.dot(np.dot(np.dot(Lambda,Vv.T), si),np.dot(Vv,Lambda))
K=np.kron(K1,te)
Ky = K.copy()
diag.add(Ky, self.likelihood.gaussian_variance(self.Y_metadata))
LW = pdinv_post(Ky)
YYT_factor=YYT_factor.reshape(YYT_factor.shape[0],1)
#print 'YYT_factor',YYT_factor.shape
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
"""K = kern.K(X)"""
self.woodbury_chol=LW
self.woodbury_vector=alpha
self.woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
symmetrify(self.woodbury_inv)
te1=si
te2=np.dot( si,np.dot(Vv,Lambda))
te3=np.dot(np.dot(Lambda,Vv.T), si)
te=np.dot(np.dot(np.dot(Lambda,Vv.T), si),np.dot(Vv,Lambda))
"""mu=self.posterior.mean(self.likelihood)
var=self.posterior.covariance(self.likelihood)"""
#Kx = self.kern.mul.rbf.K(t,_Xnew)
Kx=self.mul.mul.coregion.K(t,_Xnew)
#print 'Kx',Kx
mu = np.dot(np.kron(Kx.T,te2), self.woodbury_vector)
#mu = np.dot(np.kron(K1,te2), self.woodbury_vector)
#print 'mu.',mu
#Kxx = self.kern.mul.rbf.K(_Xnew)
Kxx=self.mul.mul.coregion.K(_Xnew)
var = np.atleast_3d(np.kron(Kxx,te1)) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, np.kron(Kx,te2)), np.kron(Kx,te3), [1,0]).T
#var = np.atleast_3d(np.kron(K1,te1)) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, np.kron(K1,te2)), np.kron(K1,te3), [1,0]).T
#print 'var',var.shape
#K = self.kern.mul.rbf.K(t)
#pb.figure()
#full_K = np.vstack([np.hstack([K, Kx]), np.hstack([Kx.T, Kxx])])
#pb.imshow(full_K)
#pb.colorbar
return mu, var
def predict_post(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
#predict the latent function values
mu, var = _raw_predict_post(self,Xnew, full_cov=full_cov, kern=kern)
#print mu.shape, var.shape
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
return mean, var
def predict_quantiles_post(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
m, v = _raw_predict_post(self,X, full_cov=False)
#print 'predict_quantiles_post, sura '
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
def plot_postt(model, newfig=True,plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol='darkBlue',fillcol='lightBlue', Y_metadata=None, data_symbol='kx'):
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(model.output_dim)
#if len(which_data_ycols)==0:
#raise ValueError('No data selected for plotting')
if ax is None:
if newfig:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
else :
fig = pb.gcf()
ax = fig.add_subplot(111)
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean
X_variance = param_to_array(model.X.variance)
else:
X = model.X
X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z)
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
plots = {}
#one dimensional plotting
#print len(free_dims)
if len(free_dims) == 1:
#define the frame on which to plot
t=np.load("Xt.npy")
t=t[:64,:3]
Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits, resolution=resolution or 200)
#print Xnew.shape,'XNew'
Xgrid = np.empty((Xnew.shape[0],model.input_dim))
Xgrid[:,free_dims] = Xnew
for i,vz in fixed_inputs:
Xgrid[:,i] = vz
#print 'Xgrid',Xgrid
#print 'xgrid',Xgrid.shape
#make a prediction on the frame and plot it
ii= [v for i,v in fixed_inputs]
ii =ii[1]
if plot_raw:
m, v = _raw_predict_post(model,Xgrid)
np.save("m.npy",m[200*ii:200*(ii+1), 0])
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = predict_post(model,Xgrid, full_cov=False, Y_metadata=meta)
#print 'meta',meta
lower, upper = predict_quantiles_post(model,Xgrid, Y_metadata=meta)
np.save("m.npy",m[200*ii:200*(ii+1), 0])
#print 'm[200*ii:200*(ii+1)',m[200*ii:200*(ii+1),0], ii
#rr=m[200*ii:200*(ii+1),0]-m[200*(ii+1):200*(ii+1+1),0]
#print rr
#print m.shape, Xnew.shape
#if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
# the end block
for d in which_data_ycols:
#print 'm[:d]',m[:,d].shape, 'Xnew',Xnew.shape, 'lower[:,d] ,upper[: ,d]', lower[:,d].shape ,upper[: ,d].shape
plots['gpplot'] = gpplot(Xnew, m[200*ii:200*(ii+1), d], lower[200*ii:200*(ii+1), d], upper[200*ii:200*(ii+1), d], ax=ax, edgecol=linecol, fillcol=fillcol)
#print 'X[which_data_rows,free_dims]',X[which_data_rows,free_dims].shape,X[which_data_rows,free_dims]
#print 'Y[which_data_rows, d]',Y[which_data_rows, d].shape,Y[which_data_rows, d]
if fixed_inputs==[(1,0),(2,0)] :
if not plot_raw: plots['dataplot'] =ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol,mew=1,label=" 129 Ntg")
if fixed_inputs==[(1,0),(2,1)] :
if not plot_raw: plots['dataplot'] =ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol,mew=1,label=" 129 SOD1")
if fixed_inputs==[(1,0),(2,2)] :
if not plot_raw: plots['dataplot'] =ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol,mew=1,label="C57 Ntg")
if fixed_inputs==[(1,0),(2,3)] :
if not plot_raw: plots['dataplot'] =ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol,mew=1,label="C57 SOD1")
else :
if not plot_raw: plots['dataplot'] =ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', color=linecol,mew=1)
ax.legend(bbox_to_anchor=(1,1.4))
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
#print ymin,ymax
ax.set_xlim(xmin, xmax)
ax.set_ylim(-3,3)
#ax.set_ylim(ymin, ymax)
#add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
if isinstance(model,SparseGPCoregionalizedRegression):
Z = Z[Z[:,-1] == Y_metadata['output_index'],:]
Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0]
plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
return plots
#
for i in range(q):
plot_postt(m,fixed_inputs=[(1, 0),(2,i)],linecol='Red',which_data_rows=range(0,16*q))#,1)])
plot_postt(m,fixed_inputs=[(1, 1),(2,i)],linecol='Blue',which_data_rows=range(16*q,32*q))#,newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,2)])
plot_postt(m,fixed_inputs=[(1, 2),(2,i)],linecol='Green',which_data_rows=range(32*q,48*q))#,newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,3)])
plot_postt(m,fixed_inputs=[(1, 3),(2,i)],linecol='Yellow',which_data_rows=range(48*q,64*q))#,newfig=False) #,(1,0)])#,(1,0),(1,0),(1,0),(1,4)])
mm=np.load("m.npy")
mm.reshape(-1,1)
G= np.hstack((G,mm))
In [ ]:
#G= np.hstack((G,mm))
print G.shape
In [ ]:
print G.shape
G=G.reshape(-1,q+1)
print G.shape
In [ ]:
G1 = G[:,1:q+1]
G1.shape
F=np.dot(G1,np.dot(V.T,Lambda))
F.shape
pb.plot(F[:,0])
In [ ]:
print G1.shape
In [ ]:
In [ ]: