Alexis Boukouvalas, 2017
Branching GP regression with Gaussian noise on the hematopoiesis data described in the paper "BGP: Gaussian processes for identifying branching dynamics in single cell data".
This notebook shows how to build a BGP model and plot the posterior model fit and posterior branching times.
In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
We have also performed Monocle2 (version 2.1) - DDRTree on this data. The results loaded include the Monocle estimated pseudotime, branching assignment (state) and the DDRTree latent dimensions.
In [2]:
Y = pd.read_csv('singlecelldata/hematoData.csv', index_col=[0])
monocle = pd.read_csv('singlecelldata/hematoMonocle.csv', index_col=[0])
In [3]:
Y.head()
Out[3]:
In [4]:
monocle.head()
Out[4]:
In [5]:
# Plot Monocle DDRTree space
genelist = ['FLT3','KLF1','MPO']
f, ax = plt.subplots(1, len(genelist), figsize=(10, 5), sharex=True, sharey=True)
for ig, g in enumerate(genelist):
y = Y[g].values
yt = np.log(1+y/y.max())
yt = yt/yt.max()
h = ax[ig].scatter(monocle['DDRTreeDim1'], monocle['DDRTreeDim2'],
c=yt, s=50, alpha=1.0, vmin=0, vmax=1)
ax[ig].set_title(g)
In [6]:
import BranchedGP, time
def PlotGene(label, X, Y, s=3, alpha=1.0, ax=None):
fig = None
if ax is None:
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
for li in np.unique(label):
idxN = (label == li).flatten()
ax.scatter(X[idxN], Y[idxN], s=s, alpha=alpha, label=int(np.round(li)))
return fig, ax
In [8]:
def FitGene(g, ns=20): # for quick results subsample data
t = time.time()
Bsearch = list(np.linspace(0.05, 0.95, 5)) + [1.1] # set of candidate branching points
GPy = (Y[g].iloc[::ns].values - Y[g].iloc[::ns].values.mean())[:, None] # remove mean from gene expression data
GPt = monocle['StretchedPseudotime'].values[::ns]
globalBranching = monocle['State'].values[::ns].astype(int)
d = BranchedGP.FitBranchingModel.FitModel(Bsearch, GPt, GPy, globalBranching)
print(g, 'BGP inference completed in %.1f seconds.' % (time.time()-t))
# plot BGP
fig,ax=BranchedGP.VBHelperFunctions.PlotBGPFit(GPy, GPt, Bsearch, d, figsize=(10,10))
# overplot data
f, a=PlotGene(monocle['State'].values, monocle['StretchedPseudotime'].values, Y[g].values-Y[g].iloc[::ns].values.mean(),
ax=ax[0], s=10, alpha=0.5)
# Calculate Bayes factor of branching vs non-branching
bf = BranchedGP.VBHelperFunctions.CalculateBranchingEvidence(d)['logBayesFactor']
fig.suptitle('%s log Bayes factor of branching %.1f' % (g, bf))
return d, fig, ax
d, fig, ax = FitGene('MPO')
In [9]:
d_c, fig_c, ax_c = FitGene('CTSG')
In [ ]: