In [1]:
%matplotlib inline
from __future__ import division, print_function
import matplotlib as mpl
import seaborn as sns; sns.set_style('white')
import matplotlib.pyplot as plt
import numpy as np
np.seterr(divide='ignore')

import lavaburst

In [2]:
blues = sns.cubehelix_palette(0.4, gamma=0.5, rot=-0.3, dark=0.1, light=0.9, as_cmap=True)

def nice_ticks(ax, start, end, binsize, axis=(0,1), tick_params=None):
    from matplotlib.ticker import MaxNLocator, FuncFormatter
    tick_locator = MaxNLocator(5)
    tick_formatter = FuncFormatter(lambda x, pos: '{:,}'.format(int(start + x*binsize)))
    if axis == 0 or axis == (0,1):
        ax.xaxis.set_major_locator(tick_locator)
        ax.xaxis.set_major_formatter(tick_formatter)
    else:
        ax.set_xticks([])
    if axis == 1 or axis == (0,1):
        ax.yaxis.set_major_locator(tick_locator)
        ax.yaxis.set_major_formatter(tick_formatter)
    else:
        ax.set_yticks([])
    
    if tick_params is not None:
        ax.tick_params(**tick_params)

Load your heatmap


In [3]:
A = np.loadtxt('IMR90_inSitu-all-MboI-hg19-chr11-0-20000000-balanced.tsv')
start = 0
end = 20000000
binsize = 10000
good_bins = A.astype(bool).sum(axis=0) > 100

At = lavaburst.utils.tilt_heatmap(A, n_diags=500)

Create a score matrix and a segmentation model


In [4]:
S = lavaburst.scoring.modularity_score(A, gamma=6.0, binmask=good_bins)
model = lavaburst.SegModel(S)

Find the optimal domain segmentation


In [5]:
segments = model.optimal_segmentation()

f = plt.figure(figsize=(16, 2))
ax = f.add_subplot(111)
ax.matshow(np.log(At), cmap=blues)
for a,b in segments:
    ax.plot([a-0.5,b-0.5], [-1,-1], 'k.')
ax.set_xlim([0,len(A)])
ax.set_ylim([100,-100])
ax.set_aspect(0.5)
nice_ticks(ax, start, end, binsize, axis=0)


Domain boundary probabilities for all bin edges


In [6]:
beta = 10000
prob = model.boundary_marginals(beta)

f = plt.figure(figsize=(16, 2))
ax = f.add_subplot(111)
ax.matshow(np.log(At), cmap=blues)
ax.plot(np.arange(len(prob))-0.5, -100*prob, 'k', lw=1)
ax.set_xlim([0,len(A)])
ax.set_ylim([100,-100])
ax.set_aspect(0.5)
nice_ticks(ax, start, end, binsize, axis=0)


Call peak regions


In [7]:
beta = 50000
Pb = model.boundary_marginals(beta)
x,y,p = lavaburst.callers.call_boundary_peaks(Pb)

f = plt.figure(figsize=(15,15))
ax = f.add_subplot(111)
ax.matshow(np.log(At), cmap=blues)
ax.plot(np.arange(len(Pb))-0.5, -100*Pb, 'k', lw=1)
for xx, yy, pp in zip(x,y,p):
    if pp > 0.1:
        ax.plot([xx-0.5, xx-0.5, yy-0.5, yy-0.5], [-1, -100*pp, -100*pp, -1], 'b', lw=1)
        
ax.set_title('Region calls from marginal domain boundary probabilities')
ax.set_xlim([1200, 1800])
ax.set_ylim([300, -100])
ax.set_aspect(0.5)
nice_ticks(ax, start, end, binsize, axis=0)


Domain probabilities for every genomic segment


In [8]:
beta = 50000
Ps = model.domain_marginals(beta)

f = plt.figure(figsize=(12,12))
ax = f.add_subplot(111)
ax.matshow(lavaburst.utils.reveal_tril(np.log(A)), cmap=blues)
cp = ax.contourf(
    lavaburst.utils.reveal_triu(Ps), 
    levels=[1e-100, 1e-50, 1e-10, 0.1, 0.2, 0.3, 0.4], 
    cmap=plt.cm.jet)
lo, hi = 1000, 1400
ax.set_xlim([lo,hi])
ax.set_ylim([hi,lo])
nice_ticks(ax, start, end, binsize)
ax.set_title('Upper matrix: Marginal domain probability for segment [i,j)')
f.colorbar(cp, shrink=0.8)


Out[8]:
<matplotlib.colorbar.Colorbar at 0x7f827c80ce10>

Call peaks (domains)


In [9]:
beta = 50000
Ps = model.domain_marginals(beta)
thresh = 0.4

f = plt.figure(figsize=(5,3))
ax = f.add_subplot(111)
x = np.triu(Ps)
ax.hist(x[x>0.01].flat, bins=200)
ax.axvline(thresh, ls='--', color='r')
ax.set_title('high scoring segments')
ax.set_xlabel('marginal probability')
ax.set_ylabel('count')

x,y,p = lavaburst.callers.call_domain_peaks(Ps, thresh)
f = plt.figure(figsize=(12,12))
ax = f.add_subplot(111)
ax.matshow(np.log(A), cmap=blues)
ax.plot(y-0.5, x-0.5, 'k.')
lo, hi = 1000, 1400
for xx, yy, pp in zip(x, y, p):
    if (lo < xx < hi) and (lo < yy < hi):
        ax.text(yy, xx, pp)

ax.set_title('Domains from peak calls on marginal domain probabilities')        
ax.set_xlim([lo,hi])
ax.set_ylim([hi,lo])
nice_ticks(ax, start, end, binsize)


/home/mmatus/miniconda/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in greater
  
/home/mmatus/local/devel/lavaburst/lavaburst/core/callers.py:70: RuntimeWarning: invalid value encountered in less
  Ps_thresholded[Ps_thresholded < thresh] = 0.0

Co-segmentation frequency heatmap


In [10]:
beta = 50000
Pss = model.cooccurence_marginals(beta)

f = plt.figure(figsize=(16, 2))
ax = f.add_subplot(111)
ax.matshow(np.log(At), cmap=blues)
ax.plot(np.arange(len(prob))-0.5, -100*prob, 'k', lw=1)
ax.set_xlim([0,len(A)])
ax.set_ylim([100,-100])
ax.set_aspect(0.5)
ax.set_title('data\n')
nice_ticks(ax, start, end, binsize, axis=0)

f = plt.figure(figsize=(16, 2))
ax = f.add_subplot(111)
ax.matshow(lavaburst.utils.tilt_heatmap(np.log(Pss), n_diags=400), cmap=blues)
ax.plot(np.arange(len(prob))-0.5, -100*prob, 'k', lw=1)
ax.set_xlim([0,len(A)])
ax.set_ylim([100,-100])
ax.set_aspect(0.5)
ax.set_title('model (co-segmentation probabilities)\n')
nice_ticks(ax, start, end, binsize, axis=0)


/home/mmatus/miniconda/envs/py36/lib/python3.6/site-packages/matplotlib/colors.py:897: UserWarning: Warning: converting a masked element to nan.
  dtype = np.min_scalar_type(value)
/home/mmatus/miniconda/envs/py36/lib/python3.6/site-packages/numpy/ma/core.py:748: UserWarning: Warning: converting a masked element to nan.
  data = np.array(a, copy=False, subok=subok)

Exact statistical sampling of domain segmentations


In [11]:
beta = 50000
segs = model.sample(beta, n=1000)

f = plt.figure(figsize=(16, 2))
ax = f.add_subplot(111)
ax.plot(np.arange(len(prob))-0.5, 100*prob, 'k', lw=1)
ax.plot(np.arange(len(prob))-0.5, 100*segs.mean(axis=0) - 100, 'r', lw=1)
ax.set_xlim([0,len(A)])
ax.set_ylim([-100,100])
ax.set_aspect(0.5)
nice_ticks(ax, start, end, binsize, axis=0)
ax.legend(['exact', 'sampled (N=1000)'], bbox_to_anchor=(1.15,1))


Out[11]:
<matplotlib.legend.Legend at 0x7f8279951710>

In [ ]: