Demonstration of Plotly interactive plots with an interactive interface.

More information on plotly can be found

  1. 3d scatter plots
  2. 2D scatter plots and annotations
  3. Line charts
  4. Reference

In [1]:
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
# plotly
import plotly
plotly.offline.init_notebook_mode() # run at the start of every ipython notebook
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)
# use seaborn plotting style defaults
doInteractive = True
def interactivePlots(fig, axes):
    # helper function to decide to use plotly interactive plots or not
    if(doInteractive):
        plotly.offline.iplot_mpl(fig, show_link=False, strip_style=True) # offline ipython notebook


Try visualisation methods on single-cell RNA-seq data

Data obtained from:


In [2]:
# Iris data
from sklearn import datasets
data = datasets.load_iris()
labels = np.array(['0']*50 + ['1']*50 + ['2']*50)
d = pd.DataFrame(data.data, columns = ['1','2','3','4'], index = labels)

Data summary


In [3]:
print('data shape\n', d.shape)
d.head()


data shape
 (150, 4)
Out[3]:
1 2 3 4
0 5.1 3.5 1.4 0.2
0 4.9 3.0 1.4 0.2
0 4.7 3.2 1.3 0.2
0 4.6 3.1 1.5 0.2
0 5.0 3.6 1.4 0.2

Calculate PCA


In [4]:
def do_pca(X, n_components=10, centered=True):
    '''
        Return:
            f: ratio of explained variance for each PC
            Wt: PCA loadings for each features
            Y: projected data in PCA space
    '''
    d = X
    n_features = X.shape[1]
    if n_components > n_features:
        n_components = n_features
    if isinstance(X, pd.DataFrame):
        X = X.values
    if centered:
        X = X.astype('float')  # Since X is object
        X = X - X.mean(0)
        X = X/X.std(0)
    pca = PCA(n_components=n_components)
    pca.fit(X)
    fracs = pca.explained_variance_ratio_
    F = [float("{0:.4f}".format(i)) for i in fracs]
    Y = pca.fit_transform(X)  # n_sample, n_redim
    Wt = pca.components_  # n_redim, n_feature
    # construct three DataFrames for later use:
    # PCA loadings; % of variance explained for each PC; projected data on PCs
    Wt_df = pd.DataFrame(Wt, index=['PC' + str(i) for i in np.arange(1, Wt.shape[0]+1)], columns=d.columns.values)
    F_df = pd.DataFrame([str(f*100) + '%' for f in F], index=Wt_df.index, columns=['var. %'])
    Data_proj = pd.DataFrame(Y, index=d.index, columns=['PC'+str(i) for i in np.arange(1, Y.shape[1]+1)])
    return F_df, Wt_df, Data_proj, fracs

F, Wt, X_proj, fracs = do_pca(d)
X_proj = X_proj/abs(X_proj).max().max()

In [5]:
fig=plt.figure(figsize=(5, 5))
plt.plot(np.cumsum(fracs))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
interactivePlots(fig, None)


Do PCA plot


In [6]:
PC1 = 0
PC2 = 1
# Create a trace
traceCelltype = list()
labelList = np.unique(labels)
assert set(np.unique(labels)) == set(labelList), 'Missing labels'
for lbl in labelList:
    traceCelltype.append(go.Scatter(
        x = X_proj[labels==lbl].iloc[:,PC1],
        y = X_proj[labels==lbl].iloc[:,PC2],
        mode='markers',
        name = lbl,
        text = lbl
        ))
iplot(traceCelltype, filename='GuoPCA')



In [7]:
# 3D plot
PC1 = 0
PC2 = 1
PC3 = 2
traceCelltype3d = list()
layout = go.Layout(showlegend=True, title='Scatter plot of 3-D PCA space', annotations=list())
for lbl in labelList:
    traceCelltype3d.append(go.Scatter3d(
        x = X_proj[labels==lbl].iloc[:,PC1],
        y = X_proj[labels==lbl].iloc[:,PC2],
        z = X_proj[labels==lbl].iloc[:,PC3],
        mode='markers',
        name = lbl,
        text = lbl
        ))
fig = go.Figure(data=traceCelltype3d, layout=layout)
iplot(fig, filename='GuoPCA3D')


Plot factor loadings for each gene in PCA space.


In [8]:
# Calculate vector magnitudes for each gene
GenePCMagnitudes = Wt.loc['PC1',:]**2+Wt.loc['PC2',:]
GenePCMagnitudes.sort_values(inplace=True, ascending=False)    
# plot principal components
def GetPCVectorsForEachGene(fLine=False, topN=None):
    tracePCs = list()
    layout = go.Layout(showlegend=True, title='Top principal components', annotations=list())
    for i, g in enumerate(GenePCMagnitudes.iteritems()):  # plot feature by feature
        if(topN is not None and i >= topN):
            continue
        if(fLine):
            xp = [0, Wt.loc['PC1', g[0]]* 2.5] # x1.5 to make it more visible
            yp = [0, Wt.loc['PC2', g[0]]* 2.5]
            mode='lines'
            layout.annotations.append(
                    dict(
                        x=xp[1],
                        y=yp[1],
                        xref='x',
                        yref='y',
                        text=g[0]+'_PC',
                        showarrow=True,
                        arrowhead=7,
                        ax=0,
                        ay=-15
                    )
            )
        else:
            xp = [Wt.loc['PC1', g[0]]* 2.5] # x1.5 to make it more visible
            yp = [Wt.loc['PC2', g[0]]* 2.5]
            mode='markers+text'
        tracePCs.append(go.Scatter(
                x = xp,
                y = yp,
                mode=mode,
                name = g[0]+'_PC',
                text = g[0]+'_PC',
                marker={'size': 10},
                line = {'width': 10},
                textposition='top'
                ))
    return tracePCs, layout
t, l = GetPCVectorsForEachGene(fLine=True, topN=4)
fig = go.Figure(data=t, layout=l)
iplot(fig, filename='GuoPCs')


Draw interactive plot where user can select different genes and other options for PCA plot


In [9]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
def plotinteractPCA(gene='Atp12a', showPCs=1, linePCs=False, PC1=0, PC2=1):
    layout = go.Layout(showlegend=True, title='Interactive PCA plot',
                       annotations=list(), legend={'orientation': 'h'})
    traceGene = list()
    if(gene == 'celltype'):
        for lbl in labelList:
            traceGene.append(go.Scatter(
                x = X_proj[labels==lbl].iloc[:,PC1],
                y = X_proj[labels==lbl].iloc[:,PC2],
                mode='markers',
                name = lbl,
                text = lbl
                ))
    else:
        traceGene.append(go.Scatter(
            x = X_proj.iloc[:,PC1],
            y = X_proj.iloc[:,PC2],
            mode='markers',
            marker = {'color': d[gene].values, 'size': 10, 'colorscale': 'Viridis',
                      'colorbar':{'title': 'expression'}},
            text = labels,
            name = 'Gene Expression'
            ))
    if(showPCs > 0):
        t, l = GetPCVectorsForEachGene(topN=showPCs, fLine=linePCs)
        layout.annotations += l.annotations
        traceGene += t
    fig = go.Figure(data=traceGene, layout=layout)
    iplot(fig, filename='GuoPCAInteractive')
_=interact(plotinteractPCA, gene=['celltype']+list(d.columns.values),
         showPCs=(0, d.shape[1]), linePCs=True, PC1=(0, 3), PC2=(1, 3))



In [ ]: