Demonstration of Plotly interactive plots with an interactive interface.

More information on plotly can be found



In [1]:

    
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
# plotly
import plotly
plotly.offline.init_notebook_mode() # run at the start of every ipython notebook
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=False)
# use seaborn plotting style defaults
doInteractive = True
def interactivePlots(fig, axes):
    # helper function to decide to use plotly interactive plots or not
    if(doInteractive):
        plotly.offline.iplot_mpl(fig, show_link=False, strip_style=True) # offline ipython notebook

Try visualisation methods on single-cell RNA-seq data

Data obtained from:



In [2]:

    
# Iris data
from sklearn import datasets
data = datasets.load_iris()
labels = np.array(['0']*50 + ['1']*50 + ['2']*50)
d = pd.DataFrame(data.data, columns = ['1','2','3','4'], index = labels)

Data summary



In [3]:

    
print('data shape\n', d.shape)
d.head()

Calculate PCA



In [4]:

    
def do_pca(X, n_components=10, centered=True):
    '''
        Return:
            f: ratio of explained variance for each PC
            Wt: PCA loadings for each features
            Y: projected data in PCA space
    '''
    d = X
    n_features = X.shape[1]
    if n_components > n_features:
        n_components = n_features
    if isinstance(X, pd.DataFrame):
        X = X.values
    if centered:
        X = X.astype('float')  # Since X is object
        X = X - X.mean(0)
        X = X/X.std(0)
    pca = PCA(n_components=n_components)
    pca.fit(X)
    fracs = pca.explained_variance_ratio_
    F = [float("{0:.4f}".format(i)) for i in fracs]
    Y = pca.fit_transform(X)  # n_sample, n_redim
    Wt = pca.components_  # n_redim, n_feature
    # construct three DataFrames for later use:
    # PCA loadings; % of variance explained for each PC; projected data on PCs
    Wt_df = pd.DataFrame(Wt, index=['PC' + str(i) for i in np.arange(1, Wt.shape[0]+1)], columns=d.columns.values)
    F_df = pd.DataFrame([str(f*100) + '%' for f in F], index=Wt_df.index, columns=['var. %'])
    Data_proj = pd.DataFrame(Y, index=d.index, columns=['PC'+str(i) for i in np.arange(1, Y.shape[1]+1)])
    return F_df, Wt_df, Data_proj, fracs

F, Wt, X_proj, fracs = do_pca(d)
X_proj = X_proj/abs(X_proj).max().max()



In [5]:

    
fig=plt.figure(figsize=(5, 5))
plt.plot(np.cumsum(fracs))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
interactivePlots(fig, None)

Do PCA plot



In [6]:

    
PC1 = 0
PC2 = 1
# Create a trace
traceCelltype = list()
labelList = np.unique(labels)
assert set(np.unique(labels)) == set(labelList), 'Missing labels'
for lbl in labelList:
    traceCelltype.append(go.Scatter(
        x = X_proj[labels==lbl].iloc[:,PC1],
        y = X_proj[labels==lbl].iloc[:,PC2],
        mode='markers',
        name = lbl,
        text = lbl
        ))
iplot(traceCelltype, filename='GuoPCA')



In [7]:

    
# 3D plot
PC1 = 0
PC2 = 1
PC3 = 2
traceCelltype3d = list()
layout = go.Layout(showlegend=True, title='Scatter plot of 3-D PCA space', annotations=list())
for lbl in labelList:
    traceCelltype3d.append(go.Scatter3d(
        x = X_proj[labels==lbl].iloc[:,PC1],
        y = X_proj[labels==lbl].iloc[:,PC2],
        z = X_proj[labels==lbl].iloc[:,PC3],
        mode='markers',
        name = lbl,
        text = lbl
        ))
fig = go.Figure(data=traceCelltype3d, layout=layout)
iplot(fig, filename='GuoPCA3D')

Plot factor loadings for each gene in PCA space.



In [8]:

    
# Calculate vector magnitudes for each gene
GenePCMagnitudes = Wt.loc['PC1',:]**2+Wt.loc['PC2',:]
GenePCMagnitudes.sort_values(inplace=True, ascending=False)    
# plot principal components
def GetPCVectorsForEachGene(fLine=False, topN=None):
    tracePCs = list()
    layout = go.Layout(showlegend=True, title='Top principal components', annotations=list())
    for i, g in enumerate(GenePCMagnitudes.iteritems()):  # plot feature by feature
        if(topN is not None and i >= topN):
            continue
        if(fLine):
            xp = [0, Wt.loc['PC1', g[0]]* 2.5] # x1.5 to make it more visible
            yp = [0, Wt.loc['PC2', g[0]]* 2.5]
            mode='lines'
            layout.annotations.append(
                    dict(
                        x=xp[1],
                        y=yp[1],
                        xref='x',
                        yref='y',
                        text=g[0]+'_PC',
                        showarrow=True,
                        arrowhead=7,
                        ax=0,
                        ay=-15
                    )
            )
        else:
            xp = [Wt.loc['PC1', g[0]]* 2.5] # x1.5 to make it more visible
            yp = [Wt.loc['PC2', g[0]]* 2.5]
            mode='markers+text'
        tracePCs.append(go.Scatter(
                x = xp,
                y = yp,
                mode=mode,
                name = g[0]+'_PC',
                text = g[0]+'_PC',
                marker={'size': 10},
                line = {'width': 10},
                textposition='top'
                ))
    return tracePCs, layout
t, l = GetPCVectorsForEachGene(fLine=True, topN=4)
fig = go.Figure(data=t, layout=l)
iplot(fig, filename='GuoPCs')

Draw interactive plot where user can select different genes and other options for PCA plot



In [9]:

    
from __future__ import print_function
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
def plotinteractPCA(gene='Atp12a', showPCs=1, linePCs=False, PC1=0, PC2=1):
    layout = go.Layout(showlegend=True, title='Interactive PCA plot',
                       annotations=list(), legend={'orientation': 'h'})
    traceGene = list()
    if(gene == 'celltype'):
        for lbl in labelList:
            traceGene.append(go.Scatter(
                x = X_proj[labels==lbl].iloc[:,PC1],
                y = X_proj[labels==lbl].iloc[:,PC2],
                mode='markers',
                name = lbl,
                text = lbl
                ))
    else:
        traceGene.append(go.Scatter(
            x = X_proj.iloc[:,PC1],
            y = X_proj.iloc[:,PC2],
            mode='markers',
            marker = {'color': d[gene].values, 'size': 10, 'colorscale': 'Viridis',
                      'colorbar':{'title': 'expression'}},
            text = labels,
            name = 'Gene Expression'
            ))
    if(showPCs > 0):
        t, l = GetPCVectorsForEachGene(topN=showPCs, fLine=linePCs)
        layout.annotations += l.annotations
        traceGene += t
    fig = go.Figure(data=traceGene, layout=layout)
    iplot(fig, filename='GuoPCAInteractive')
_=interact(plotinteractPCA, gene=['celltype']+list(d.columns.values),
         showPCs=(0, d.shape[1]), linePCs=True, PC1=(0, 3), PC2=(1, 3))



In [ ]:

1	2	3	4
5.1	3.5	1.4	0.2
4.9	3.0	1.4	0.2
4.7	3.2	1.3	0.2
4.6	3.1	1.5	0.2
5.0	3.6	1.4	0.2

1	2	3	4
5.1	3.5	1.4	0.2
4.9	3.0	1.4	0.2
4.7	3.2	1.3	0.2
4.6	3.1	1.5	0.2
5.0	3.6	1.4	0.2

1	2	3	4
5.1	3.5	1.4	0.2
4.9	3.0	1.4	0.2
4.7	3.2	1.3	0.2
4.6	3.1	1.5	0.2
5.0	3.6	1.4	0.2

1	2	3	4
5.1	3.5	1.4	0.2
4.9	3.0	1.4	0.2
4.7	3.2	1.3	0.2
4.6	3.1	1.5	0.2
5.0	3.6	1.4	0.2