Mass2Motif Clustering


In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Initialise plotly in offline mode so figures are drawn inside the notebook
# by plotly.offline.iplot; this must run before any iplot call below.
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)



In [2]:
import urllib, json

import numpy as np
from sklearn.cluster.bicluster import SpectralBiclustering

from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
import plotly

Define helper functions for loading, plotting and biclustering the data


In [3]:
def load_data(mfid, m2m):
    """Download the document table of one Mass2Motif from ms2lda.org.

    Parameters
    ----------
    mfid : int
        Experiment id; formatted into the request URL with ``%d``.
    m2m : int
        Mass2Motif number; formatted into the request URL with ``%d``.

    Returns
    -------
    dict
        NumPy arrays stored under the keys ``'samples'``, ``'docs'``,
        ``'intensities'``, ``'masses'`` and ``'rts'``, built from the five
        top-level items of the JSON response, in that order.

    Raises
    ------
    ValueError
        If the response body is not valid JSON or does not unpack into
        exactly five items. Network errors raised by ``urlopen`` propagate
        unchanged.
    """
    from contextlib import closing
    # Resolve urlopen locally so the function runs on Python 2 and Python 3.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = 'http://ms2lda.org/basicviz/get_doc_table/%d/mass2motif_%d/' % (mfid, m2m)
    # closing() guarantees the HTTP response is released even if reading or
    # parsing fails; the Python 2 response object is not a context manager.
    with closing(urlopen(url)) as response:
        payload = json.loads(response.read().decode('utf-8'))

    view_url = 'http://ms2lda.org/basicviz/view_multi_m2m/%d/mass2motif_%d/' % (mfid, m2m)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Mass2Motif %d experiment %d retrieved from %s' % (m2m, mfid, view_url))

    samples, docs, intensities, masses, rts = payload
    return {
        'samples': np.array(samples),
        'docs': np.array(docs),
        'intensities': np.array(intensities),
        'masses': np.array(masses),
        'rts': np.array(rts),
    }

In [4]:
def get_figure(title, data):
    """Build a plotly heatmap figure from a data dict.

    Parameters
    ----------
    title : str
        Figure title.
    data : dict
        Must provide ``'samples'``, ``'masses'``, ``'rts'`` and
        ``'intensities'``. The transposed intensities are plotted, so the
        sample names label the y axis and the mass/rt pairs label the x axis.

    Returns
    -------
    plotly.graph_objs.Figure
        A 1000x500 figure holding a single 'Jet'-coloured heatmap.
    """
    # y-axis labels: sample names with the fixed prefix and suffix removed.
    row_labels = [
        name.replace('Urine_', '').replace('_fullscan_split', '')
        for name in data['samples']
    ]

    # x-axis labels: one "mass, rt" string per entry of data['masses'].
    rts = data['rts']
    col_labels = [
        '%.4f, %.4f' % (mz, rts[i])
        for i, mz in enumerate(data['masses'])
    ]

    heatmap = go.Heatmap(
        z=data['intensities'].transpose(),
        x=col_labels,
        y=row_labels,
        colorscale='Jet',
    )

    margin = go.Margin(l=120, r=50, b=120, t=50, pad=4)
    layout = go.Layout(
        title=title,
        xaxis={'ticks': ''},
        yaxis={'ticks': ''},
        width=1000,
        height=500,
        margin=margin,
    )

    return go.Figure(data=[heatmap], layout=layout)

In [5]:
def bicluster(n_clusters, data):
    """Reorder a data dict so rows and columns of one bicluster sit together.

    Parameters
    ----------
    n_clusters : int or tuple
        Passed straight through to ``SpectralBiclustering``.
    data : dict
        Dict with the keys ``'samples'``, ``'docs'``, ``'intensities'``,
        ``'masses'`` and ``'rts'``. The input dict is not modified.

    Returns
    -------
    dict
        A new dict with the same keys. ``'docs'``, ``'masses'``, ``'rts'``
        and the rows of ``'intensities'`` follow the sorted row labels;
        ``'samples'`` and the columns of ``'intensities'`` follow the sorted
        column labels.
    """
    intensities = data['intensities']

    # Fit the biclustering model; random_state=0 keeps the result repeatable.
    clustering = SpectralBiclustering(n_clusters=n_clusters, random_state=0)
    clustering.fit(intensities)

    # Sorting the cluster labels yields a permutation that groups members
    # of the same cluster next to each other.
    row_order = np.argsort(clustering.row_labels_)
    col_order = np.argsort(clustering.column_labels_)

    return {
        'samples': data['samples'][col_order],
        'docs': data['docs'][row_order],
        'intensities': intensities[row_order][:, col_order],
        'masses': data['masses'][row_order],
        'rts': data['rts'][row_order],
    }

In [66]:
# Settings for this motif.
mfid = 4              # experiment id used in the ms2lda.org URLs
m2m = 122             # Mass2Motif number to retrieve
n_clusters = (2, 3)   # forwarded to SpectralBiclustering via bicluster()
# Plot title; spelling corrected from 'mercapture' to 'mercapturate'.
title = 'Paracetamol mercapturate'

In [61]:
# Fetch the document table for the motif configured above (network access).
data = load_data(mfid, m2m)


Mass2Motif 122 experiment 4 retrieved from http://ms2lda.org/basicviz/view_multi_m2m/4/mass2motif_122/

In [62]:
# Heatmap of the intensities in the order returned by the server.
fig = get_figure(title, data)
plotly.offline.iplot(fig, filename=title)



In [67]:
# Reorder rows and columns by bicluster label, then plot the same heatmap
# again so the block structure is visible.
new_data = bicluster(n_clusters, data)
fig = get_figure(title + ' -- rearranged', new_data)
plotly.offline.iplot(fig, filename=title)


Carnitine


In [48]:
# Settings for this motif.
mfid = 4              # experiment id used in the ms2lda.org URLs
m2m = 293             # Mass2Motif number to retrieve
n_clusters = (4, 4)   # forwarded to SpectralBiclustering via bicluster()
# Plot title; spelling corrected from 'Carnatine' to 'Carnitine'.
title = 'Carnitine'

In [45]:
# Fetch the document table for the motif configured above (network access).
data = load_data(mfid, m2m)


Mass2Motif 293 experiment 4 retrieved from http://ms2lda.org/basicviz/view_multi_m2m/4/mass2motif_293/

In [46]:
# Heatmap of the intensities in the order returned by the server.
fig = get_figure(title, data)
plotly.offline.iplot(fig, filename=title)



In [49]:
# Reorder rows and columns by bicluster label, then plot the same heatmap
# again so the block structure is visible.
new_data = bicluster(n_clusters, data)
fig = get_figure(title + ' -- rearranged', new_data)
plotly.offline.iplot(fig, filename=title)


Glutamine


In [58]:
# Settings for this motif.
# mfid: experiment id used in the ms2lda.org URLs
mfid = 4
# m2m: Mass2Motif number to retrieve
m2m = 133
# n_clusters: forwarded to SpectralBiclustering via bicluster()
n_clusters = (3, 3)
# title: heading shown on the heatmaps
title = 'Glutamine'

In [51]:
# Fetch the document table for the motif configured above (network access).
data = load_data(mfid, m2m)


Mass2Motif 133 experiment 4 retrieved from http://ms2lda.org/basicviz/view_multi_m2m/4/mass2motif_133/

In [16]:
# Heatmap of the intensities in the order returned by the server.
fig = get_figure(title, data)
plotly.offline.iplot(fig, filename=title)



In [59]:
# Reorder rows and columns by bicluster label, then plot the same heatmap
# again so the block structure is visible.
new_data = bicluster(n_clusters, data)
fig = get_figure(title + ' -- rearranged', new_data)
plotly.offline.iplot(fig, filename=title)



In [ ]: