In [1]:
%matplotlib inline
# Initialise plotly's offline notebook mode so that the plotly.offline.iplot
# calls further down can render their figures inline in this notebook.
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)
In [2]:
import urllib, json
import numpy as np
from sklearn.cluster.bicluster import SpectralBiclustering
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
In [3]:
def load_data(mfid, m2m):
    """Fetch the document table of one Mass2Motif from ms2lda.org.

    Parameters
    ----------
    mfid : int
        Experiment identifier; interpolated into the request URL with ``%d``.
    m2m : int
        Mass2Motif number; interpolated into the request URL with ``%d``.

    Returns
    -------
    dict
        Keys ``'samples'``, ``'docs'``, ``'intensities'``, ``'masses'`` and
        ``'rts'``; each value is the corresponding element of the decoded
        JSON response converted to a numpy array.

    Raises
    ------
    ValueError
        If the response body is not valid JSON, or if the decoded payload
        does not unpack into exactly five elements.

    Notes
    -----
    Performs a network request and prints a line containing the URL of the
    matching ms2lda.org view page.
    """
    # Imported locally to keep the Python 2/3 shim next to its only use:
    # urlopen lives in different modules on the two major versions.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = 'http://ms2lda.org/basicviz/get_doc_table/%d/mass2motif_%d/' % (mfid, m2m)
    # closing() releases the connection even if read() raises.
    with closing(urlopen(url)) as response:
        raw = response.read()
    # Decode explicitly so json.loads receives text on every Python version.
    payload = json.loads(raw.decode('utf-8'))

    view_url = 'http://ms2lda.org/basicviz/view_multi_m2m/%d/mass2motif_%d/' % (mfid, m2m)
    # Single parenthesised argument: prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('Mass2Motif %d experiment %d retrieved from %s' % (m2m, mfid, view_url))

    samples, docs, intensities, masses, rts = payload
    return {
        'samples': np.array(samples),
        'docs': np.array(docs),
        'intensities': np.array(intensities),
        'masses': np.array(masses),
        'rts': np.array(rts),
    }
In [4]:
def get_figure(title, data):
    """Build a plotly heatmap figure from a data dict.

    Parameters
    ----------
    title : str
        Title placed on the figure layout.
    data : dict
        Must provide ``'samples'`` (iterable of strings), ``'masses'`` and
        ``'rts'`` (parallel numeric sequences) and ``'intensities'`` (an
        object with a ``transpose()`` method, e.g. a 2-D numpy array).

    Returns
    -------
    plotly.graph_objs.Figure
        A 1000x500 figure holding one ``Heatmap`` trace with the 'Jet'
        colorscale.
    """
    # y-axis labels: sample names with the 'Urine_' and '_fullscan_split'
    # fragments removed to keep the tick labels short.
    row_labels = [
        samp.replace('Urine_', '').replace('_fullscan_split', '')
        for samp in data['samples']
    ]

    # x-axis labels: one "mass, rt" pair per entry, four decimals each.
    col_labels = [
        '%.4f, %.4f' % (mz, rt)
        for mz, rt in zip(data['masses'], data['rts'])
    ]

    plot_data = [
        go.Heatmap(
            # Transposed so the matrix rows line up with the sample labels on
            # y -- assumes 'intensities' holds one row per mass/rt entry and
            # one column per sample; TODO confirm against the data source.
            z=data['intensities'].transpose(),
            x=col_labels,
            y=row_labels,
            colorscale='Jet',
        )
    ]

    layout = go.Layout(
        title=title,
        xaxis=dict(ticks=''),
        yaxis=dict(ticks=''),
        width=1000,
        height=500,
        # A plain dict is accepted for `margin` and avoids the deprecated
        # go.Margin class.
        margin=dict(l=120, r=50, b=120, t=50, pad=4),
    )
    return go.Figure(data=plot_data, layout=layout)
In [5]:
def bicluster(n_clusters, data):
    """Reorder a data dict so members of the same bicluster sit together.

    Parameters
    ----------
    n_clusters : int or tuple
        Forwarded unchanged to ``SpectralBiclustering(n_clusters=...)``.
    data : dict
        Dict with numpy arrays under ``'samples'``, ``'docs'``,
        ``'intensities'``, ``'masses'`` and ``'rts'``.

    Returns
    -------
    dict
        New dict with the same keys. ``'intensities'`` has its rows and
        columns permuted; ``'docs'``, ``'masses'`` and ``'rts'`` follow the
        row permutation and ``'samples'`` follows the column permutation.
    """
    intensities = data['intensities']

    # Fit the biclustering model on the intensity matrix (fixed seed).
    clusterer = SpectralBiclustering(n_clusters=n_clusters, random_state=0)
    clusterer.fit(intensities)

    # Sorting the cluster labels gives permutations that place rows
    # (respectively columns) carrying the same label next to each other.
    row_order = np.argsort(clusterer.row_labels_)
    col_order = np.argsort(clusterer.column_labels_)

    return {
        'samples': data['samples'][col_order],
        'docs': data['docs'][row_order],
        'intensities': intensities[row_order][:, col_order],
        'masses': data['masses'][row_order],
        'rts': data['rts'][row_order],
    }
In [66]:
# Parameters for this analysis, consumed by the cells that follow.
mfid = 4             # experiment id passed to load_data
m2m = 122            # Mass2Motif number passed to load_data
n_clusters = (2, 3)  # forwarded to SpectralBiclustering by bicluster()
# Plot title; spelling corrected from 'mercapture' to the metabolite name.
title = 'Paracetamol mercapturate'
In [61]:
# Download the document table for the experiment / Mass2Motif chosen in the
# parameter cell (network request; prints the matching view URL).
data = load_data(mfid, m2m)
In [62]:
# Heatmap of the intensities in the order they were returned by load_data.
fig = get_figure(title, data)
plotly.offline.iplot(fig, filename=title)
In [67]:
# Reorder rows and columns by bicluster membership and plot again.
# `data` is left untouched; the reordered copy lives in `new_data`.
new_data = bicluster(n_clusters, data)
fig = get_figure(title + ' -- rearranged', new_data)
# Note: the same `filename` as the un-rearranged plot is passed here.
plotly.offline.iplot(fig, filename=title)
In [48]:
# Parameters for this analysis, consumed by the cells that follow.
mfid = 4             # experiment id passed to load_data
m2m = 293            # Mass2Motif number passed to load_data
n_clusters = (4, 4)  # forwarded to SpectralBiclustering by bicluster()
# Plot title; spelling corrected from 'Carnatine'.
title = 'Carnitine'
In [45]:
# Download the document table for the experiment / Mass2Motif chosen in the
# parameter cell; this rebinds `data`, replacing the previous analysis' table.
data = load_data(mfid, m2m)
In [46]:
# Heatmap of the intensities in the order they were returned by load_data.
fig = get_figure(title, data)
plotly.offline.iplot(fig, filename=title)
In [49]:
# Reorder rows and columns by bicluster membership and plot again.
# `data` is left untouched; the reordered copy lives in `new_data`.
new_data = bicluster(n_clusters, data)
fig = get_figure(title + ' -- rearranged', new_data)
# Note: the same `filename` as the un-rearranged plot is passed here.
plotly.offline.iplot(fig, filename=title)
In [58]:
# Parameters for this analysis: experiment id and Mass2Motif number (both
# passed to load_data), the cluster counts handed to bicluster(), and the
# plot title.
mfid, m2m = 4, 133
n_clusters = (3, 3)
title = 'Glutamine'
In [51]:
# Download the document table for the experiment / Mass2Motif chosen in the
# parameter cell; this rebinds `data`, replacing the previous analysis' table.
data = load_data(mfid, m2m)
In [16]:
# Heatmap of the intensities in the order they were returned by load_data.
fig = get_figure(title, data)
plotly.offline.iplot(fig, filename=title)
In [59]:
# Reorder rows and columns by bicluster membership and plot again.
# `data` is left untouched; the reordered copy lives in `new_data`.
new_data = bicluster(n_clusters, data)
fig = get_figure(title + ' -- rearranged', new_data)
# Note: the same `filename` as the un-rearranged plot is passed here.
plotly.offline.iplot(fig, filename=title)
In [ ]: