An attempt to display data using tsne, then use Lightning's scatter-line to plot what the data underneath actually looks like

Trying mpld3 instead, ahead of working out how to do it in Lightning


In [9]:
import numpy as np
# import tables as tb
import pandas as pd

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
# import sklearn as sk
import seaborn as sns

from lightning import Lightning
lgn = Lightning(host="http://localhost:3000",ipython=True)
lgn.create_session("tsne_scatter_line")
# lgn = Lightning(local=True,ipython=True)


Lightning initialized
Connected to server at http://localhost:3000
Out[1]:
Session
number: f02e216e-9964-436d-bb6d-36fcf2353e41name: tsne_scatter_line

In [18]:
theta = pd.read_csv('/Users/mathew/work/whiskfree/data/theta_34.csv')
kappa = pd.read_csv('/Users/mathew/work/whiskfree/data/kappa_34.csv')

In [19]:
tt = pd.read_csv('/Users/mathew/work/whiskfree/data/trialtype_34.csv')
ch = pd.read_csv('/Users/mathew/work/whiskfree/data/choice_34.csv')

In [38]:
tt.shape


Out[38]:
(1789, 1)

In [5]:
theta.shape


Out[5]:
(1789, 5000)

In [128]:
_ = plt.plot(np.mean(kappa.values.squeeze()[:,500:2500],0))



In [52]:
_ = plt.plot(np.mean(kappa.values.squeeze()[:,500:2500],0))



In [55]:
_ = plt.plot(np.mean(kappa.values.squeeze()[:,500:2500],0))



In [91]:
_ = plt.plot(np.mean(kappa.values.squeeze()[:,500:2500],0))



In [20]:
from sklearn import manifold
from sklearn.decomposition import PCA

In [21]:
Xpca_theta = PCA(n_components=30).fit_transform(theta.values.squeeze()[:,499:2499])
Xpca_kappa = PCA(n_components=30).fit_transform(kappa.values.squeeze()[:,499:2499])

In [26]:
Xpca.shape


Out[26]:
(1789, 30)

In [22]:
tsne = manifold.TSNE(n_components=2,learning_rate=500,verbose=1,random_state=0)
mappedX_theta = tsne.fit_transform(Xpca_theta)
mappedX_kappa = tsne.fit_transform(Xpca_kappa)


[t-SNE] Computing pairwise distances...
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Computed conditional probabilities for sample 388 / 388
[t-SNE] Mean sigma: 100.839784
[t-SNE] Error after 100 iterations with early exaggeration: 0.960278
[t-SNE] Error after 175 iterations: 0.903594
[t-SNE] Computing pairwise distances...
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Computed conditional probabilities for sample 388 / 388
[t-SNE] Mean sigma: 0.002814
[t-SNE] Error after 100 iterations with early exaggeration: 0.584771
[t-SNE] Error after 175 iterations: 0.575275

In [96]:
mappedX_kappa.shape


Out[96]:
(388, 2)

In [97]:
ch.shape


Out[97]:
(388, 1)

In [114]:
lgn.scatter(mappedX_kappa[:,0],mappedX_kappa[:,1],group= ch.values.squeeze(),size = 3,colormap='Dark2')


---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-114-ff3324282c30> in <module>()
----> 1 lgn.scatter(mappedX_kappa[:,0],mappedX_kappa[:,1],group= ch.values.squeeze(),size = 3,colormap='Dark2')

<string> in <lambda>(self, x, y, labels, values, color, group, colormap, size, alpha, xaxis, yaxis, tooltips, width, brush, description, zoom, height)

/Users/mathew/anaconda/lib/python2.7/site-packages/lightning/types/decorators.pyc in plotter(self, *args, **kwargs)
     34                 viz = VizType._baseplot(self.session, type, *args, **kwargs)
     35             else:
---> 36                 viz = VizType._baseplot(self.session, VizType._name, *args, **kwargs)
     37             self.session.visualizations.append(viz)
     38             return viz

/Users/mathew/anaconda/lib/python2.7/site-packages/lightning/types/base.pyc in _baseplot(cls, session, type, *args, **kwargs)
    175 
    176         else:
--> 177             viz = cls._create(session, data=data, type=type, options=options, description=description)
    178 
    179         return viz

/Users/mathew/anaconda/lib/python2.7/site-packages/lightning/visualization.pyc in _create(cls, session, data, images, type, options, description)
    112                 raise Exception(r.text)
    113             elif not r.status_code == requests.codes.ok:
--> 114                 raise Exception('Problem uploading data')
    115 
    116             viz = cls(session=session, json=r.json(), auth=session.auth)

Exception: Problem uploading data

In [98]:
fig, ax = plt.subplots(1,2,figsize = (16,5))
ax[0].scatter(mappedX_theta[:,0],mappedX_theta[:,1],c=tt.values,cmap='cubehelix')
ax[0].set_title('t-SNE (theta) labelled by trialtype')
ax[1].scatter(mappedX_theta[:,0],mappedX_theta[:,1],c=ch.values,cmap='cubehelix')
ax[1].set_title('t-SNE (theta) labelled by choice')
plt.savefig('theta_34_tsne.png')



In [99]:
fig, ax = plt.subplots(1,2,figsize = (16,5))
ax[0].scatter(mappedX_kappa[:,0],mappedX_kappa[:,1],c=tt.values,cmap='cubehelix')
ax[0].set_title('t-SNE (kappa) labelled by trialtype')
ax[1].scatter(mappedX_kappa[:,0],mappedX_kappa[:,1],c=ch.values,cmap='cubehelix')
ax[1].set_title('t-SNE (kappa) labelled by choice')
plt.savefig('kappa_34_tsne.png')



In [187]:
x = theta.values.squeeze()
theta.shape


Out[187]:
(1789, 5000)

In [188]:
plt.scatter?

In [41]:
fig, ax = plt.subplots(1,2,figsize = (15,6))
tax = ax[0].imshow(theta.values.squeeze()[:,499:2499],cmap='cubehelix')
plt.colorbar(tax)
kax = ax[1].imshow(kappa.values.squeeze()[:,499:2499],cmap='cubehelix')
plt.colorbar(kax)


Out[41]:
<matplotlib.colorbar.Colorbar instance at 0x11127aef0>

In [23]:
Xpca_kappa.shape


Out[23]:
(388, 30)

In [110]:
# dfpc = pd.DataFrame(Xpca_theta[:,0:5])
dfpc = pd.DataFrame(Xpca_kappa[:,0:5])
dfpc['tt'] = tt.values

In [61]:
dfpc['tt'] = tt.values

In [170]:
dfpc?

In [111]:
sns.pairplot(dfpc,hue = 'tt',diag_kind="kde")
# plt.savefig('figs/theta_34_pairplot.png')
plt.savefig('figs/kappa_33_pairplot.png')



In [106]:
# lgn.line(theta.values.squeeze()[:,499:2499],group= ch.values.squeeze())
fig, ax = plt.subplots(1,3,figsize = (15,10))
for i in range(1,4):
    c = ch.values.squeeze()==i
    
#     ax[i-1].plot(theta.values.squeeze()[c,499:2499].T,color = 'black',alpha = 0.01)
    ax[i-1].plot(kappa.values.squeeze()[c,499:2499].T,color = 'black',alpha = 0.01)

#     tt1 = theta.values.squeeze()[c,499:2499]
#     ax[i-1].imshow(tt1,cmap='cubehelix',aspect = float(tt1.shape[1])/tt1.shape[0])
    
# plt.savefig('figs/theta_34_transparent.png')
plt.savefig('figs/kappa_34_transparent.png')



In [221]:
cmap = sns.color_palette('cubehelix',4)

In [8]:
c = ch.values.squeeze()==0

In [81]:
c = ch.values.squeeze()==3
tt1 = theta.values.squeeze()[c,499:2499]
tt1.shape[0]


Out[81]:
1065

In [86]:
float(tt1.shape[0]) /tt1.shape[1]


Out[86]:
0.5325

In [ ]:
iris = sns.load_dataset('iris')

In [219]:
plt.plot?

In [21]:
lgn.line(tt1,color=[0,0,0],"alpha" = 0.01)


  File "<ipython-input-21-024b4224bb19>", line 1
    lgn.line(tt1,color=[0,0,0],"alpha" = 0.01)
SyntaxError: keyword can't be an expression

In [24]:
lgn.scatter?

MPLD3 linked plot attempt


In [24]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import mpld3
from mpld3 import plugins, utils



class LinkedView(plugins.PluginBase):
    """A simple plugin showing how multiple axes can be linked"""

    JAVASCRIPT = """
    mpld3.register_plugin("linkedview", LinkedViewPlugin);
    LinkedViewPlugin.prototype = Object.create(mpld3.Plugin.prototype);
    LinkedViewPlugin.prototype.constructor = LinkedViewPlugin;
    LinkedViewPlugin.prototype.requiredProps = ["idpts", "idline", "data"];
    LinkedViewPlugin.prototype.defaultProps = {}
    function LinkedViewPlugin(fig, props){
        mpld3.Plugin.call(this, fig, props);
    };

    LinkedViewPlugin.prototype.draw = function(){
      var pts = mpld3.get_element(this.props.idpts);
      var line = mpld3.get_element(this.props.idline);
      var data = this.props.data;

      function mouseover(d, i){
        line.data = data[i];
        line.elements().transition()
            .attr("d", line.datafunc(line.data))
            .style("stroke", this.style.fill);
      }
      pts.elements().on("mouseover", mouseover);
    };
    """

    def __init__(self, points, line, linedata):
        if isinstance(points, matplotlib.lines.Line2D):
            suffix = "pts"
        else:
            suffix = None

        self.dict_ = {"type": "linkedview",
                      "idpts": utils.get_id(points, suffix),
                      "idline": utils.get_id(line),
                      "data": linedata}

In [25]:
mpld3.enable_notebook()

In [26]:
fig, ax = plt.subplots(1,2,figsize = (10,6))

# scatter periods and amplitudes
# P = 0.2 + np.random.random(size=20)
# A = np.random.random(size=20)
# data = np.array([[x, Ai * np.sin(x / Pi)]
#                  for (Ai, Pi) in zip(A, P)])
# points = ax[1].scatter(P, A, c=P + A,
#                        s=200, alpha=0.5)
# points = ax[1].scatter(P, A, c=P + A,s=200, alpha=0.5)


subset = random.sample(kappa.index, 100)
x = np.linspace(500, 2499, 2000)
data = np.array([[x,kappa.values.squeeze()[si,499:2499]] for si in subset]) # data needs to be N by 2 x time (for x and y axes)

points = ax[1].scatter(mappedX_kappa[subset,0],mappedX_kappa[subset,1],s = 50,c=tt.values[subset],cmap='cubehelix',alpha=0.5)
ax[1].set_xlabel('t-sne dim 1')
ax[1].set_ylabel('t-sne dim 2')

# create the line object
lines = ax[0].plot(x, 0 * x, '-w', lw=3, alpha=0.5)
# ax[0].set_ylim(-6e-3, 6e-3)

ax[0].set_title("Hover over points to see lines")

# transpose line data and add plugin
linedata = data.transpose(0, 2, 1).tolist()
# linedata = data.tolist()
plugins.connect(fig, LinkedView(points, lines[0], linedata))



In [11]:
import random
r = random.sample(kappa.index, 10)

In [13]:
subset = random.sample(kappa.index, 100)
x = np.linspace(500, 2499, 2000)
data = np.array([[x,theta.values.squeeze()[si,499:2499]] for si in subset]) # data needs to be N by 2 x time (for x and y axes)
data.shape


Out[13]:
(100, 2, 2000)

In [16]:
kappa.shape


Out[16]:
(388, 5000)

In [171]:
x = np.linspace(0, 10, 100)
P.shape


Out[171]:
(20,)

In [222]:
data_ = np.array([[x, Ai * np.sin(x / Pi)]
                 for (Ai, Pi) in zip(A, P)])

# data[0]
data_.shape


Out[222]:
(20, 2, 2000)

In [15]:
# plt.plot(data.transpose(0, 2, 1)[1])
plt.plot(data[3,0],data[3,1])


Out[15]:
[<matplotlib.lines.Line2D at 0x108b0fc90>]

In [228]:
linedata = data.transpose(0, 2, 1)#.tolist()
linedata.shape


Out[228]:
(100, 2000, 2)

In [230]:
mappedX_kappa[subset,0].shape


Out[230]:
(100,)

In [204]:
x = np.linspace(500, 2499, 2000)

data = kappa.values.squeeze()[subset,499:2499]
data.shape


Out[204]:
(100, 2000)

In [206]:
x = np.linspace(500, 2499, 2000)
x.shape


Out[206]:
(2000,)

In [207]:
data = np.array([[x,kappa.values.squeeze()[si,499:2499]] for si in subset])

In [208]:
# subset
# zip(A,P)
data.shape


Out[208]:
(100, 2, 2000)

In [27]:
fig, ax = plt.subplots(2)

# scatter periods and amplitudes
np.random.seed(0)
P = np.random.random(size=10)
A = np.random.random(size=10)
x = np.linspace(0, 10, 100)
data = np.array([[x, Ai * np.sin(x / Pi)]
                 for (Ai, Pi) in zip(A, P)])
points = ax[1].scatter(P, A, c=P + A,
                       s=200, alpha=0.5)
ax[1].set_xlabel('Period')
ax[1].set_ylabel('Amplitude')

# create the line object
lines = ax[0].plot(x, 0 * x, '-w', lw=3, alpha=0.5)
ax[0].set_ylim(-1, 1)

# transpose line data and add plugin
linedata = data.transpose(0, 2, 1).tolist()
fig.plugins = [LinkedView(points, lines[0], linedata)]



In [ ]: