Smarter dimensionality reduction, followed by tsne and inverse PCA for 'whisk types'



In [1]:

    
import numpy as np
# import tables as tb
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
# import sklearn as sk
import seaborn as sns
from sklearn import manifold
from sklearn.decomposition import PCA
import random

# from lightning import Lightning
# lgn = Lightning(host="http://localhost:3000",ipython=True)
# lgn.create_session("tsne_scatter_line")
# lgn = Lightning(local=True,ipython=True)



In [6]:

    
theta = pd.read_csv('/Users/mathew/work/whiskfree/data/theta_34.csv',header=None)
kappa = pd.read_csv('/Users/mathew/work/whiskfree/data/kappa_34.csv',header=None)
tt = pd.read_csv('/Users/mathew/work/whiskfree/data/trialtype_34.csv',header=None)
ch = pd.read_csv('/Users/mathew/work/whiskfree/data/choice_34.csv',header=None)



In [86]:

    
decimate?



In [7]:

    
# reduce sampling frequency with decimate
from scipy.signal import decimate, resample



In [87]:

    
resample?



In [8]:

    
# theta_d = np.array([[decimate(theta.values.squeeze()[i,900:1890],10)] for i in xrange(0,theta.shape[0])])
theta_r = np.array([[resample(theta.values.squeeze()[i,900:1890],100)] for i in xrange(0,theta.shape[0])])



In [49]:

    
# plot original and decimated version
# theta_d = theta_d.squeeze()
theta_r = theta_r.squeeze()
theta_r.shape









    Out[49]:





(389, 100)



In [61]:

    
theta_dm = np.array([[theta_r[i] - np.mean(theta_r[i,0:10])] for i in xrange(0,theta_r.shape[0])])
theta_dm = theta_dm.squeeze()
_ = plt.plot(theta_dm[1:10,:].T)



In [60]:



In [10]:

    
# import mpld3
# %matplotlib inline
# mpld3.enable_notebook()

plt.plot(np.linspace(900,1890,990),theta.values.squeeze()[10,900:1890])
plt.plot(np.linspace(900,1890,100),theta_r[10])
# mpld3.show()









    Out[10]:





[<matplotlib.lines.Line2D at 0x10d0fa810>]



In [104]:

    
x = np.linspace(900,1890,990)
x.max()









    Out[104]:





1890.0



In [105]:

    
theta_r.shape









    Out[105]:





(389, 99)



In [74]:

    
x = theta.values.squeeze()[10,900:1890]
x.shape









    Out[74]:





(990,)



In [81]:

    
y = decimate(x,10)
y.shape









    Out[81]:





(99,)



In [89]:

    
z = resample(x,99)
z.shape









    Out[89]:





(99,)



In [90]:

    
plt.plot(np.linspace(900,1890,990),x)
plt.plot(np.linspace(900,1890,99),y)
plt.plot(np.linspace(900,1890,99),z)









    Out[90]:





[<matplotlib.lines.Line2D at 0x112f78d50>]



In [121]:

    
# ch = pd.read_csv('/Users/mathew/work/whiskfree/data/choice_34.csv',header=None)



In [91]:

    
Xpca_theta = PCA(n_components=30).fit_transform(theta_r)



In [70]:

    
PCA.fit_transform?
theta_dm.shape









    Out[70]:





(389, 100)



In [71]:

    
Xpca_theta.shape
y = np.nan_to_num(tt.values.squeeze())



In [93]:

    
tsne = manifold.TSNE(n_components=2,learning_rate=1000,min_grad_norm=1e-8,method='exact',verbose=1,random_state=0)
mappedX_theta = tsne.fit_transform(Xpca_theta)
# mappedX_kappa = tsne.fit_transform(Xpca_kappa)
plt.scatter(mappedX_theta[:,0],mappedX_theta[:,1],c=Y_hat.labels_,cmap='rainbow')









    



[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 389 / 389
[t-SNE] Mean sigma: 18.716389
[t-SNE] Error after 100 iterations with early exaggeration: 17.636001
[t-SNE] Error after 225 iterations: 1.672239






    Out[93]:





<matplotlib.collections.PathCollection at 0x116a09650>



In [37]:

    
# mappedX_theta.shape
# manifold.__file__
manifold.TSNE?



In [39]:

    
from sklearn.cluster import KMeans



In [92]:

    
kmeans = KMeans(5, random_state=8)
Y_hat = kmeans.fit(Xpca_theta)



In [72]:

    
plt.scatter(mappedX_theta[:,0],mappedX_theta[:,1],c=y,cmap='rainbow')









    Out[72]:





<matplotlib.collections.PathCollection at 0x1185b8d90>



In [114]:

    
ch = ch.squeeze()
ch.shape









    Out[114]:





(389,)



In [120]:

    
# lgn.scatter(mappedX_theta[:,0],mappedX_theta[:,1],group= ch,size = 3,colormap='Dark2')



In [123]:

    
fig, ax = plt.subplots(1,2,figsize = (16,5))
ax[0].scatter(mappedX_theta[:,0],mappedX_theta[:,1],c=tt.values,cmap='cubehelix')
ax[0].set_title('t-SNE (theta) labelled by trialtype')
ax[1].scatter(mappedX_theta[:,0],mappedX_theta[:,1],c=ch.values,cmap='cubehelix')
ax[1].set_title('t-SNE (theta) labelled by choice')
# plt.savefig('theta_34_tsne.png')









    Out[123]:





<matplotlib.text.Text at 0x114e14c10>



In [ ]:

    
fig, ax = plt.subplots(1,2,figsize = (16,5))
ax[0].scatter(mappedX_kappa[:,0],mappedX_kappa[:,1],c=tt.values,cmap='cubehelix')
ax[0].set_title('t-SNE (kappa) labelled by trialtype')
ax[1].scatter(mappedX_kappa[:,0],mappedX_kappa[:,1],c=ch.values,cmap='cubehelix')
ax[1].set_title('t-SNE (kappa) labelled by choice')
plt.savefig('kappa_34_tsne.png')



In [ ]:

    
x = theta.values.squeeze()
theta.shape



In [145]:

    
PCA?



In [181]:

    
pca_theta = PCA(n_components=0.8)
pca_theta.fit?



In [217]:

    
theta_reduced = pca_theta.fit(theta_r)
# pca_theta.inverse_transform?
theta_reduced.components_.shape









    Out[217]:





(13, 100)



In [219]:

    
np.std(theta_reduced.components_[i])









    Out[219]:





0.099999968392379096



In [228]:

    
# theta_reduced.
plt.subplots(figsize=(10,10))
offset = 0.0;
for i in xrange(0,12):
    _ =plt.plot(offset + theta_reduced.components_[i])
    offset = offset + 4*np.std(theta_reduced.components_[i])
    
plt.legend(xrange(1,13))









    Out[228]:





<matplotlib.legend.Legend at 0x1225bdf10>



In [214]:

    
x = theta_reduced.components_
# x.shape
# plt.imshow?
plt.imshow(x,aspect = float(x.shape[1])/x.shape[0],interpolation = 'none',cmap='cubehelix')
plt.colorbar()









    Out[214]:





<matplotlib.colorbar.Colorbar instance at 0x1224cef38>



In [215]:

    
tax = plt.imshow(theta_r,cmap='cubehelix',aspect = float(theta_r.shape[1])/theta_r.shape[0])
plt.colorbar()









    Out[215]:





<matplotlib.colorbar.Colorbar instance at 0x120264cb0>



In [103]:

    
# plot kmeans cluster centers
# Y_hat.cluster_centers_.shape
# Xpca_theta = PCA(n_components=30).fit_transform(theta_dm)

pca = PCA(n_components=30).fit(theta_r)

theta_pca = pca.transform(theta_r)
# theta_pca.shape

kmeans = KMeans(9, random_state=8)
Y_hat = kmeans.fit(theta_pca)

reconstructed_centers = pca.inverse_transform(Y_hat.cluster_centers_)



In [104]:

    
reconstructed_centers.shape
_ = plt.plot(reconstructed_centers.T)



In [105]:

    
# _ = plt.plot(pca.components_.T)
offset = 0.0;
for i in xrange(0,12):
    _ = plt.plot(offset + pca.components_[i])
    offset = offset + 4*np.std(pca.components_[i])
    
plt.legend(xrange(1,13))









    Out[105]:





<matplotlib.legend.Legend at 0x115dc6b50>



In [144]:

    
# reorder theta into clusters.
with sns.axes_style("white"):
    fig, ax = plt.subplots(1,2,figsize = (12,6))
ax[0].imshow(theta_r,aspect = float(theta_r.shape[1])/theta_r.shape[0],cmap = 'cubehelix',interpolation = 'none')
ax[1].imshow(theta_r[np.argsort(Y_hat.labels_)],aspect = float(theta_r.shape[1])/theta_r.shape[0],cmap = 'cubehelix',interpolation = 'none')









    Out[144]:





<matplotlib.image.AxesImage at 0x11d5b6050>



In [135]:

    
# np.argsort(Y_hat.labels_)

Watching t-sne in action

using code from https://www.oreilly.com/learning/an-illustrated-introduction-to-the-t-sne-algorithm



In [240]:



In [4]:

    
from numpy import linalg
from numpy.linalg import norm
import sklearn
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.manifold.t_sne import (_joint_probabilities,
                                    _kl_divergence)
from sklearn.utils.extmath import _ravel
from sklearn.manifold import TSNE



In [8]:

    
positions = []
def _gradient_descent(objective, p0, it, n_iter, objective_error=None,
                      n_iter_check=1, n_iter_without_progress=50,
                      momentum=0.5, learning_rate=1000.0, min_gain=0.01,
                      min_grad_norm=1e-7, min_error_diff=1e-7, verbose=0,
                      args=None, kwargs=None):

    p = p0.copy().ravel()
    update = np.zeros_like(p)
    gains = np.ones_like(p)
    error = np.finfo(np.float).max
    best_error = np.finfo(np.float).max
    best_iter = 0

    for i in range(it, n_iter):
        positions.append(p.copy())
        
        new_error, grad = objective(p, *args, **kwargs)
        grad_norm = linalg.norm(grad)

        inc = update * grad >= 0.0
        dec = np.invert(inc)
        gains[inc] += 0.05
        gains[dec] *= 0.95
        np.clip(gains, min_gain, np.inf)
        grad *= gains
        update = momentum * update - learning_rate * grad
        p += update

        if (i + 1) % n_iter_check == 0:
            if new_error is None:
                new_error = objective_error(p, *args)
            error_diff = np.abs(new_error - error)
            error = new_error

            if verbose >= 2:
                m = "[t-SNE] Iteration %d: error = %.7f, gradient norm = %.7f"
                print(m % (i + 1, error, grad_norm))

            if error < best_error:
                best_error = error
                best_iter = i
            elif i - best_iter > n_iter_without_progress:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: did not make any progress "
                          "during the last %d episodes. Finished."
                          % (i + 1, n_iter_without_progress))
                break
            if grad_norm <= min_grad_norm:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: gradient norm %f. Finished."
                          % (i + 1, grad_norm))
                break
            if error_diff <= min_error_diff:
                if verbose >= 2:
                    m = "[t-SNE] Iteration %d: error difference %f. Finished."
                    print(m % (i + 1, error_diff))
                break

        if new_error is not None:
            error = new_error

    return p, error, i

sklearn.manifold.t_sne._gradient_descent = _gradient_descent



In [5]:

    
# from sklearn import manifold
# RS = 20150101
# tsne = manifold.TSNE(n_components=2,learning_rate=500,verbose=1,random_state=0)
X_proj = TSNE().fit_transform(Xpca_theta)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-9489067a405e> in <module>()
      2 # RS = 20150101
      3 # tsne = manifold.TSNE(n_components=2,learning_rate=500,verbose=1,random_state=0)
----> 4 X_proj = TSNE().fit_transform(Xpca_theta)

NameError: name 'Xpca_theta' is not defined



In [12]:

    
X_iter = np.dstack(position.reshape(-1, 2)
                   for position in positions)



In [25]:

    
sklearn.__version__









    Out[25]:





'0.17'



In [18]:

    
y = tt.values
y = np.nan_to_num(y)
# y(y<0) = 0
# y = y + 1e-14
y.shape









    Out[18]:





(389, 1)



In [44]:

    
! pip install moviepy









    



Collecting moviepy
  Downloading moviepy-0.2.2.11.tar.gz (107kB)
    100% |████████████████████████████████| 110kB 682kB/s 
Requirement already satisfied (use --upgrade to upgrade): numpy in /Users/mathew/miniconda/lib/python3.5/site-packages (from moviepy)
Collecting decorator (from moviepy)
  Downloading decorator-4.0.9-py2.py3-none-any.whl
Collecting imageio (from moviepy)
  Downloading imageio-1.5-cp26.cp27.cp33.cp34.cp35-none-macosx_10_5_x86_64.macosx_10_6_intel.whl (4.8MB)
    100% |████████████████████████████████| 4.8MB 64kB/s 
Collecting tqdm (from moviepy)
  Downloading tqdm-4.7.0-py2.py3-none-any.whl
Building wheels for collected packages: moviepy
  Running setup.py bdist_wheel for moviepy ... - \ | done
  Stored in directory: /Users/mathew/Library/Caches/pip/wheels/ba/fa/af/be4051961aa23c8566906e9a3bd5f7bb57ac2c3f3c43f09dbc
Successfully built moviepy
Installing collected packages: decorator, imageio, tqdm, moviepy
Successfully installed decorator-4.0.9 imageio-1.5 moviepy-0.2.2.11 tqdm-4.7.0
You are using pip version 8.0.2, however version 8.1.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.



In [2]:

    
import moviepy as mpy









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-ddf9c0886f50> in <module>()
----> 1 import moviepy as mpy

ImportError: No module named moviepy



In [21]:

    
X_iter.shape









    Out[21]:





(389, 2, 175)



In [24]:

    
# from moviepy.video.io.bindings import mplfig_to_npimage
# import moviepy.editor as mpy
f, ax, sc, txts = scatter(X_iter[...,-1],y.squeeze())

# def make_frame_mpl(t):
#     i = int(t*40)
#     x = X_iter[..., i]
#     sc.set_offsets(x)
#     for j, txt in zip(range(10), txts):
#         xtext, ytext = np.median(x[y == j, :], axis=0)
#         txt.set_x(xtext)
#         txt.set_y(ytext)
#     return mplfig_to_npimage(f)

# animation = mpy.VideoClip(make_frame_mpl,
#                           duration=X_iter.shape[2]/40.)
# animation.write_gif("tsne_1.gif", fps=20)



In [16]:

    
def scatter(x, colors):
    # We choose a color palette with seaborn.
    palette = np.array(sns.color_palette("hls", 10))

    # We create a scatter plot.
    f = plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    sc = ax.scatter(x[:,0], x[:,1], lw=0, s=40,
                    c=palette[colors.astype(np.int)])
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    ax.axis('off')
    ax.axis('tight')

    # We add the labels for each digit.
    txts = []
#     for i in range(10):
#         # Position of each label.
#         xtext, ytext = np.median(x[colors == i, :], axis=0)
#         txt = ax.text(xtext, ytext, str(i), fontsize=24)
#         txt.set_path_effects([
#             PathEffects.Stroke(linewidth=5, foreground="w"),
#             PathEffects.Normal()])
#         txts.append(txt)

    return f, ax, sc, txts



In [ ]:

    
c = ch.values.squeeze()==3
tt1 = theta.values.squeeze()[c,499:2499]
tt1.shape[0]



In [ ]:

    
float(tt1.shape[0]) /tt1.shape[1]



In [ ]:

    
iris = sns.load_dataset('iris')



In [ ]:

    
plt.plot?



In [ ]:

    
lgn.line(tt1,color=[0,0,0],"alpha" = 0.01)



In [ ]:

    
lgn.scatter?

MPLD3 linked plot attempt



In [73]:

    
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import mpld3
from mpld3 import plugins, utils



class LinkedView(plugins.PluginBase):
    """A simple plugin showing how multiple axes can be linked"""

    JAVASCRIPT = """
    mpld3.register_plugin("linkedview", LinkedViewPlugin);
    LinkedViewPlugin.prototype = Object.create(mpld3.Plugin.prototype);
    LinkedViewPlugin.prototype.constructor = LinkedViewPlugin;
    LinkedViewPlugin.prototype.requiredProps = ["idpts", "idline", "data"];
    LinkedViewPlugin.prototype.defaultProps = {}
    function LinkedViewPlugin(fig, props){
        mpld3.Plugin.call(this, fig, props);
    };

    LinkedViewPlugin.prototype.draw = function(){
      var pts = mpld3.get_element(this.props.idpts);
      var line = mpld3.get_element(this.props.idline);
      var data = this.props.data;

      function mouseover(d, i){
        line.data = data[i];
        line.elements().transition()
            .attr("d", line.datafunc(line.data))
            .style("stroke", this.style.fill);
      }
      pts.elements().on("mouseover", mouseover);
    };
    """

    def __init__(self, points, line, linedata):
        if isinstance(points, matplotlib.lines.Line2D):
            suffix = "pts"
        else:
            suffix = None

        self.dict_ = {"type": "linkedview",
                      "idpts": utils.get_id(points, suffix),
                      "idline": utils.get_id(line),
                      "data": linedata}



In [108]:

    
Xpca_theta.shape









    Out[108]:





(389, 30)



In [ ]:

    
theta_r = np.array([[resample(theta.values.squeeze()[i,900:1890],100)] for i in xrange(0,theta.shape[0])])
theta_r = theta_r.squeeze()
theta_dm = np.array([[theta_r[i] - np.mean(theta_r[i,0:10])] for i in xrange(0,theta_r.shape[0])])
theta_dm = theta_dm.squeeze()

Xpca_theta = PCA(n_components=30).fit_transform(theta_dm)

tsne = manifold.TSNE(n_components=2,learning_rate=500,verbose=1,random_state=0)
mappedX_theta = tsne.fit_transform(Xpca_theta)

kmeans = KMeans(5, random_state=8)
Y_hat = kmeans.fit(Xpca_theta)



In [114]:

    
subset = random.sample(theta.index, theta.shape[0])
# x = np.linspace(500, 2499, 2000)
x = np.linspace(900,1890,100)
# data = np.array([[x,theta.values.squeeze()[si,499:2499]] for si in subset]) # data needs to be N by 2 x time (for x and y axes)
data = np.array([[x,theta_dm[si]] for si in subset]) # data needs to be N by 2 x time (for x and y axes)


fig, ax = plt.subplots(1,2,figsize = (10,10))

ax[0] = plt.subplot2grid((3,3), (0, 0), colspan=3)
ax[1] = plt.subplot2grid((3,3), (1, 0), colspan=3,rowspan=2)


# points = ax[1].scatter(mappedX_theta[subset,0],mappedX_theta[subset,1],s = 100,c=Y_hat.labels_[subset],alpha=0.5,cmap='rainbow')
points = ax[1].scatter(Xpca_theta[subset,0],Xpca_theta[subset,1],s = 100,c=Y_hat.labels_[subset],alpha=0.5,cmap='rainbow')

ax[1].set_xlabel('t-sne dim 1')
ax[1].set_ylabel('t-sne dim 2')



# create the line object
lines = ax[0].plot(x, 0 * x, '-w', lw=3, alpha=0.5)
# ax[0].set_ylim(-6e-3, 6e-3)
ax[0].set_ylim(-10,70)

ax[0].set_title("Mouse 36")

# transpose line data and add plugin
linedata = data.transpose(0, 2, 1).tolist()
plugins.connect(fig, LinkedView(points, lines[0], linedata))

mpld3.display()









    Out[114]:



In [ ]:

    
import random
r = random.sample(kappa.index, 10)



In [ ]:

    
subset = random.sample(kappa.index, 100)
x = np.linspace(500, 2499, 2000)
data = np.array([[x,theta.values.squeeze()[si,499:2499]] for si in subset]) # data needs to be N by 2 x time (for x and y axes)
data.shape



In [ ]:

    
kappa.shape



In [ ]:

    
x = np.linspace(0, 10, 100)
P.shape



In [ ]:

    
data_ = np.array([[x, Ai * np.sin(x / Pi)]
                 for (Ai, Pi) in zip(A, P)])

# data[0]
data_.shape



In [ ]:

    
# plt.plot(data.transpose(0, 2, 1)[1])
plt.plot(data[3,0],data[3,1])



In [ ]:

    
linedata = data.transpose(0, 2, 1)#.tolist()
linedata.shape



In [ ]:

    
mappedX_kappa[subset,0].shape



In [ ]:

    
x = np.linspace(500, 2499, 2000)

data = kappa.values.squeeze()[subset,499:2499]
data.shape



In [ ]:

    
x = np.linspace(500, 2499, 2000)
x.shape



In [ ]:

    
data = np.array([[x,kappa.values.squeeze()[si,499:2499]] for si in subset])



In [ ]:

    
# subset
# zip(A,P)
data.shape



In [ ]:

    
fig, ax = plt.subplots(2)

# scatter periods and amplitudes
np.random.seed(0)
P = np.random.random(size=10)
A = np.random.random(size=10)
x = np.linspace(0, 10, 100)
data = np.array([[x, Ai * np.sin(x / Pi)]
                 for (Ai, Pi) in zip(A, P)])
points = ax[1].scatter(P, A, c=P + A,
                       s=200, alpha=0.5)
ax[1].set_xlabel('Period')
ax[1].set_ylabel('Amplitude')

# create the line object
lines = ax[0].plot(x, 0 * x, '-w', lw=3, alpha=0.5)
ax[0].set_ylim(-1, 1)

# transpose line data and add plugin
linedata = data.transpose(0, 2, 1).tolist()
fig.plugins = [LinkedView(points, lines[0], linedata)]



In [ ]: