In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline

In [3]:
import numpy as np
from sklearn.manifold import TSNE

In [4]:
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [4, -1]])
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [0, 0]])
X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])

In [5]:
X


Out[5]:
array([[-1,  1],
       [-2,  2],
       [-3,  3],
       [ 1,  1],
       [ 2,  2],
       [ 3,  3],
       [ 4,  4]])

In [6]:
import matplotlib.pyplot as plt

In [7]:
plt.figure(figsize=(10,10))
plt.scatter(X[:, 0], X[:, 1])
# plt.savefig('original.png')


Out[7]:
<matplotlib.collections.PathCollection at 0x28f08825cf8>

In [8]:
tsne = TSNE(n_components=2)

In [9]:
# you can only transform on what you fit, different from pca
X_transformed = tsne.fit_transform(X)
X_transformed


Out[9]:
array([[ 217.43529 ,  -83.32338 ],
       [ -55.73598 ,  139.69252 ],
       [  52.573597, -145.85677 ],
       [ 245.71147 ,   90.71747 ],
       [  80.849365,   28.184534],
       [ 109.126   ,  202.22618 ],
       [ -84.01288 ,  -34.348743]], dtype=float32)

In [10]:
plt.figure(figsize=(10,10))
plt.scatter(X_transformed[:, 0], X_transformed[:, 1])
# plt.savefig('reduced.png')


Out[10]:
<matplotlib.collections.PathCollection at 0x28f08ad8400>

In [11]:
tsne = TSNE(n_components=1)

In [12]:
# you can only transform on what you fit, different from pca
X_transformed = tsne.fit_transform(X)

In [13]:
plt.figure(figsize=(10,10))
plt.plot(X_transformed)


Out[13]:
[<matplotlib.lines.Line2D at 0x28f08a0ec88>]

Maybe we get better results on actual clusters


In [14]:
from sklearn.datasets.samples_generator import make_blobs
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=42)

In [15]:
X


Out[15]:
array([[ 0.71263709, -0.64203025],
       [ 0.86291419,  0.67908909],
       [-0.37379039, -1.0263001 ],
       ...,
       [ 1.45712913,  1.30077321],
       [ 1.86127298,  0.69306097],
       [ 1.27278119,  0.8758933 ]])

In [16]:
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice

def plot_dataset(X, y_pred=[0], fname=None):
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    # last color is black to properly display label -1 as noise (black)
    colors = np.append(np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                     '#f781bf', '#a65628', '#984ea3',
                                     '#999999', '#e41a1c', '#dede00']),
                              int(max(y_pred) + 1)))), ['#000000'])
    plt.figure(figsize=(10, 10))

    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])

    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)
    plt.xticks(())
    plt.yticks(())
    if fname:
        plt.savefig(fname)

In [19]:
plot_dataset(X, labels_true)



In [20]:
# TSNE?

In [21]:
tsne = TSNE(n_components=2)
X_transformed = tsne.fit_transform(X)
plot_dataset(X_transformed, labels_true)



In [24]:
tsne = TSNE(n_components=2, perplexity=50)
X_transformed = tsne.fit_transform(X)
plot_dataset(X_transformed, labels_true)



In [23]:
tsne = TSNE(n_components=2, perplexity=5)
X_transformed = tsne.fit_transform(X)
plot_dataset(X_transformed, labels_true)



In [ ]: