notebook.community

Edit and run



In [1]:

    
import warnings
warnings.filterwarnings('ignore')



In [2]:

    
%matplotlib inline



In [3]:

    
import numpy as np
from sklearn.manifold import TSNE



In [4]:

    
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [4, -1]])
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [0, 0]])
X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])



In [5]:

    
X









    Out[5]:





array([[-1,  1],
       [-2,  2],
       [-3,  3],
       [ 1,  1],
       [ 2,  2],
       [ 3,  3],
       [ 4,  4]])



In [6]:

    
import matplotlib.pyplot as plt



In [7]:

    
plt.figure(figsize=(10,10))
plt.scatter(X[:, 0], X[:, 1])
# plt.savefig('original.png')









    Out[7]:





<matplotlib.collections.PathCollection at 0x28f08825cf8>



In [8]:

    
tsne = TSNE(n_components=2)



In [9]:

    
# you can only transform on what you fit, different from pca
X_transformed = tsne.fit_transform(X)
X_transformed









    Out[9]:





array([[ 217.43529 ,  -83.32338 ],
       [ -55.73598 ,  139.69252 ],
       [  52.573597, -145.85677 ],
       [ 245.71147 ,   90.71747 ],
       [  80.849365,   28.184534],
       [ 109.126   ,  202.22618 ],
       [ -84.01288 ,  -34.348743]], dtype=float32)



In [10]:

    
plt.figure(figsize=(10,10))
plt.scatter(X_transformed[:, 0], X_transformed[:, 1])
# plt.savefig('reduced.png')









    Out[10]:





<matplotlib.collections.PathCollection at 0x28f08ad8400>



In [11]:

    
tsne = TSNE(n_components=1)



In [12]:

    
# you can only transform on what you fit, different from pca
X_transformed = tsne.fit_transform(X)



In [13]:

    
plt.figure(figsize=(10,10))
plt.plot(X_transformed)









    Out[13]:





[<matplotlib.lines.Line2D at 0x28f08a0ec88>]

Maybe we get better results on actual clusters



In [14]:

    
from sklearn.datasets.samples_generator import make_blobs
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=42)



In [15]:

    
X









    Out[15]:





array([[ 0.71263709, -0.64203025],
       [ 0.86291419,  0.67908909],
       [-0.37379039, -1.0263001 ],
       ...,
       [ 1.45712913,  1.30077321],
       [ 1.86127298,  0.69306097],
       [ 1.27278119,  0.8758933 ]])



In [16]:

    
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice

def plot_dataset(X, y_pred=[0], fname=None):
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)

    # last color is black to properly display label -1 as noise (black)
    colors = np.append(np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                     '#f781bf', '#a65628', '#984ea3',
                                     '#999999', '#e41a1c', '#dede00']),
                              int(max(y_pred) + 1)))), ['#000000'])
    plt.figure(figsize=(10, 10))

    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])

    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)
    plt.xticks(())
    plt.yticks(())
    if fname:
        plt.savefig(fname)



In [19]:

    
plot_dataset(X, labels_true)



In [20]:

    
# TSNE?



In [21]:

    
tsne = TSNE(n_components=2)
X_transformed = tsne.fit_transform(X)
plot_dataset(X_transformed, labels_true)



In [24]:

    
tsne = TSNE(n_components=2, perplexity=50)
X_transformed = tsne.fit_transform(X)
plot_dataset(X_transformed, labels_true)



In [23]:

    
tsne = TSNE(n_components=2, perplexity=5)
X_transformed = tsne.fit_transform(X)
plot_dataset(X_transformed, labels_true)



In [ ]: