In [1]:
%matplotlib inline 
import matplotlib.pyplot as plt
from matplotlib import offsetbox
import numpy as np
import umap
from mpl_toolkits.mplot3d import Axes3D
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
# Default (width, height) for every figure created below; large because
# several cells lay out multi-panel grids on a single figure.
plt.rcParams.update({'figure.figsize': (20, 25)})

# Loaded via sklearn.datasets.load_digits. The variable is called ``mnist``
# and every later cell reads ``mnist.data`` / ``mnist.target`` from it.
mnist = load_digits()

In [4]:
def show_dataset(X, y, ax=None):
    """
    Scatter-plot a 2-D or 3-D embedding coloured by target, with a colorbar.

    :param X: array whose ``shape[1]`` is 2 or 3; its columns are used as the
        x, y (and z) coordinates of the points.
    :param y: per-sample values passed to ``scatter`` as ``c`` and mapped
        through the ``tab10`` colormap.
    :param ax: axes to draw on. When omitted, 2-D data is drawn on the current
        pyplot axes and 3-D data on a 3-D subplot of a newly created figure.
    :return: None
    :raises RuntimeError: if X does not have exactly 2 or 3 columns.
    """
    n_dims = X.shape[1]

    # Validate before touching matplotlib so bad input never leaves a stray
    # empty figure behind. The message reports the real count because the
    # input may just as well be too small (one column) as too big.
    if n_dims not in (2, 3):
        raise RuntimeError(
            "Expected 2 or 3 dimensions, got {}".format(n_dims))

    if ax is None:
        if n_dims == 3:
            # A 3-D scatter needs a 3-D axes; build one instead of failing
            # with an AttributeError on None.
            ax = plt.figure().add_subplot(111, projection='3d')
        else:
            ax = plt.gca()

    coords = [X[:, i] for i in range(n_dims)]
    scattered = ax.scatter(*coords, c=y, cmap='tab10')
    # Attach the colorbar through the figure that owns ``ax`` so it lands in
    # the right place even when ``ax`` is not in pyplot's current figure.
    ax.figure.colorbar(scattered, ax=ax)

In [6]:
# t-SNE on the raw features, one panel per setting. Top row varies the
# perplexity (euclidean metric); bottom row varies the metric (perplexity 50).
RANDOM_STATE = 42  # t-SNE is stochastic; pin the seed so re-runs give the same figure
tsne_settings = [
    (5.0, "euclidean"),
    (50.0, "euclidean"),
    (100.0, "euclidean"),
    (50.0, "l1"),
    (50.0, "cosine"),
    (50.0, "correlation"),
]

tsne_models, tsne_embeddings = [], []
for panel, (perplexity, metric) in enumerate(tsne_settings, start=1):
    ax = plt.subplot(2, 3, panel)
    # NOTE(review): scikit-learn 1.5 renames n_iter to max_iter; switch when upgrading.
    model = TSNE(n_components=2, perplexity=perplexity, n_iter=2000,
                 metric=metric, random_state=RANDOM_STATE)
    embedding = model.fit_transform(mnist.data, mnist.target)
    # No axes passed: the scatter lands on the current axes, i.e. the subplot
    # that plt.subplot just created.
    show_dataset(embedding, mnist.target)
    ax.set_title("t-SNE: perplexity={}, metric={}".format(perplexity, metric))
    tsne_models.append(model)
    tsne_embeddings.append(embedding)

# Keep the original per-panel names bound in case another cell reads them.
alg, alg2, alg3, alg4, alg5, alg6 = tsne_models
new_data, new_data2, new_data3, new_data4, new_data5, new_data6 = tsne_embeddings



In [5]:
# UMAP on the raw features, one panel per setting. Top row varies n_neighbors
# (euclidean metric); bottom row varies the metric (n_neighbors 50).
RANDOM_STATE = 42  # UMAP is stochastic; pin the seed so re-runs give the same figure
umap_settings = [
    (5, 'euclidean'),
    (50, 'euclidean'),
    (100, 'euclidean'),
    (50, 'l1'),
    (50, 'cosine'),
    (50, 'correlation'),
]

umap_models, umap_embeddings = [], []
for panel, (n_neighbors, metric) in enumerate(umap_settings, start=1):
    ax = plt.subplot(2, 3, panel)
    model = umap.UMAP(n_neighbors=n_neighbors, min_dist=0.3, metric=metric,
                      random_state=RANDOM_STATE)
    # NOTE(review): passing mnist.target as y makes umap fit a *supervised*
    # embedding (the labels influence the layout) -- confirm this is intended,
    # otherwise drop the second argument.
    embedding = model.fit_transform(mnist.data, mnist.target)
    # No axes passed: the scatter lands on the current axes, i.e. the subplot
    # that plt.subplot just created.
    show_dataset(embedding, mnist.target)
    ax.set_title("UMAP: n_neighbors={}, metric={}".format(n_neighbors, metric))
    umap_models.append(model)
    umap_embeddings.append(embedding)

# Keep the original per-panel names bound in case another cell reads them.
alg, alg2, alg3, alg4, alg5, alg6 = umap_models
new_data, new_data2, new_data3, new_data4, new_data5, new_data6 = umap_embeddings



In [59]:
# Three-component t-SNE embedding shown on a dedicated 3-D axes.
RANDOM_STATE = 42  # t-SNE is stochastic; pin the seed so re-runs give the same figure
# NOTE(review): scikit-learn 1.5 renames n_iter to max_iter; switch when upgrading.
alg7 = TSNE(n_components=3, perplexity=50.0, n_iter=2000, metric="euclidean",
            random_state=RANDOM_STATE)
new_data7 = alg7.fit_transform(mnist.data, mnist.target)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
show_dataset(new_data7, mnist.target, ax)
ax.set_title("t-SNE 3-D: perplexity=50.0, metric=euclidean")



In [8]:
# Three-component UMAP embedding shown on a dedicated 3-D axes.
RANDOM_STATE = 42  # UMAP is stochastic; pin the seed so re-runs give the same figure
alg7 = umap.UMAP(n_components=3, n_neighbors=5, metric="euclidean",
                 random_state=RANDOM_STATE)
# NOTE(review): passing mnist.target as y makes umap fit a *supervised*
# embedding (the labels influence the layout) -- confirm this is intended,
# otherwise drop the second argument.
new_data7 = alg7.fit_transform(mnist.data, mnist.target)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
show_dataset(new_data7, mnist.target, ax)
ax.set_title("UMAP 3-D: n_neighbors=5, metric=euclidean")



In [64]:
# Reduce to 50 principal components once, then run t-SNE on the reduced data
# at three perplexities (one stacked panel each).
RANDOM_STATE = 42  # t-SNE is stochastic; pin the seed so re-runs give the same figure
pca = PCA(n_components=50)
reduced_data = pca.fit_transform(mnist.data)

pca_tsne_models, pca_tsne_embeddings = [], []
for row, perplexity in enumerate((5.0, 50.0, 100.0), start=1):
    ax = plt.subplot(3, 1, row)
    # NOTE(review): scikit-learn 1.5 renames n_iter to max_iter; switch when upgrading.
    model = TSNE(n_components=2, perplexity=perplexity, n_iter=2000,
                 metric="euclidean", random_state=RANDOM_STATE)
    embedding = model.fit_transform(reduced_data, mnist.target)
    # No axes passed: the scatter lands on the current axes, i.e. the subplot
    # that plt.subplot just created.
    show_dataset(embedding, mnist.target)
    ax.set_title("PCA(50) + t-SNE: perplexity={}".format(perplexity))
    pca_tsne_models.append(model)
    pca_tsne_embeddings.append(embedding)

# Keep the original per-panel names bound in case another cell reads them.
alg8, alg9, alg10 = pca_tsne_models
new_data8, new_data9, new_data10 = pca_tsne_embeddings



In [65]:
# Fixed t-SNE settings (perplexity 50, euclidean); what varies is how many
# principal components the data is reduced to before embedding.
RANDOM_STATE = 42  # t-SNE is stochastic; pin the seed so re-runs give the same figure

pca_sweep_models, pca_sweep_embeddings = [], []
for row, n_components in enumerate((50, 10, 5), start=1):
    # ``pca`` is rebound on every pass, so after the loop it holds the last
    # (5-component) fit, as in the original straight-line cell.
    pca = PCA(n_components=n_components)
    ax = plt.subplot(3, 1, row)
    # NOTE(review): scikit-learn 1.5 renames n_iter to max_iter; switch when upgrading.
    model = TSNE(n_components=2, perplexity=50.0, n_iter=2000,
                 metric="euclidean", random_state=RANDOM_STATE)
    embedding = model.fit_transform(pca.fit_transform(mnist.data), mnist.target)
    # No axes passed: the scatter lands on the current axes, i.e. the subplot
    # that plt.subplot just created.
    show_dataset(embedding, mnist.target)
    ax.set_title("PCA({}) + t-SNE: perplexity=50.0".format(n_components))
    pca_sweep_models.append(model)
    pca_sweep_embeddings.append(embedding)

# Keep the original per-panel names bound in case another cell reads them.
alg11, alg12, alg13 = pca_sweep_models
new_data11, new_data12, new_data13 = pca_sweep_embeddings