In [1]:
import warnings
warnings.filterwarnings('ignore')
In [2]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
In [3]:
import numpy as np
import umap
In [27]:
# Alternative toy data sets are kept commented out; enable exactly one.
#
# Noisy line (same as the clear line, but the last point is [4, -1]):
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [4, -1]])
#
# V shape:
# X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])
#
# Active choice: clear line, eight 2-D points.
X = np.array([
    [-1, -1],
    [-2, -1],
    [-3, -2],
    [1, 1],
    [2, 1],
    [3, 2],
    [4, 3],
    [0, 0],
])
In [28]:
# Echo the toy array so it appears as the cell output.
X
Out[28]:
In [29]:
import matplotlib.pyplot as plt
In [30]:
# Scatter the raw points (column 0 against column 1) on a 10x10-inch figure.
plt.figure(figsize=(10, 10))
plt.scatter(*X[:, :2].T)
# plt.savefig('original.png')
Out[30]:
In [31]:
# UMAP's optimisation is stochastic; pinning random_state makes the
# embedding (and every output derived from it) identical on each run.
umap_clf = umap.UMAP(random_state=42)
In [32]:
# Fit the UMAP model on the toy points; the return value is shown as the cell output.
umap_clf.fit(X)
Out[32]:
In [33]:
# Project the same points the model was fitted on, then echo the
# resulting coordinates as the cell output.
X_transformed = umap_clf.transform(X)
X_transformed
Out[33]:
In [34]:
# Scatter the embedded points (component 0 against component 1)
# on a 10x10-inch figure.
plt.figure(figsize=(10, 10))
plt.scatter(*X_transformed[:, :2].T)
# plt.savefig('reduced.png')
Out[34]:
In [40]:
# Single-component embedding. random_state pins UMAP's stochastic
# optimisation so the resulting values are reproducible across runs.
umap_clf = umap.UMAP(n_components=1, random_state=42)
In [43]:
# Fit the current UMAP model on X and embed X in a single call.
X_transformed = umap_clf.fit_transform(X)
In [44]:
# Echo the embedding so it appears as the cell output.
X_transformed
Out[44]:
In [42]:
# Line plot of the embedding on a 10x10-inch figure; with a single array
# argument matplotlib uses the row index as the x coordinate.
plt.figure(figsize=(10,10))
plt.plot(X_transformed)
Out[42]:
In [45]:
# `sklearn.datasets.samples_generator` was deprecated in scikit-learn 0.22
# and removed in 0.24; `make_blobs` is exposed directly by `sklearn.datasets`.
from sklearn.datasets import make_blobs

# Three blob centres; 750 samples are drawn around them with a fixed seed
# so the data set is identical on every run.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=42)
In [46]:
# Echo the generated blob samples as the cell output.
X
Out[46]:
In [47]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice
def plot_dataset(X, y_pred=None, fname=None):
    """Scatter-plot the first two columns of ``X`` after standardising them.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Points to draw. Only columns 0 and 1 are plotted.
    y_pred : array-like of int, optional
        Labels used to pick each point's colour from a cycling palette.
        Label ``-1`` selects the trailing black entry (noise). When omitted,
        all points use the first palette colour.
    fname : str, optional
        When truthy, the figure is also written with ``plt.savefig(fname)``.

    Returns
    -------
    None
    """
    # A None sentinel replaces the former mutable list default; [0] keeps the
    # previous "single colour for everything" behaviour.
    labels = np.asarray([0] if y_pred is None else y_pred)
    # Cast so that plain lists or float-typed labels can index the colour table.
    labels = labels.astype(int)

    # Standardise the features so the fixed +/-2.5 axis limits below frame the data.
    X = StandardScaler().fit_transform(X)

    palette = cycle(['#377eb8', '#ff7f00', '#4daf4a',
                     '#f781bf', '#a65628', '#984ea3',
                     '#999999', '#e41a1c', '#dede00'])
    n_colors = int(labels.max()) + 1
    # Black is appended last so that label -1 (noise) resolves to it via colors[-1].
    colors = np.array(list(islice(palette, n_colors)) + ['#000000'])

    plt.figure(figsize=(10, 10))
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[labels])
    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)
    plt.xticks(())
    plt.yticks(())

    if fname:
        plt.savefig(fname)
In [69]:
# Plot the blob data without labels (y_pred is left at its default).
plot_dataset(X)
In [67]:
# UMAP?
In [63]:
# Two-component embedding of the blobs, coloured by the ground-truth labels.
# random_state pins UMAP's stochastic optimisation so the figure is reproducible.
umap_clf = umap.UMAP(n_components=2, random_state=42)
X_transformed = umap_clf.fit_transform(X)
plot_dataset(X_transformed, labels_true)
In [70]:
# Same embedding with n_neighbors raised to 50, coloured by the ground-truth labels.
# random_state pins UMAP's stochastic optimisation so the figure is reproducible.
umap_clf = umap.UMAP(n_components=2, n_neighbors=50, random_state=42)
X_transformed = umap_clf.fit_transform(X)
plot_dataset(X_transformed, labels_true)
In [68]:
# Ten-component embedding with n_neighbors=5. plot_dataset draws only
# columns 0 and 1, so the figure shows the first two components.
# random_state pins UMAP's stochastic optimisation so the figure is reproducible.
umap_clf = umap.UMAP(n_components=10, n_neighbors=5, random_state=42)
X_transformed = umap_clf.fit_transform(X)
plot_dataset(X_transformed, labels_true)
In [57]:
import umap
from sklearn.datasets import load_digits
# Load the scikit-learn digits data set and embed its feature matrix with
# default UMAP settings. random_state pins the stochastic optimisation so
# the embedding is reproducible across runs.
digits = load_digits()
embedding = umap.UMAP(random_state=42).fit_transform(digits.data)
In [58]:
# Echo the embedding array as the cell output.
embedding
Out[58]:
In [59]:
# Plot the digits embedding without labels (y_pred is left at its default).
plot_dataset(embedding)
In [ ]: