Usage examples of VbMfa for dimensionality reduction, including image compression and clustering.
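The basic workflow used throughout this notebook is sketched below (a minimal sketch on hypothetical random data; the reading of the constructor arguments as latent dimensionality and number of mixture components is inferred from the cells that follow):

import numpy as np
import vbmfa.mfa

y = np.random.rand(10, 100)             # hypothetical data matrix: 10 features x 100 samples
mfa = vbmfa.mfa.VbMfa(y, 5, 3)          # 5 latent factors, 3 mixture components (assumed meaning)
mfa.fit()                               # run the variational optimisation
print('MSE: {:.3f}'.format(mfa.mse()))  # reconstruction error
y_rec = mfa.x_to_y()                    # reconstruct the data from the latent representation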
In [17]:
import numpy as np
import sys
import sklearn.datasets as ds
import matplotlib.pyplot as plt
import matplotlib.cm as plt_cm
import matplotlib.colors as plt_col
import matplotlib.patches as plt_patches
import seaborn as sns
import pandas as pd
%matplotlib inline
%load_ext autoreload
%autoreload 2
import vbmfa.fa
import vbmfa.mfa
In [18]:
sns.set_style('darkgrid')
In [19]:
def plot_scatter(x, classes, ax=None):
    """Scatter plot of the first two rows of x, coloured by class."""
    ax = plt.gca() if ax is None else ax
    cmap = plt_cm.jet
    norm = plt_col.Normalize(vmin=np.min(classes), vmax=np.max(classes))
    mapper = plt_cm.ScalarMappable(cmap=cmap, norm=norm)
    colors = mapper.to_rgba(classes)
    ax.scatter(x[0, :], x[1, :], color=colors, s=20)

def plot_mse(mse):
    """Plot the MSE trace over iterations."""
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(mse, linewidth=2, marker='s', markersize=5, markerfacecolor='red')
    ax.set_xlabel('Iteration')
    ax.set_ylabel('MSE')

def plot_grid(n, ncols=4, size=(5, 5)):
    """Create a grid of n subplots with ncols columns and return [fig, axes]."""
    nrows = int(np.ceil(n / float(ncols)))
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0] * ncols, size[1] * nrows))
    ax = ax.ravel()
    return [fig, ax]
def plot_compress(q, n=30):
    """Fit a single factor analyser with q factors and show n reconstructed images."""
    np.random.seed(0)
    fa = vbmfa.fa.VbFa(data_y, q)
    fa.fit()
    y = fa.x_to_y()
    fig, ax = plot_grid(n, ncols=10)
    dim = int(np.sqrt(fa.P))
    for i in range(n):
        ax[i].matshow(y[:, i].reshape(dim, dim), cmap='binary')

def plot_images(images, n=30, size=2):
    """Show the first n images, where each column of images is one image vector."""
    fig, ax = plot_grid(n, ncols=10, size=(size, size))
    dim = int(np.sqrt(images.shape[0]))
    with sns.axes_style('white'):
        for i in range(n):
            ax[i].grid()
            ax[i].set_axis_off()
            ax[i].matshow(images[:, i].reshape(dim, dim), cmap='binary')
In [20]:
def cluster_labels(q_s, labels):
    """Assign each sample to its most responsible component and relabel
    components by the majority class among their assigned samples."""
    clust = q_s.argmax(0)
    clust_t = pd.crosstab(clust, labels)
    clust_label = np.array(clust_t).argmax(1)
    return np.array([clust_label[x] for x in clust])
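To illustrate cluster_labels, here is a tiny hand-made example (the responsibilities toy_q_s are hypothetical; note that the function returns column positions of the crosstab, which coincide with the class labels when the labels are 0, ..., K-1, as for the digits below):

toy_q_s = np.array([[0.9, 0.8, 0.2, 0.1, 0.7],   # responsibilities of component 0
                    [0.1, 0.2, 0.8, 0.9, 0.3]])  # responsibilities of component 1
toy_labels = np.array([0, 0, 1, 1, 0])
cluster_labels(toy_q_s, toy_labels)  # -> array([0, 0, 1, 1, 0])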
In [21]:
digits = ds.load_digits()
data_y = digits.data.transpose()
data_t = digits.target
In [22]:
plot_images(data_y)
In [23]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 1)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
In [24]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 2)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
In [25]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 10)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
In [26]:
c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))
Not too bad: most of the counts lie on the diagonal, i.e. most digits end up in a cluster dominated by their true class.
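The table can be summarised by the fraction of samples whose cluster's majority label matches their true label (a short sketch using the arrays from the cells above):

print('Correctly clustered: {:.1%}'.format(np.mean(c_labels == data_t)))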
In [27]:
show = [0, 1, 2, 3, 4, 5]
num = 10
fig, ax = plot_grid(len(show) * num, ncols=num, size=(2, 2))
y = mfa.x_to_y()
with sns.axes_style('whitegrid'):
for i in range(len(show)):
y_i = y[:, c_labels == show[i]]
for j in range(num):
k = int(i * 10 + j)
ax[k].matshow(y_i[:, j].reshape(8, 8), cmap='binary')
ax[k].set_axis_off()
ax[k].grid()
The Olivetti faces dataset contains high-dimensional (64x64) images of faces taken from different perspectives: 10 images for each of 40 persons.
In [28]:
faces = ds.fetch_olivetti_faces()
num_samples = 5  # only use the first num_samples persons
images_per_sample = 10
n = num_samples * images_per_sample
data_y = faces.data.transpose()[:, :n]
data_t = faces.target[:n]
In [29]:
plot_images(data_y, 50)
In [30]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 1)
mfa.fit(verbose=True)
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y(), 50)
In [31]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 5)
mfa.fit_highdim()
plot_images(mfa.x_to_y(), 50)
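For comparison with the single-component fit above, the reconstruction error can be printed in the same way (assuming mse() is meaningful after fit_highdim() as well):

print('MSE: {:.3f}'.format(mfa.mse()))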
In [32]:
c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))
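As with the digits, the agreement between cluster assignments and person identities can be summarised in a single number (same sketch as before):

print('Correctly clustered: {:.1%}'.format(np.mean(c_labels == data_t)))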