Usage examples of VbMfa for dimensionality reduction, including image compression and clustering.
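The basic workflow used throughout this notebook is sketched below (a minimal sketch on hypothetical random data; the reading of the constructor arguments as latent dimensionality and number of mixture components is inferred from the cells that follow):

import numpy as np
import vbmfa.mfa

y = np.random.rand(10, 100)             # hypothetical data matrix: 10 features x 100 samples
mfa = vbmfa.mfa.VbMfa(y, 5, 3)          # 5 latent factors, 3 mixture components (assumed meaning)
mfa.fit()                               # run the variational optimisation
print('MSE: {:.3f}'.format(mfa.mse()))  # reconstruction error
y_rec = mfa.x_to_y()                    # reconstruct the data from the latent representation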
In [17]:
import numpy as np
import sys
import sklearn.datasets as ds
import matplotlib.pyplot as plt
import matplotlib.cm as plt_cm
import matplotlib.colors as plt_col
import matplotlib.patches as plt_patches
import seaborn as sns
import pandas as pd
%matplotlib inline
%load_ext autoreload
%autoreload 2
import vbmfa.fa
import vbmfa.mfa
In [18]:
sns.set_style('darkgrid')
In [19]:
def plot_scatter(x, classes, ax=None):
    """Scatter plot of the first two rows of x, coloured by class."""
    ax = plt.gca() if ax is None else ax
    cmap = plt_cm.jet
    norm = plt_col.Normalize(vmin=np.min(classes), vmax=np.max(classes))
    mapper = plt_cm.ScalarMappable(cmap=cmap, norm=norm)
    colors = mapper.to_rgba(classes)
    ax.scatter(x[0, :], x[1, :], color=colors, s=20)

def plot_mse(mse):
    """Plot the MSE trace over iterations."""
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(mse, linewidth=2, marker='s', markersize=5, markerfacecolor='red')
    ax.set_xlabel('Iteration')
    ax.set_ylabel('MSE')

def plot_grid(n, ncols=4, size=(5, 5)):
    """Create a grid of n subplots with ncols columns and return [fig, axes]."""
    nrows = int(np.ceil(n / float(ncols)))
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(size[0] * ncols, size[1] * nrows))
    ax = ax.ravel()
    return [fig, ax]
def plot_compress(q, n=30):
    """Fit a single factor analyser with q factors and show n reconstructed images."""
    np.random.seed(0)
    fa = vbmfa.fa.VbFa(data_y, q)
    fa.fit()
    y = fa.x_to_y()
    fig, ax = plot_grid(n, ncols=10)
    dim = int(np.sqrt(fa.P))
    for i in range(n):
        ax[i].matshow(y[:, i].reshape(dim, dim), cmap='binary')

def plot_images(images, n=30, size=2):
    """Show the first n images, where each column of images is one image vector."""
    fig, ax = plot_grid(n, ncols=10, size=(size, size))
    dim = int(np.sqrt(images.shape[0]))
    with sns.axes_style('white'):
        for i in range(n):
            ax[i].grid()
            ax[i].set_axis_off()
            ax[i].matshow(images[:, i].reshape(dim, dim), cmap='binary')
In [20]:
def cluster_labels(q_s, labels):
    """Assign each sample to its most responsible component and relabel
    components by the majority class among their assigned samples."""
    clust = q_s.argmax(0)
    clust_t = pd.crosstab(clust, labels)
    clust_label = np.array(clust_t).argmax(1)
    return np.array([clust_label[x] for x in clust])
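To illustrate cluster_labels, here is a tiny hand-made example (the responsibilities toy_q_s are hypothetical; note that the function returns column positions of the crosstab, which coincide with the class labels when the labels are 0, ..., K-1, as for the digits below):

toy_q_s = np.array([[0.9, 0.8, 0.2, 0.1, 0.7],   # responsibilities of component 0
                    [0.1, 0.2, 0.8, 0.9, 0.3]])  # responsibilities of component 1
toy_labels = np.array([0, 0, 1, 1, 0])
cluster_labels(toy_q_s, toy_labels)  # -> array([0, 0, 1, 1, 0])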
In [21]:
digits = ds.load_digits()
data_y = digits.data.transpose()
data_t = digits.target
In [22]:
plot_images(data_y)
In [23]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 1)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
In [24]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 2)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
In [25]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 16, 10)
mfa.fit()
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y())
In [26]:
c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))
Not too bad: most of the counts lie on the diagonal, i.e. most digits end up in a cluster dominated by their true class.
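The table can be summarised by the fraction of samples whose cluster's majority label matches their true label (a short sketch using the arrays from the cells above):

print('Correctly clustered: {:.1%}'.format(np.mean(c_labels == data_t)))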
In [27]:
show = [0, 1, 2, 3, 4, 5]
num = 10
fig, ax = plot_grid(len(show) * num, ncols=num, size=(2, 2))
y = mfa.x_to_y()
with sns.axes_style('whitegrid'):
for i in range(len(show)):
y_i = y[:, c_labels == show[i]]
for j in range(num):
k = int(i * 10 + j)
ax[k].matshow(y_i[:, j].reshape(8, 8), cmap='binary')
ax[k].set_axis_off()
ax[k].grid()
The Olivetti faces dataset contains high-dimensional (64x64) images of faces taken from different perspectives: 10 images for each of 40 persons.
In [28]:
faces = ds.fetch_olivetti_faces()
num_samples = 5  # only use the first num_samples persons
images_per_sample = 10
n = num_samples * images_per_sample
data_y = faces.data.transpose()[:, :n]
data_t = faces.target[:n]
In [29]:
plot_images(data_y, 50)
In [30]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 1)
mfa.fit(verbose=True)
print('MSE: {:.3f}'.format(mfa.mse()))
plot_images(mfa.x_to_y(), 50)
In [31]:
np.random.seed(0)
mfa = vbmfa.mfa.VbMfa(data_y, 64, 5)
mfa.fit_highdim()
plot_images(mfa.x_to_y(), 50)
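For comparison with the single-component fit above, the reconstruction error can be printed in the same way (assuming mse() is meaningful after fit_highdim() as well):

print('MSE: {:.3f}'.format(mfa.mse()))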
In [32]:
c_labels = cluster_labels(mfa.q_s, data_t)
print(pd.crosstab(c_labels, data_t))
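As with the digits, the agreement between cluster assignments and person identities can be summarised in a single number (same sketch as before):

print('Correctly clustered: {:.1%}'.format(np.mean(c_labels == data_t)))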