Bayesian Probabilistic Principal Component Analysis

Comparison of Probabilistic PCA (PPCA) and Bayesian Probabilistic PCA (BPPCA) for dimensionality reduction.

References

  • Bishop, C. M. (1999). Variational principal components. 9th International Conference on Artificial Neural Networks: ICANN ’99, 1999, 509–514. doi:10.1049/cp:19991160
  • Tipping, M. E., & Bishop, C. M. (1999). Probabilistic Principal Component Analysis. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 61(3), 611–622.

Initialization


In [372]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [373]:
import numpy as np
import sklearn.datasets as ds
import os
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.cm as plt_cm
import matplotlib.colors as plt_col

In [374]:
# Make the project's src/ directory importable. Use `sys` directly rather
# than the undocumented `os.sys` alias, which is an implementation detail.
import sys
sys.path.append('../src/')
import pca

Plotting functions


In [375]:
def plot_scatter(x, classes, ax=None):
    """Scatter-plot the first two rows of x, coloring points by class label."""
    if ax is None:
        ax = plt.gca()
    # Map class labels onto the jet colormap, scaled to the label range.
    norm = plt_col.Normalize(vmin=np.min(classes), vmax=np.max(classes))
    mapper = plt_cm.ScalarMappable(cmap=plt_cm.jet, norm=norm)
    ax.scatter(x[0, :], x[1, :], color=mapper.to_rgba(classes), s=20)

def plot_digits(x, classes, ax=None):
    """Plot each 2D point in x as its digit label, with one color per digit 0-9."""
    if ax is None:
        ax = plt.gca()
    xx, yy = x[0, :], x[1, :]
    # Pad the axis limits by 10% of the data extent on each side.
    width = np.max(xx) - np.min(xx)
    height = np.max(yy) - np.min(yy)
    ax.set_xlim([np.min(xx) - 0.1 * width, np.max(xx) + 0.1 * width])
    ax.set_ylim([np.min(yy) - 0.1 * height, np.max(yy) + 0.1 * height])
    # Fixed 0..9 normalization so digit colors are stable across plots.
    norm = plt_col.Normalize(vmin=0, vmax=9)
    mapper = plt_cm.ScalarMappable(cmap=plt_cm.jet, norm=norm)
    colors = mapper.to_rgba(range(10))
    for x1, x2, digit in zip(xx, yy, classes):
        ax.text(x1, x2, digit, color=colors[int(digit)])
         
def plot_mse(mse):
    """Line plot of MSE per iteration, with red square markers."""
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(mse, linewidth=2, marker='s', markersize=5, markerfacecolor='red')
    ax.set(xlabel='Iteration', ylabel='MSE')
    ax.grid()
    
def plot_grid(n, ncols=4, size=(5, 5)):
    """Create an ncols-wide subplot grid with room for at least n panels.

    Returns [fig, axes] where axes is the flattened axis array.
    """
    nrows = (n + ncols - 1) // ncols  # ceil(n / ncols) for positive ints
    figsize = (size[0] * ncols, size[1] * nrows)
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    return [fig, axes.ravel()]

def plot_bppca(y, y_classes, maxit=7, *args, **kwargs):
    """Fit BPPCA to y, plotting the 2D projection after every update.

    Parameters
    ----------
    y : data matrix (features x samples), passed to pca.bppca.BPPCA.
    y_classes : per-sample labels used to color the scatter plots.
    maxit : number of variational updates to run.
    *args, **kwargs : forwarded to the BPPCA constructor.

    Returns the fitted BPPCA instance.
    """
    np.random.seed(0)  # reproducible initialization
    bppca = pca.bppca.BPPCA(y, *args, **kwargs)

    fig, ax = plot_grid(maxit + 1)
    mse = [bppca.mse()]
    plot_scatter(bppca.transform(), y_classes, ax[0])
    # `range` instead of Python-2-only `xrange` (works on both 2 and 3)
    for i in range(maxit):
        bppca.update()
        mse.append(bppca.mse())
        j = i + 1
        plot_scatter(bppca.transform(), y_classes, ax[j])
        ax[j].set_title('Iteration {}'.format(j))
    plot_mse(mse)
    return bppca

1 Iris dataset


In [376]:
# Iris dataset: 4 features x 150 samples, transposed to features-by-samples.
iris = ds.load_iris()
iris_y = iris.data.T
iris_classes = iris.target

1.1 PPCA


In [377]:
# Fit maximum-likelihood PPCA to the iris data.
ppca = pca.ppca.PPCA()
ppca.fit(iris_y)

In [378]:
# Project the iris samples onto the principal subspace, colored by species.
plot_scatter(ppca.transform(), iris_classes)


1.2 BPPCA


In [379]:
# Variational Bayesian PPCA on iris: projection after each update plus the MSE curve.
bppca = plot_bppca(iris_y, iris_classes)


2 Simulated data


In [380]:
def simulate(n=100, p=10, q=2, mu_fill=0.0, sigma=0.0, n_classes=2):
    """Simulate data from the linear-Gaussian model y = w.dot(x) + mu + noise.

    Parameters
    ----------
    n : number of samples.
    p : number of observed dimensions.
    q : number of latent dimensions.
    mu_fill : constant mean added to every observed dimension.
    sigma : observation-noise standard deviation; 0.0 yields noise-free data.
    n_classes : number of (roughly equal-sized) classes the samples fall into.

    Returns [y, w, x, mu, noise, classes] with shapes
    (p, n), (p, q), (q, n), (p, 1), (p, n) and (n,).
    """
    # Class labels: consecutive runs of floor(n / n_classes) samples.
    # `//` keeps this integral under Python 3; the min() cap puts remainder
    # samples (when n_classes does not divide n) into the last class instead
    # of producing a label that indexes class_factors out of bounds.
    samples_per_class = n // n_classes
    classes = np.minimum(np.arange(n) // samples_per_class, n_classes - 1)
    # w: each row centered on a distinct shuffled magnitude (2..p+1)
    weights = np.arange(p) + 2
    np.random.shuffle(weights)
    w = np.empty([p, q])
    for i in range(p):
        w[i, :] = np.random.normal(weights[i], 0.1, q)
    # x: latent coordinates clustered around random per-class factors
    class_factors = np.random.randint(-50, +50, q * n_classes).reshape(q, n_classes)
    x = np.empty([q, n])
    for i in range(n):
        factors = class_factors[:, classes[i]]
        for j in range(q):
            # scalar draw (size=None) — assigning a length-1 array to a
            # scalar slot errors on modern numpy
            x[j, i] = np.random.normal(factors[j], 5.0)
    # mu: constant offset per observed dimension
    mu = np.full((p, 1), mu_fill)
    # noise: i.i.d. Gaussian observation noise, or exactly zero when sigma == 0
    if sigma == 0.0:
        noise = np.zeros([p, n])
    else:
        noise = np.random.normal(0.0, sigma, p * n).reshape(p, n)

    y = w.dot(x) + mu + noise
    return [y, w, x, mu, noise, classes]

In [381]:
# Simulate 100 samples in 10 dimensions from a 2D latent space with 5 classes.
np.random.seed(0)
[sim_y, sim_w, sim_x, sim_mu, sim_noise, sim_classes] = simulate(n=100, p=10, q=2, mu_fill=20.0, sigma=1.0, n_classes=5)

In [382]:
# Ground-truth latent coordinates, colored by class.
plot_scatter(sim_x, sim_classes)


2.1 PPCA


In [383]:
# PPCA recovery of the simulated latent structure.
ppca = pca.ppca.PPCA()
ppca.fit(sim_y)
plot_scatter(ppca.transform(), sim_classes)


2.2 BPPCA


In [384]:
# BPPCA recovery of the simulated latent structure, one panel per update.
bppca = plot_bppca(sim_y, sim_classes)


3 Digits


In [385]:
# Random subsample of 100 digit images (64 pixels each), features x samples.
np.random.seed(0)
digits = ds.load_digits()
digits_i = np.random.choice(digits.data.shape[0], 100)
digits_y = digits.data[digits_i, :].T
digits_classes = digits.target[digits_i]

In [387]:
# Show the first ten digit images on a 2x5 grid.
fig, ax = plt.subplots(nrows=2, ncols=5, figsize=[15, 10])
ax = ax.flatten()
# `range` instead of Python-2-only `xrange` (works on both 2 and 3)
for i in range(10):
    ax[i].matshow(digits.images[i], cmap=plt_cm.gray)


3.1 PPCA


In [388]:
# PPCA projection of the digit subsample, labeled by digit.
ppca = pca.ppca.PPCA()
ppca.fit(digits_y)
plot_digits(ppca.transform(), digits_classes)


3.2 BPPCA


In [389]:
# BPPCA projection of the digit subsample after 4 variational updates.
np.random.seed(0)
bppca = pca.bppca.BPPCA(digits_y)
bppca.fit(maxit=4)
plot_digits(bppca.transform(), digits_classes)


4 Automatic Relevance Determination


In [390]:
np.random.seed(0)
[ard_y, ard_w, ard_x, ard_mu, ard_noise, ard_classes] = simulate(n=100, p=10, q=4, mu_fill=0.0, sigma=0.0, n_classes=5)

In [391]:
def plot_alpha(alpha, ax=None):
    """Bar plot of the alpha values, one bar per component."""
    if ax is None:
        ax = plt.gca()
    ax.bar(np.arange(len(alpha)), alpha)
    
def plot_weights(weights, ax=None):
    """Box plot of the weight matrix, one box per column (component)."""
    if ax is None:
        ax = plt.gca()
    ax.boxplot(weights)

4.1 PPCA


In [392]:
# PPCA with q=10: weights stay spread across all components (no pruning).
ppca = pca.ppca.PPCA(q=10)
ppca.fit(ard_y)
plot_weights(ppca.w)


4.2 BPPCA


In [393]:
# Fit BPPCA with q=10 components so ARD can prune the superfluous ones.
np.random.seed(0)
bppca = pca.bppca.BPPCA(ard_y, q=10)
bppca.hyper.alpha_a = 1
bppca.hyper.alpha_b = 1
q = bppca.q_dist
# `range` instead of Python-2-only `xrange` (works on both 2 and 3)
for i in range(10):
    bppca.update()

In [394]:
# Posterior alpha means (top) and weight distributions (bottom):
# pruned components show large alpha and near-zero weights.
fig, ax = plt.subplots(nrows=2, ncols=1)
for a in ax:
    a.set_xlim([0, 10])
    a.set_xticks(range(10))
plot_alpha(bppca.q_dist.alpha_mean(), ax[0])
plot_weights(bppca.q_dist.w_mean, ax[1])



In [394]:


In [ ]: