Assignment 5: Gaussian mixture models

In this assignment, you will learn to perform image segmentation through implementing Gaussian mixture models and iteratively improving their performance. You will be performing this segmentation on the "Doge" and the "Party Spock" images included with the assignment.

Background

A Gaussian mixture model is a generative model for representing the underlying probability distribution of a complex collection of data, such as the collection of pixels in a grayscale photograph.

In the context of this problem, a Gaussian mixture model defines the joint probability $f(x)$ as

$$f(x) = \sum_{i=1}^{k}m_{i}N_i(x | \mu_{i}, \sigma_{i}^{2})$$

where $x$ is a grayscale value [0,1], $f(x)$ is the joint probability of that gray scale value, $m_{i}$ is the mixing coefficient on component $i$, $N_i$ is the $i^{th}$ Gaussian distribution underlying the value $x$ with mean $\mu_{i}$ and variance $\sigma_{i}^{2}$.

We will be using this model to segment photographs into different grayscale regions. The idea of segmentation is to assign a component $i$ to each pixel $x$ using the maximum posterior probability

$$component_{x} = argmax_{i}(m_{i}N_i(x|\mu_{i},\sigma_{i}^{2})$$

Then we will replace each pixel in the image with its corresponding $\mu_{i}$ to produce a result as below (original above, segmented with three components below).


In [189]:
from __future__ import division
import warnings
warnings.simplefilter(action = "ignore", category = FutureWarning)
import numpy as np
import scipy as sp
from matplotlib import image
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [190]:
"""Helper image-processing code."""
def image_to_matrix(image_file, grays=False):
    """
    Convert .png image to matrix
    of values.
    
    params:
    image_file = str
    grays = Boolean
    
    returns:
    img = (color) np.ndarray[np.ndarray[np.ndarray[float]]] 
    or (grayscale) np.ndarray[np.ndarray[float]]
    """
    img = image.imread(image_file)
    # in case of transparency values
    if(len(img.shape) == 3 and img.shape[2] > 3):
        height, width, depth = img.shape
        new_img = np.zeros([height, width, 3])
        for r in range(height):
            for c in range(width):
                new_img[r,c,:] = img[r,c,0:3]
        img = np.copy(new_img)
    if(grays and len(img.shape) == 3):
        height, width = img.shape[0:2]
        new_img = np.zeros([height, width])
        for r in range(height):
            for c in range(width):
                new_img[r,c] = img[r,c,0]
        img = new_img
    # clean up zeros
    if(len(img.shape) == 2):
        zeros = np.where(img == 0)[0]
        img[zeros] += 1e-7
    return img

def matrix_to_image(image_matrix, image_file):
    """
    Convert matrix of color/grayscale 
    values  to .png image
    and save to file.
    
    params:
    image_matrix = (color) numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] or (grayscale) numpy.ndarray[numpy.ndarray[float]]
    image_file = str
    """
    # provide cmap to grayscale images
    cMap = None
    #print image_matrix.shape
    len(image_matrix.shape)
    if(len(image_matrix.shape) < 3):
        cMap = cm.Greys_r
    image.imsave(image_file, image_matrix, cmap=cMap)
    
def flatten_image_matrix(image_matrix):
    """
    Flatten image matrix from 
    Height by Width by Depth
    to (Height*Width) by Depth
    matrix.
    
    params:
    image_matrix = (color) numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] or (grayscale) numpy.ndarray[numpy.ndarray[float]]
    
    returns:
    flattened_values = (color) numpy.ndarray[numpy.ndarray[float]] or (grayscale) numpy.ndarray[float]    
    """
    if(len(image_matrix.shape) == 3):
        height, width, depth = image_matrix.shape
    else:
        height, width = image_matrix.shape
        depth = 1
    flattened_values = np.zeros([height*width,depth])
    for i, r in enumerate(image_matrix):
        for j, c in enumerate(r):
            flattened_values[i*width+j,:] = c
    return flattened_values

def unflatten_image_matrix(image_matrix, width):
    """
    Unflatten image matrix from
    (Height*Width) by Depth to
    Height by Width by Depth matrix.
    
    params:
    image_matrix = (color) numpy.ndarray[numpy.ndarray[float]] or (grayscale) numpy.ndarray[float]
    width = int
    
    returns:
    unflattened_values = (color) numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] or (grayscale) numpy.ndarray[numpy.ndarray[float]]
    """
    heightWidth = image_matrix.shape[0]
    height = int(heightWidth / width)
    if(len(image_matrix.shape) > 1):
        depth = image_matrix.shape[-1]
        unflattened_values = np.zeros([height, width, depth])
        for i in range(height):
            for j in range(width):
                unflattened_values[i,j,:] = image_matrix[i*width+j,:]
    else:
        depth = 1
        unflattened_values = np.zeros([height, width])
        for i in range(height):
            for j in range(width):
                unflattened_values[i,j] = image_matrix[i*width+j]
    return unflattened_values

def image_difference(image_values_1, image_values_2):
    """
    Calculate the total difference 
    in values between two images.
    Assumes that both images have same
    shape.
    
    params:
    image_values_1 = (color) numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] or (grayscale) numpy.ndarray[numpy.ndarray[float]]
    image_values_2 = (color) numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]] or (grayscale) numpy.ndarray[numpy.ndarray[float]]
    
    returns:
    dist = int
    """
    flat_vals_1 = flatten_image_matrix(image_values_1)
    flat_vals_2 = flatten_image_matrix(image_values_2)
    N, depth = flat_vals_1.shape
    dist = 0.
    point_thresh = 0.005
    for i in range(N):
        if(depth > 1):
            new_dist = sum(abs(flat_vals_1[i] - flat_vals_2[i]))
            if(new_dist > depth * point_thresh):
                dist += new_dist
        else:
            new_dist = abs(flat_vals_1[i] - flat_vals_2[i])
            if(new_dist > point_thresh):
                dist += new_dist
    return dist

In [191]:
# image_dir = 'images/'
# image_file = 'doge_color.png'
# values = image_to_matrix(image_dir + image_file)
# value_vector = flatten_image_matrix(values)
# i=0
# for p in value_vector:
#     print p
#     i+=1
#     if i==10:
#         break
# matrix_to_image(values,'testfile.png')

Part 1: K-means clustering

20 pts

One easy method for image segmentation is to simply cluster all similar data points and then replace their values with the mean value. Thus, we'll warm up by using k-means clustering to provide a baseline to compare with your segmentation (clustering will come in handy later). Fill out k_means_cluster() to convert the original image values matrix to its clustered counterpart. Your convergence test should be whether the assigned clusters stop changing. Note that this convergence test is rather slow. When no initial cluster means are provided, k_means_cluster() should choose $k$ random points from the data (without replacement) to use as cluster means.

For this part of the assignment, since clustering is best used on multidimensional data we will be using the color image doge_color.png.

You can test your implementation of k-means using our reference images in k_means_test().


In [192]:
from random import randint
from functools import reduce
from numpy import random
def k_means_cluster(image_values, k=3, initial_means=None):
    """
    Separate the provided RGB values into
    k separate clusters using the k-means algorithm,
    then return an updated version of the image
    with the original values replaced with
    the corresponding cluster values.
    
    params:
    image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]]
    k = int
    initial_means = numpy.ndarray[numpy.ndarray[float]] or None
    
    returns:
    updated_image_values = numpy.ndarray[numpy.ndarray[numpy.ndarray[float]]]
    """
    #get flattened image_vector
    image_vector = flatten_image_matrix(image_values)
    num_points = len(image_vector)
    
    #get random initial_means
    if initial_means == None:
        initial_means_idx = np.random.choice(num_points,k,replace=False)
        initial_means = np.array([image_vector[i] for i in initial_means_idx])
        
    #setup variables
    current_point_index = 0
    cluster_label = [-1 for i in range(num_points)]
    threshold = 1e-05
    avg_movement = float("inf")
    
    #repeat following until centroids of clusters stop moving too much
    while(avg_movement>threshold):
        
        #for each point, find it's closest mean and assign that index to it
        for current_point in image_vector:
            #find distance to each cluster center, assign index of whichever is closest
            min_dist = float("inf")
            for i in range(k):
                current_center = initial_means[i]
                distance = np.linalg.norm(current_center-current_point)
                if distance < min_dist:
                    min_dist = distance
                    cluster_label[current_point_index] = i
                    
        current_point_index += 1
        
        #update the centroids for each cluster
        avg_movement = 0
        for centroid_index in range(k):
            idx = [i for i,x in enumerate(cluster_label) if x==centroid_index]
            new_centroid = np.mean([image_vector[i] for i in idx])
            avg_movement += np.linalg.norm(new_centroid-initial_means[centroid_index]) 
            initial_means[centroid_index] = new_centroid
        avg_movement /= k
    
    #once convergence has been achieved, replace all points belonging to cluster #i with centroid[i]
    for centroid_index in range(k):
        points_in_cluster_idx = [i for i,x in enumerate(cluster_label) if x==centroid_index]
        for idx in points_in_cluster_idx:
            image_vector[idx] = initial_means[centroid_index]
            
    updated_image_values = unflatten_image_matrix(image_vector, len(image_values[0]))
            
    return updated_image_values, initial_means

In [193]:
def k_means_test():
    """
    Testing your implementation
    of k-means on the segmented
    doge reference images.
    """
    k_min = 2
    k_max = 6
    image_dir = 'images/'
    image_name = 'doge_color.png'
    image_values = image_to_matrix(image_dir + image_name)
    # initial mean for each k value
    initial_means = [
        np.array([[0.90980393,0.8392157,0.65098041],[0.83137256,0.80784315,0.69411767]]),
        np.array([[0.90980393,0.8392157,0.65098041],[0.83137256,0.80784315,0.69411767],[0.67450982,0.52941179,0.25490198]]),
        np.array([[0.90980393,0.8392157,0.65098041],[0.83137256,0.80784315,0.69411767],[0.67450982,0.52941179,0.25490198],[0.86666667,0.8392157,0.70588237]]),
        np.array([[0.90980393,0.8392157,0.65098041],[0.83137256,0.80784315,0.69411767],[0.67450982,0.52941179,0.25490198],[0.86666667,0.8392157,0.70588237],[0,0,0]]),
        np.array([[0.90980393,0.8392157,0.65098041],[0.83137256,0.80784315,0.69411767],[0.67450982,0.52941179,0.25490198],[0.86666667,0.8392157,0.70588237],[0,0,0],[0.8392157,0.80392158,0.63921571]]),
    ]
    # test different k values to find best
    for k in range(k_min, k_max+1):
        updated_values, centroids = k_means_cluster(image_values, k, initial_means[k-k_min])
        ref_image = image_dir + 'k%d_%s'%(k, image_name)
        matrix_to_image(updated_values, ref_image)
        ref_values = image_to_matrix(ref_image)
        dist = image_difference(updated_values, ref_values)
        print('Image distance = %.2f'%(dist))
        if(int(dist) == 0):
            print('Clustering for %d clusters produced a realistic image segmentation.'%(k))
        else:
            print('Clustering for %d clusters didn\'t produce a realistic image segmentation.'%(k))

In [194]:
k_means_test()


Image distance = 0.00
Clustering for 2 clusters produced a realistic image segmentation.
Image distance = 0.00
Clustering for 3 clusters produced a realistic image segmentation.
Image distance = 0.00
Clustering for 4 clusters produced a realistic image segmentation.
Image distance = 0.00
Clustering for 5 clusters produced a realistic image segmentation.
Image distance = 0.00
Clustering for 6 clusters produced a realistic image segmentation.
C:\Users\Priyam\Anaconda3\envs\SonGoku\lib\site-packages\numpy\core\_methods.py:59: RuntimeWarning: Mean of empty slice.
  warnings.warn("Mean of empty slice.", RuntimeWarning)
C:\Users\Priyam\Anaconda3\envs\SonGoku\lib\site-packages\numpy\core\_methods.py:71: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

Part 2: Implementing a Gaussian mixture model

40 pts

Next, we will step beyond clustering and implement a complete Gaussian mixture model.

Complete the below implementation of GaussianMixtureModel so that it can perform the following:

  1. Calculate the joint log probability of a given greyscale value. (5 points)
  2. Use expectation-maximization (EM) to train the model to represent the image as a mixture of Gaussians. (20 points) To initialize EM, set each component's mean to the grayscale value of randomly chosen pixel and variance to 1, and the mixing coefficients to a uniform distribution. Note: there are packages that can run EM automagically, but please implement your own version of EM without using these extra packages. We've set the convergence condition for you in GaussianMixtureModel.default_convergence(): if the new likelihood is within 10% of the previous likelihood for 10 consecutive iterations, the model has converged.
  3. Calculate the log likelihood of the trained model. (5 points)
  4. Segment the image according to the trained model. (5 points)
  5. Determine the best segmentation by iterating over model training and scoring, since EM isn't guaranteed to converge to the global maximum. (5 points)

We have provided a subset of the necessary tests for this part and will provide the rest soon.

Note that training the model and calculating the log likelihood of the model are both time-intensive operations.

Hint 1: for the entire assignment you should log probabilities to avoid underflow (when multiplying lots of probabilities in sequence you can end up at 0). The log form of the Gaussian probability of scalar value $x$ is:

$$ln (N(x | \mu, \sigma)) = -0.5ln(2\pi \sigma^2) - \frac{(x-\mu)^{2}}{2\sigma^2} $$

where $\mu$ is the mean and $\sigma$ is standard deviation.

You can calculate the sum of log probabilities by using scipy.misc.logsumexp(). For example, logsumexp([-2,-3]) will return the same result as numpy.log(numpy.exp(-2)+numpy.exp(-3)).

Hint 2: Rather than using lists of lists, you will find it much easier to store your data in numpy.array arrays, which you can instantiate using the command

matrix = numpy.zeros([rows, columns])

where rows is the number of rows and columns the number of columns in your matrix. numpy.zeros() generates a matrix of the specified size containing 0s at each row/column cell. You can access cells with the syntax matrix[2,3] which will return the value in row 2 and column 3.


In [195]:
def default_convergence(prev_likelihood, new_likelihood, conv_ctr, conv_ctr_cap=10):
    """
    Default condition for increasing
    convergence counter: 
    new likelihood deviates less than 10%
    from previous likelihood.

    params:
    prev_likelihood = float
    new_likelihood = float
    conv_ctr = int
    conv_ctr_cap = int

    returns:
    conv_ctr = int
    converged = boolean
    """
    
    increase_convergence_ctr = (abs(prev_likelihood) * 0.9 < 
                                abs(new_likelihood) < 
                                abs(prev_likelihood) * 1.1)
    
    if increase_convergence_ctr:
        conv_ctr+=1
    else:
        conv_ctr =0
        
    return conv_ctr, conv_ctr > conv_ctr_cap

In [196]:
from random import randint, sample
import math
from scipy.misc import logsumexp

class GaussianMixtureModel:
    """
    A Gaussian mixture model
    to represent a provided 
    grayscale image.
    """
    
    def __init__(self, image_matrix, num_components, means=None):
        """
        Initialize a Gaussian mixture model.
        
        params:
        image_matrix = (grayscale) numpy.nparray[numpy.nparray[float]]
        num_components = int
        """
        self.image_matrix = image_matrix
        self.image_vector = flatten_image_matrix(image_matrix)
        self.num_components = num_components
        if(means is None):
            self.means = np.array([0]*num_components)
        else:
            self.means = np.array(means)
        self.variances = np.array([0]*num_components)
        self.mixing_coefficients = np.array([0]*num_components)
    
    def joint_prob(self, val):
        """Calculate the joint 
        log probability of a greyscale
        value within the image.
        
        params:
        val = float
        
        returns:
        joint_prob = float
        """
        
        joint_prob = 0.0
        value = np.array(val)
        exp_value = np.zeros(self.num_components)
        exp_coeff = np.zeros(self.num_components)
        
        exp_coeff = self.mixing_coefficients/np.sqrt(2*np.pi*self.variances)
        dist = (self.means-value)**2
        exp_value = -1.0*(dist)/(2*self.variances)
        
        joint_prob = logsumexp(exp_value, b=exp_coeff)
        
        return joint_prob
    
    def initialize_training(self):
        """
        Initialize the training
        process by setting each
        component mean to a random
        pixel's value (without replacement),
        each component variance to 1, and
        each component mixing coefficient
        to a uniform value 
        (e.g. 4 components -> [0.25,0.25,0.25,0.25]).
        
        NOTE: this should be called before 
        train_model() in order for tests
        to execute correctly.
        """
        #initialize means to random k points from image_vector
        num_pixels = len(self.image_vector)
        random_idx = sample(range(num_pixels), self.num_components)
        self.means = np.array([self.image_vector[i] for i in random_idx])
        self.means = np.ravel(self.means)
        
        #initialize variance to 1
        self.variances = np.array([1]*self.num_components)
        
        #initialize mixing coeff (weights)
        self.mixing_coefficients = np.array([float(1.0/self.num_components)]*self.num_components)
        
    
    def train_model(self, convergence_function=default_convergence):
        """
        Train the mixture model 
        using the expectation-maximization
        algorithm. Since each Gaussian is
        a combination of mean and variance,
        this will fill self.means and 
        self.variances, plus 
        self.mixing_coefficients, with
        the values that maximize
        the overall model likelihood.
        
        params:
        convergence_function = function that returns True if convergence is reached
        """
        old_likelihood = float("inf")
        conv_status = False
        conv_ctr = 0
        
        while(1):
            z_n_k = (self.mixing_coefficients*np.exp(-1.0*((self.means-self.image_vector)**2)/(2*self.variances)))/np.sqrt(2*np.pi*self.variances)
            z_n_k = z_n_k/np.array([np.sum(z_n_k,1)]).T
            
            n_k = np.sum(z_n_k,0)
            self.mixing_coefficients = n_k/len(self.image_vector)
            self.means = np.sum(z_n_k*self.image_vector,0)/n_k
            self.variances = np.sum(z_n_k*((self.means-self.image_vector)**2),0)/n_k
            
            new_likelihood = self.likelihood()
            conv_ctr, conv_status = convergence_function(old_likelihood, new_likelihood, conv_ctr)
#             print conv_ctr
            if(conv_status):
                break
            old_likelihood = new_likelihood
            
        return
            
    
    def segment(self):
        """
        Using the trained model, 
        segment the image matrix into
        the pre-specified number of 
        components. Returns the original 
        image matrix with the each 
        pixel's intensity replaced 
        with its max-likelihood 
        component mean.
        
        returns:
        segment = numpy.ndarray[numpy.ndarray[float]]
        """
        
        z_n_k = (self.mixing_coefficients*np.exp(-1.0*((self.means-self.image_vector)**2)/(2*self.variances)))/np.sqrt(2*np.pi*self.variances)
        z_n_k = z_n_k/np.array([np.sum(z_n_k,1)]).T 
        
        z_n_k = z_n_k.argmax(axis=1)
        segment = np.zeros([len(self.image_vector),1])
        for centroid_index in range(self.num_components):
            points_in_cluster_idx = [i for i,x in enumerate(z_n_k) if x==centroid_index]
            for idx in points_in_cluster_idx:
                segment[idx] = self.means[centroid_index]
        segment = unflatten_image_matrix(np.ravel(segment), len(self.image_matrix[0]))
                                         
        return segment
    
    def likelihood(self):
        """Assign a log 
        likelihood to the trained
        model based on the following 
        formula for posterior probability:
        ln(Pr(X | mixing, mean, stdev)) = sum((n=1 to N),ln(sum((k=1 to K), mixing_k * N(x_n | mean_k, stdev_k) )))
        
        returns:
        log_likelihood = float [0,1]
        """
        
        dist = (self.means-self.image_vector)**2
        exp_value = -1.0*dist/(2*self.variances)
        exp_coeff = self.mixing_coefficients/np.sqrt(2*np.pi*self.variances)
        exp_coeff = np.tile(exp_coeff,(len(self.image_vector),1))
        log_likelihood = np.sum(logsumexp(exp_value, b=exp_coeff, axis=1))
        
        return log_likelihood
        
        
    def best_segment(self, iters):
        """Determine the best segmentation
        of the image by repeatedly 
        training the model and 
        calculating its likelihood. 
        Return the segment with the
        highest likelihood.
        
        params:
        iters = int
        
        returns:
        segment = numpy.ndarray[numpy.ndarray[float]]
        """
        best_seg = []
        best_likelihood = float("-inf")
        for i in range(iters):
            print i
            self.initialize_training()
            self.train_model()
            current_likelihood = self.likelihood()
            if current_likelihood > best_likelihood:
                best_likelihood = current_likelihood
                best_seg = self.segment()
        segment = best_seg
        return segment

In [197]:
def gmm_likelihood_test():
    """Testing the GMM method
    for calculating the overall
    model probability.
    Should return -364370.
    
    returns:
    likelihood = float
    """
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    num_components = 5
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    gmm.means = np.array([0.4627451, 0.10196079, 0.027450981, 0.011764706, 0.1254902])
    likelihood = gmm.likelihood()
    return likelihood

In [198]:
gmm_likelihood_test()


Out[198]:
-364370.25319318147

In [199]:
def gmm_joint_prob_test():
    """Testing the GMM method
    for calculating the joint 
    log probability of a given point.
    Should return -0.98196.
    
    returns:
    joint_prob = float
    """
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    num_components = 5
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    gmm.means = np.array([0.4627451, 0.10196079, 0.027450981, 0.011764706, 0.1254902])
    test_val = 0.4627451
    joint_prob = gmm.joint_prob(0.4627451)
    return joint_prob

In [200]:
gmm_joint_prob_test()


Out[200]:
-0.98195853390140697

In [201]:
def generate_test_mixture(data_size, means, variances, mixing_coefficients):
    """
    Generate synthetic test
    data for a GMM based on
    fixed means, variances and
    mixing coefficients.
    
    params:
    data_size = (int)
    means = [float]
    variances = [float]
    mixing_coefficients = [float]
    
    returns:
    data = np.array[float]
    """

    data = np.zeros(data_size).flatten()

    indices = np.random.choice( len(means), len(data), p=mixing_coefficients)

    for i in range(len(indices)):
        data[i] = np.random.normal(means[indices[i]], variances[indices[i]])

    return np.array([data])

In [202]:
def gmm_train_test():
    """Test the training 
    procedure for GMM using
    synthetic data.
    
    returns:
    gmm = GaussianMixtureModel
    """

    print( 'Synthetic example with 2 means')

    num_components = 2
    data_range = (1,1000)
    actual_means = [2, 4]
    actual_variances = [1]*num_components
    actual_mixing = [.5]*num_components
    dataset_1 = generate_test_mixture(data_range, actual_means, actual_variances, actual_mixing)
    gmm = GaussianMixtureModel(dataset_1, num_components)
    gmm.initialize_training()
    # start off with faulty means
    gmm.means = np.array([1,3])
    initial_likelihood = gmm.likelihood()

    gmm.train_model()
    final_likelihood = gmm.likelihood()
    likelihood_difference = final_likelihood - initial_likelihood
    likelihood_thresh = 250
    print likelihood_difference
    if(likelihood_difference >= likelihood_thresh):
        print('Congrats! Your model\'s log likelihood improved by at least %d.'%(likelihood_thresh))

    print( 'Synthetic example with 4 means:')

    num_components = 4
    actual_means = [2,4,6,8]
    actual_variances = [1]*num_components
    actual_mixing = [.25]*num_components
    dataset_1 = generate_test_mixture(data_range, 
                actual_means, actual_variances, actual_mixing)
    gmm = GaussianMixtureModel(dataset_1, num_components)
    gmm.initialize_training()
    # start off with faulty means
    gmm.means = np.array([1,3,5,9])
    initial_likelihood = gmm.likelihood()
    gmm.train_model()
    final_likelihood = gmm.likelihood()
    
    # compare likelihoods
    likelihood_difference = final_likelihood - initial_likelihood
    likelihood_thresh = 200
    print likelihood_difference
    if(likelihood_difference >= likelihood_thresh):
        print('Congrats! Your model\'s log likelihood improved by at least %d.'%(likelihood_thresh))
    return gmm

In [203]:
gmm_train_test()


Synthetic example with 2 means
294.454166124
Congrats! Your model's log likelihood improved by at least 250.
Synthetic example with 4 means:
252.70740628
Congrats! Your model's log likelihood improved by at least 200.
Out[203]:
<__main__.GaussianMixtureModel instance at 0x0000000013E9A7C8>

In [204]:
def gmm_segment_test():
    """
    Apply the trained GMM 
    to unsegmented image and
    generate a segmented image.
    
    returns:
    segmented_matrix = numpy.ndarray[numpy.ndarray[float]]
    """
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    num_components = 3
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    gmm.train_model()
    segment = gmm.segment()
    segment_num_components = len(np.unique(segment))
    if(segment_num_components == num_components):
        print('Congrats! Your segmentation produced an image '+
              'with the correct number of components.')
    return segment

In [205]:
gmm_segment_test()


Congrats! Your segmentation produced an image with the correct number of components.
Out[205]:
array([[ 0.1450919,  0.1450919,  0.1450919, ...,  0.1450919,  0.1450919,
         0.1450919],
       [ 0.1450919,  0.1450919,  0.1450919, ...,  0.1450919,  0.1450919,
         0.1450919],
       [ 0.1450919,  0.1450919,  0.1450919, ...,  0.1450919,  0.1450919,
         0.1450919],
       ..., 
       [ 0.5989131,  0.5989131,  0.5989131, ...,  0.5989131,  0.5989131,
         0.5989131],
       [ 0.5989131,  0.5989131,  0.5989131, ...,  0.5989131,  0.5989131,
         0.5989131],
       [ 0.5989131,  0.5989131,  0.5989131, ...,  0.5989131,  0.5989131,
         0.5989131]])

In [206]:
def gmm_best_segment_test():
    """
    Calculate the best segment
    generated by the GMM and
    compare the subsequent likelihood
    of a reference segmentation. 
    Note: this test will take a while 
    to run.
    
    returns:
    best_seg = np.ndarray[np.ndarray[float]]
    """
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    image_matrix_flat = flatten_image_matrix(image_matrix)
    num_components = 3
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    iters = 10
    # generate best segment from 10 iterations
    # and extract its likelihood
    best_seg = gmm.best_segment(iters)
    matrix_to_image(best_seg, 'images/best_segment_spock.png')
    best_likelihood = gmm.likelihood()
    
    # extract likelihood from reference image
    ref_image_file = 'images/party_spock%d_baseline.png'%(num_components)
    ref_image = image_to_matrix(ref_image_file, grays=True)
    gmm_ref = GaussianMixtureModel(image_matrix, num_components)
    ref_vals = ref_image.flatten()
    ref_means = list(set(ref_vals))
    ref_variances = [0]*num_components
    ref_mixing = [0]*num_components
    for i in range(num_components):
        relevant_vals = ref_vals[ref_vals==ref_means[i]]
        ref_mixing[i] = len(relevant_vals) / len(ref_vals)
        ref_variances[i] = np.mean((image_matrix_flat[ref_vals==ref_means[i]] - ref_means[i])**2)
    gmm_ref.means = np.array(ref_means)
    gmm_ref.variances = np.array(ref_variances)
    gmm_ref.mixing_coefficients = np.array(ref_mixing)
    ref_likelihood = gmm_ref.likelihood()
    
    # compare best likelihood and reference likelihood
    likelihood_diff = best_likelihood - ref_likelihood
    likelihood_thresh = 1e4
    if(likelihood_diff >= likelihood_thresh):
        print('Congrats! Your image segmentation is an improvement over ' +
              'the baseline by at least %.2f.'%(likelihood_thresh))
    return best_seg

In [207]:
best_segment = gmm_best_segment_test()
matrix_to_image(best_segment, 'best_segment.png')


0
1
2
3
4
5
6
7
8
9
Congrats! Your image segmentation is an improvement over the baseline by at least 10000.00.

Part 3: Model experimentation

20 points

We'll now experiment with a few methods for improving GMM performance.

3a: Improved initialization

12.5 points

To run EM in our baseline Gaussian mixture model, we use random initialization to determine the initial values for our component means. We can do better than this!

Fill in the below GaussianMixtureModelImproved.initialize_training() with an improvement in component initialization. Please don't use any external packages for anything other than basic calculations (e.g. scipy.misc.logsumexp). Note that your improvement might significantly slow down runtime, although we don't expect you to spend more than 10 minutes on initialization.

Hint: you'll probably want an unsupervised learning method to initialize your component means. Clustering is one useful example of unsupervised learning, and you may want to look at 1-dimensional methods such as Jenks natural breaks optimization.


In [208]:
class GaussianMixtureModelImproved(GaussianMixtureModel):
    """A Gaussian mixture model
    for a provided grayscale image, 
    with improved training 
    performance."""
    
    def initialize_training(self):
        """
        Initialize the training
        process by setting each
        component mean to a random
        pixel's value (without replacement),
        each component variance to 1, and
        each component mixing coefficient
        to a uniform value 
        (e.g. 4 components -> [0.25,0.25,0.25,0.25]).
        """
        self.variances = np.array([1]*self.num_components)
        self.mixing_coefficients = np.zeros(self.num_components)
        
        num_pixels = len(self.image_vector)
        k = self.num_components
        
        #implement k-means for this greyscale image
        initial_means_idx = np.random.choice(num_pixels,k,replace=False)
        initial_means = np.array([self.image_vector[i] for i in initial_means_idx])
#         print initial_means
        
        #setup variables
        current_point_index = 0
        cluster_label = [-1 for i in range(num_pixels)]
        threshold = 100
        avg_movement = float("inf")

        #repeat following until centroids of clusters stop moving too much
        while(avg_movement>threshold):

            #for each point, find it's closest mean and assign that index to it
            for current_point in self.image_vector:
                #find distance to each cluster center, assign index of whichever is closest
                min_dist = float("inf")
                for i in range(k):
                    current_center = initial_means[i]
                    distance = (current_center-current_point)**2
#                     print [i,distance]
                    if distance < min_dist:
                        min_dist = distance
                        cluster_label[current_point_index] = i

                current_point_index += 1

            #update the centroids for each cluster
            avg_movement = 0
            for centroid_index in range(k):
                idx = [i for i,x in enumerate(cluster_label) if x==centroid_index]
#                 print len(idx)
                new_centroid = np.mean([self.image_vector[i] for i in idx])
                avg_movement += (new_centroid - initial_means[centroid_index])**2
                initial_means[centroid_index] = new_centroid
            
            avg_movement /= k

        #once convergence has been achieved, replace all points belonging to cluster #i with centroid[i]
        for centroid_index in range(k):
            points_in_cluster_idx = [i for i,x in enumerate(cluster_label) if x==centroid_index]
            self.mixing_coefficients[centroid_index] = len(points_in_cluster_idx)/num_pixels
        
        self.means = np.ravel(initial_means)
        
#         print self.means
#         print self.variances
#         print self.mixing_coefficients

In [209]:
def gmm_improvement_test():
    """
    Tests whether the new mixture 
    model is actually an improvement
    over the previous one: if the
    new model has a higher likelihood
    than the previous model for the
    provided initial means.
    
    returns:
    original_segment = numpy.ndarray[numpy.ndarray[float]]
    improved_segment = numpy.ndarray[numpy.ndarray[float]]
    """
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    num_components = 3
    initial_means = [0.4627451, 0.20392157, 0.36078432]
    # first train original model with fixed means
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    gmm.means = np.array(initial_means)
    gmm.train_model()
    original_segment = gmm.segment()
    original_likelihood = gmm.likelihood()
    # then train improved model
    gmm_improved = GaussianMixtureModelImproved(image_matrix, num_components)
    gmm_improved.initialize_training()
    gmm_improved.train_model()
    improved_segment = gmm_improved.segment()
    improved_likelihood = gmm_improved.likelihood()
    # then calculate likelihood difference
    diff_thresh = 1e3
    likelihood_diff = improved_likelihood - original_likelihood
    if(likelihood_diff >= diff_thresh):
        print('Congrats! Improved model scores a likelihood that was at ' +
              'least %d higher than the original model.'%(diff_thresh))
    return original_segment, improved_segment

In [210]:
best_segment, best_segment_improved = gmm_improvement_test()
matrix_to_image(best_segment, 'best_segment_original.png')
matrix_to_image(best_segment_improved, 'best_segment_improved.png')


Congrats! Improved model scores a likelihood that was at least 1000 higher than the original model.

3b: Convergence condition

7.5 points

You might be skeptical of the convergence criterion we've provided in default_convergence(). To test out another convergence condition, implement new_convergence_condition() to return true if all the new model parameters (means, variances, and mixing coefficients) are within 10% of the previous variables for 10 consecutive iterations. This will mean re-implementing train_model(), which you will also do below in GaussianMixtureModelConvergence.

You can compare the two convergence functions in convergence_condition_test().


In [211]:
def new_convergence_function(previous_variables, new_variables, conv_ctr, conv_ctr_cap=10):
    """
    Convergence function
    based on parameters:
    when all variables vary by
    less than 10% from the previous
    iteration's variables, increase
    the convergence counter.
    
    params:
    
    previous_variables = [numpy.ndarray[float]] containing [means, variances, mixing_coefficients]
    new_variables = [numpy.ndarray[float]] containing [means, variances, mixing_coefficients]
    conv_ctr = int
    conv_ctr_cap = int
    
    return:
    conv_ctr = int
    converged = boolean
    """
    
    converged = True
    for i in range(3):
        converged &= np.mean(previous_variables[i])*0.9 < np.mean(new_variables[i]) < np.mean(previous_variables[i])*1.1
    
    if converged:
        conv_ctr += 1
    else:
        conv_ctr = 0
        
    converged &= conv_ctr > conv_ctr_cap
    
    return conv_ctr, converged

In [212]:
class GaussianMixtureModelConvergence(GaussianMixtureModel):
    """
    Class to test the 
    new convergence function
    in the same GMM model as
    before.
    """

    def train_model(self, convergence_function=new_convergence_function):
        old_var = [self.means, self.variances, self.mixing_coefficients]
        conv_status = False
        conv_ctr = 0
        
        while(1):
            z_n_k = (self.mixing_coefficients*np.exp(-1.0*((self.means-self.image_vector)**2)/(2*self.variances)))/np.sqrt(2*np.pi*self.variances)
            z_n_k = z_n_k/np.array([np.sum(z_n_k,1)]).T
            
            n_k = np.sum(z_n_k,0)
            self.mixing_coefficients = n_k/len(self.image_vector)
            self.means = np.sum(z_n_k*self.image_vector,0)/n_k
            self.variances = np.sum(z_n_k*((self.means-self.image_vector)**2),0)/n_k
            
            new_var = [self.means, self.variances, self.mixing_coefficients]
            conv_ctr, conv_status = convergence_function(old_var, new_var, conv_ctr)
            if(conv_status):
                break
            old_var = new_var
            
        return

In [213]:
def convergence_condition_test():
    """
    Compare the performance of 
    the default convergence function
    with the new convergence function.
    
    return:
    default_convergence_likelihood = float
    new_convergence_likelihood = float
    """

    image_file = 'images/party_spock.png'
#     print image_file
    image_matrix = image_to_matrix(image_file)
    num_components = 3
    initial_means = [0.4627451, 0.10196079, 0.027450981]
    
    # first test original model
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    gmm.means = np.array(initial_means)
    gmm.train_model()
    default_convergence_likelihood = gmm.likelihood()
    
    # now test new convergence model
    gmm_new = GaussianMixtureModelConvergence(image_matrix, num_components)
    gmm_new.initialize_training()
    gmm_new.means = np.array(initial_means)
    gmm_new.train_model()
    new_convergence_likelihood = gmm_new.likelihood()
    
    # test convergence difference
    convergence_diff = abs(default_convergence_likelihood - new_convergence_likelihood)
    convergence_thresh = 200
    print convergence_diff
    if(convergence_diff >= convergence_thresh):
        print('Congrats! The likelihood difference between the original '
              + 'and the new convergence models should be at least %.2f'%(convergence_thresh))
    return default_convergence_likelihood, new_convergence_likelihood

In [214]:
convergence_condition_test()


3584.43060592
Congrats! The likelihood difference between the original and the new convergence models should be at least 200.00
Out[214]:
(92659.526094801258, 89075.09548887884)

Part 4: Bayesian information criterion

20 points

In our previous solutions, our only criterion for choosing a model was whether it maximizes the posterior likelihood regardless of how many parameters this requires. As a result, the "best" model may simply be the model with the most parameters, which would be overfit to the training data.

To avoid overfitting, we can use the Bayesian information criterion (a.k.a. BIC) which penalizes models based on the number of parameters they use. In the case of the Gaussian mixture model, this is equal to the number of components times the number of variables per component (mean, variance and mixing coefficient) = 3*components.

4a: Implement BIC

5 points

Implement bayes_info_criterion() to calculate the BIC of a trained GaussianMixtureModel.


In [215]:
def bayes_info_criterion(gmm):
    
    BIC = -2.0*gmm.likelihood() + np.log(len(gmm.image_vector))*(3*gmm.num_components)
    
    return BIC

In [216]:
def bayes_info_test():
    """
    Test for your
    implementation of
    BIC on fixed GMM values.
    Should be about 727045.
    
    returns:
    BIC = float
    """
    
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    num_components = 3
    initial_means = [0.4627451, 0.10196079, 0.027450981]
    gmm = GaussianMixtureModel(image_matrix, num_components)
    gmm.initialize_training()
    gmm.means = np.copy(initial_means)
    BIC = bayes_info_criterion(gmm)
    return BIC

In [217]:
bayes_info_test()


Out[217]:
727045.37420266762

4b: Test BIC

15 points

Now implement BIC_model_test(), in which you will use the BIC and likelihood to determine the optimal number of components in the Party Spock image. Use the original GaussianMixtureModel for your models. Iterate from k=2 to k=7 and use the provided means to train a model that minimizes its BIC and a model that maximizes its likelihood.

Then, fill out BIC_likelihood_question() to return the number of components in both the min-BIC and the max-likelihood model.


In [218]:
def BIC_likelihood_model_test():
    """Test to compare the 
    models with the lowest BIC
    and the highest likelihood.
    
    returns:
    min_BIC_model = GaussianMixtureModel
    max_likelihood_model = GaussianMixtureModel
    """
    comp_means = [
        [0.023529412, 0.1254902],
        [0.023529412, 0.1254902, 0.20392157],
        [0.023529412, 0.1254902, 0.20392157, 0.36078432],
        [0.023529412, 0.1254902, 0.20392157, 0.36078432, 0.59215689],
        [0.023529412, 0.1254902, 0.20392157, 0.36078432, 0.59215689, 0.71372563],
        [0.023529412, 0.1254902, 0.20392157, 0.36078432, 0.59215689, 0.71372563, 0.964706]
    ]
    
    image_file = 'images/party_spock.png'
    image_matrix = image_to_matrix(image_file)
    min_BIC = float("inf")
    min_BIC_model = []
    max_likelihood = float("-inf")
    max_likelihood_model = []

    for i in range(2,8):
        num_components = i
        gmm = GaussianMixtureModel(image_matrix, num_components)
        gmm.initialize_training()
        gmm.means = np.array(comp_means[i-2])
        gmm.train_model()
        curr_BIC = bayes_info_criterion(gmm)
        curr_likelihood = gmm.likelihood()
        if curr_BIC < min_BIC:
            min_BIC = curr_BIC
            min_BIC_model = gmm
        if curr_likelihood > max_likelihood:
            max_likelihood = curr_likelihood
            max_likelihood_model = gmm
        
    return min_BIC_model, max_likelihood_model

In [219]:
def BIC_likelihood_question():
    """
    Choose the best number of
    components for each metric
    (min BIC and maximum likelihood).
    
    returns:
    pairs = dict
    """
    BIC_model, likelihood_model = BIC_likelihood_model_test()
    bic = BIC_model.num_components
    likelihood = likelihood_model.num_components
    pairs = {
        'BIC' : bic,
        'likelihood' : likelihood 
    }
    return pairs

In [220]:
BIC_likelihood_question()


Out[220]:
{'BIC': 7, 'likelihood': 7}

You're done! Submit this iPython notebook on T-Square by November 20th at 9:35 AM.