In [1]:
    
from __future__ import division, print_function
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
#import seaborn as sns
#sns.set_style('whitegrid')
    
In [2]:
    
M = 2  # number of Gaussians / clusters
w1, w2 = [2, 2], [1, 1]  # means of the two Gaussians
sigma2 = 0.2  # standard deviation of the second Gaussian
N = 100  # number of data points
P1, P2 = 2/3, 1/3  # prior probabilities of the two Gaussians
def create_data(sigma1=0.7):
    X = np.zeros((N, 2))
    which_gaussian = np.zeros(N)
    for n in range(N):
        if np.random.rand() < P1:  # sample from first Gaussian
            X[n] = np.random.multivariate_normal(w1, np.eye(len(w1)) * sigma1**2)
            which_gaussian[n] = 0
        else:  # sample from second Gaussian
            X[n] = np.random.multivariate_normal(w2, np.eye(len(w2)) * sigma2**2)
            which_gaussian[n] = 1
            
    return X, which_gaussian
sigma1 = 0.7
X, which_gaussian = create_data(sigma1)
    
In [3]:
    
def plot_data(X, which_gaussian, centers, stds):
    plt.scatter(*X[which_gaussian == 0].T, c='r', label='Cluster 1')
    plt.scatter(*X[which_gaussian == 1].T, c='b', label='Cluster 2')
    plt.plot(centers[0][0], centers[0][1], 'k+', markersize=15, label='Centers')
    plt.plot(centers[1][0], centers[1][1], 'k+', markersize=15)
    plt.gca().add_artist(plt.Circle(centers[0], stds[0], ec='k', fc='none'))
    plt.gca().add_artist(plt.Circle(centers[1], stds[1], ec='k', fc='none'))
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    
plot_data(X, which_gaussian, [w1, w2], [sigma1, sigma2])
plt.title('Ground truth')
    
    Out[3]:
    [Figure: scatter plot of the ground-truth clusters with centers and standard-deviation circles]
See slide 18 of the lecture for an outline of the algorithm.
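For reference, the E-step and M-step that the code below implements for a mixture of $M$ spherical Gaussians can be summarized as follows (a sketch of the standard update equations; $d$ denotes the data dimension, here $d = 2$):

$$P(q \mid \mathbf{x}_\alpha) = \frac{P(\mathbf{x}_\alpha \mid q)\, P(q)}{\sum_{r=1}^{M} P(\mathbf{x}_\alpha \mid r)\, P(r)} \quad \text{(E-step: assignment probabilities)}$$

$$\mathbf{w}_q = \frac{\sum_\alpha P(q \mid \mathbf{x}_\alpha)\, \mathbf{x}_\alpha}{\sum_\alpha P(q \mid \mathbf{x}_\alpha)}, \qquad \sigma_q^2 = \frac{1}{d}\, \frac{\sum_\alpha \|\mathbf{x}_\alpha - \mathbf{w}_q\|^2\, P(q \mid \mathbf{x}_\alpha)}{\sum_\alpha P(q \mid \mathbf{x}_\alpha)}, \qquad P(q) = \frac{1}{N} \sum_\alpha P(q \mid \mathbf{x}_\alpha) \quad \text{(M-step)}$$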
In [4]:
    
from scipy.stats import multivariate_normal
    
In [5]:
    
def variance(X):
    """Calculate a single variance value for the vectors in X."""
    mu = X.mean(axis=0)
    return np.mean([np.linalg.norm(x - mu)**2 for x in X])
def run_expectation_maximization(X, w=None, sigma_squared=None, verbose=False):
    # Initialization.
    P_prior = np.ones(M) / M          # P(q): start with equal priors
    P_likelihood = np.zeros((N, M))   # P(x_alpha | q)
    P_posterior = np.zeros((M, N))    # P(q | x_alpha)

    mu = X.mean(axis=0)  # mean of the original data
    var = variance(X)  # variance of the original data
    if w is None:  # random initial centers around the data mean
        w = np.array([mu + np.random.rand(len(mu)) - 0.5, mu + np.random.rand(len(mu)) - 0.5])
    if sigma_squared is None:  # random initial variances around the data variance
        sigma_squared = np.array([var + np.random.rand() - 0.5, var + np.random.rand() - 0.5])
    if verbose:
        print('Initial centers:', w)
        print('Initial variances:', sigma_squared)
        print()
        print()
    theta = 0.001  # convergence threshold on the maximum parameter change
    distance = np.inf
    step = 0
    # Optimization loop.
    while distance > theta:
        step += 1
        if verbose:
            print('Step', step)
            print('-'*50)
        # Store old parameter values to calculate distance later on.
        w_old = w.copy()
        sigma_squared_old = sigma_squared.copy()
        P_prior_old = P_prior.copy()
        if verbose:
            print('Distances of X[0] to proposed centers:', np.linalg.norm(X[0] - w[0]), np.linalg.norm(X[0] - w[1]))
        # E-Step: Calculate likelihood for each data point.
        for (alpha, q), _ in np.ndenumerate(P_likelihood):
            P_likelihood[alpha, q] = multivariate_normal.pdf(X[alpha], w[q], sigma_squared[q])
        if verbose:
            print('Likelihoods of X[0]:', P_likelihood[0])
        # E-Step: Calculate assignment probabilities (posterior) for each data point.
        for (q, alpha), _ in np.ndenumerate(P_posterior):
            P_posterior[q, alpha] = (P_likelihood[alpha, q] * P_prior[q]) / np.sum([P_likelihood[alpha, r] * P_prior[r] for r in range(M)])
        if verbose:
            print('Assignment probabilities of X[0]:', P_posterior[:, 0])
            print()
        distance = 0
        # M-Step: Calculate new parameter values.
        for q in range(M):
            w[q] = np.sum([P_posterior[q, alpha] * X[alpha] for alpha in range(N)], axis=0) / np.sum(P_posterior[q])
            w_distance = np.linalg.norm(w[q] - w_old[q])
            if verbose:
                print('Distance of centers:', w_distance)
            distance = max(distance, w_distance)
            # Spherical Gaussian: posterior-weighted mean squared distance, divided by the data dimension.
            sigma_squared[q] = 1 / X.shape[1] * np.sum([np.linalg.norm(X[alpha] - w_old[q])**2 * P_posterior[q, alpha] for alpha in range(N)]) / np.sum(P_posterior[q])
            sigma_squared_distance = np.abs(sigma_squared[q] - sigma_squared_old[q])
            if verbose:
                print('Distance of variances:', sigma_squared_distance)
            distance = max(distance, sigma_squared_distance)
            P_prior[q] = np.mean(P_posterior[q])
            P_prior_distance = np.abs(P_prior[q] - P_prior_old[q])
            if verbose:
                print('Distance of priors:', P_prior_distance)
            distance = max(distance, P_prior_distance)
        if verbose:
            print('Maximum distance:', distance)
            print()
            print('New centers:', w)
            print('New variances:', sigma_squared)
            print('New priors:', P_prior)
            print('='*50)
            print()
    which_gaussian_EM = P_posterior.argmax(axis=0)
    
    return which_gaussian_EM, w, np.sqrt(sigma_squared), step
which_gaussian_em, cluster_centers_em, cluster_stds_em, num_steps_em = run_expectation_maximization(X, verbose=True)
    
    
In [6]:
    
plot_data(X, which_gaussian_em, cluster_centers_em, cluster_stds_em)
plt.title('Predicted by Expectation-Maximization')
    
    Out[6]:
    [Figure: clusters as predicted by Expectation-Maximization]
For simplicity, we use the scikit-learn implementation of K-means here; the detailed algorithm was already implemented in a previous exercise (its core update steps are sketched below as a reminder).
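As a reminder, K-means alternates an assignment step and a center-update step. The following is a minimal illustrative sketch only (not the scikit-learn implementation, which handles initialization and convergence more carefully):

def k_means_sketch(X, k=2, num_steps=20):
    # Pick k distinct data points as initial centers.
    centers = X[np.random.choice(len(X), k, replace=False)].copy()
    for _ in range(num_steps):
        # Assignment step: each point goes to its nearest center.
        assignment = np.array([np.argmin([np.linalg.norm(x - c) for c in centers]) for x in X])
        # Update step: each center moves to the mean of its assigned points.
        for q in range(k):
            if np.any(assignment == q):
                centers[q] = X[assignment == q].mean(axis=0)
    return assignment, centers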
In [7]:
    
from sklearn.cluster import KMeans
    
In [8]:
    
def run_k_means(X):
    km = KMeans(n_clusters=M)
    km.fit(X)
    which_gaussian_km = km.predict(X)
    # Standard-deviation estimate per predicted cluster (square root of the variance() measure defined above).
    cluster_stds = np.array([np.sqrt(variance(X[which_gaussian_km == q])) for q in range(M)])
    return which_gaussian_km, km.cluster_centers_, cluster_stds
which_gaussian_km, cluster_centers_km, cluster_stds_km = run_k_means(X)
    
In [9]:
    
plot_data(X, which_gaussian_km, cluster_centers_km, cluster_stds_km)
plt.title('Predicted by K-Means')
    
    Out[9]:
    [Figure: clusters as predicted by K-Means]
K-means separates the two clusters with a linear decision boundary (for two clusters, the perpendicular bisector between the centers). This cannot fully capture the structure of the data: for example, points near the lower-left Gaussian that actually belong to the broad upper-right Gaussian end up in the wrong cluster. The sketch below makes this boundary explicit.
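Since K-means assigns every point to the nearer center, the boundary between the two clusters is the perpendicular bisector of the segment connecting the centers. A minimal sketch that overlays this line on the K-means result (using cluster_centers_km from above; the line length is chosen by hand):

c0, c1 = cluster_centers_km
midpoint = (c0 + c1) / 2
direction = c1 - c0                               # the boundary is perpendicular to this vector
normal = np.array([-direction[1], direction[0]])  # rotate by 90 degrees
ts = np.linspace(-2, 2, 2)[:, None]
boundary = midpoint + ts * normal                 # two points spanning the bisector line
plot_data(X, which_gaussian_km, cluster_centers_km, cluster_stds_km)
plt.plot(boundary[:, 0], boundary[:, 1], 'k--', label='K-means decision boundary')
plt.legend()
plt.title('Linear decision boundary of K-Means')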
In [10]:
    
# Re-run EM, this time initialized with the K-means centers and variances.
_, _, _, num_steps_em_km = run_expectation_maximization(X, cluster_centers_km, cluster_stds_km**2)
    
In [11]:
    
print('Took', num_steps_em, 'steps with random initialization')
print('Took', num_steps_em_km, 'steps with initialization from K-means')
    
    
In [12]:
    
sigma1s = [0.1, 0.5, 1, 1.5]
fig, axes = plt.subplots(len(sigma1s), 3, figsize=(15, 15), sharex=True, sharey=True)
for i, (sigma1, horizontal_axes) in enumerate(zip(sigma1s, axes)):
    X, which_gaussian = create_data(sigma1)
    
    plt.sca(horizontal_axes[0])
    plot_data(X, which_gaussian, [w1, w2], [sigma1, sigma2])
    if i == 0:
        plt.title('Ground truth')
        
    which_gaussian_em, cluster_centers_em, cluster_stds_em, num_steps_em = run_expectation_maximization(X)
    plt.sca(horizontal_axes[1])
    plot_data(X, which_gaussian_em, cluster_centers_em, cluster_stds_em)
    if i == 0:
        plt.title('Predicted by Expectation-Maximization')
        
    which_gaussian_km, cluster_centers_km, cluster_stds_km = run_k_means(X)
    plt.sca(horizontal_axes[2])
    plot_data(X, which_gaussian_km, cluster_centers_km, cluster_stds_km)
    if i == 0:
        plt.title('Predicted by K-Means')
    
    
Each row corresponds to an increasing value of $\sigma_1$ (0.1, 0.5, 1, 1.5, from top to bottom).
K-means and Expectation-Maximization give similar results for small $\sigma_1$, i.e. when the clusters are clearly separated. With increasing $\sigma_1$, the Gaussians overlap more and more, and K-means fails to cluster them correctly.
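To quantify this comparison rather than judging it by eye, one could compute an agreement score between the true and predicted assignments, e.g. the adjusted Rand index from scikit-learn. A sketch using sigma1s, create_data, and the two run_* functions from above (the exact scores will vary with the random data and initialization):

from sklearn.metrics import adjusted_rand_score

for sigma1 in sigma1s:
    X, which_gaussian = create_data(sigma1)
    which_gaussian_em, _, _, _ = run_expectation_maximization(X)
    which_gaussian_km, _, _ = run_k_means(X)
    # The adjusted Rand index is 1 for perfect agreement, ~0 for random labelings,
    # and is invariant to permutations of the cluster labels.
    print('sigma1 =', sigma1,
          '| EM:', round(adjusted_rand_score(which_gaussian, which_gaussian_em), 2),
          '| K-means:', round(adjusted_rand_score(which_gaussian, which_gaussian_km), 2))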