In [74]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import gmm

In [4]:
# Number of samples per component
n_samples = 500

# Generate random sample, two components
np.random.seed(0)
C = np.array([[0., -0.1], [1.7, .4]])
X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
          .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])]

In [49]:
# Make dataset
n1 = 300
m1 = np.asarray([0, 0])
C1 = np.array([[8, 10], [10, 6]])
X1 = np.random.multivariate_normal(m1, C1, n1)

n2 = 500
m2 = np.asarray([12, 5])
C2 = np.array([[5., -5], [-5, 10]])
X2 = np.random.multivariate_normal(m2, C2, n2)

X = np.vstack((X1, X2))
y = np.ones(n1 + n2)
y[n1:] = -1

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(X[y==1, 0], X[y==1, 1], c='b', marker='x')
ax.scatter(X[y==-1, 0], X[y==-1, 1], c='r', marker='x')
fig.show()



In [93]:
import gmm
reload(gmm)
rng = np.random.RandomState(123)
clf = gmm.GMM(n_components=2, n_init=1, maxiter=200, random_state=rng)

In [94]:
clf.fit(X)


+++ Iter 5 change less than 0.0

In [101]:
def make_ellipses(means, covs, ax):
    for n, color in enumerate('mc'):
        v, w = np.linalg.eigh(covs[n])
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        print(v)
        #v *= 9
        ell = mpl.patches.Ellipse(means[n, :2], v[0], v[1],
                                  180 + angle, color=color)
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(0.5)
        ax.add_artist(ell)

In [102]:
y_pred_gmm = clf.assignments
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(X[y_pred_gmm==0, 0], X[y_pred_gmm==0, 1], color='r', marker='x')
ax.scatter(X[y_pred_gmm==1, 0], X[y_pred_gmm==1, 1], color='b', marker='x')
ax.plot(clf.mu[0, 0], clf.mu[0, 1], 'ko')
ax.plot(clf.mu[1, 0], clf.mu[1, 1], 'go')
make_ellipses(clf.mu, clf.cov, ax)
fig.show()


[  1.86299339  13.41687131]
[  2.82741661  16.09966763]

In [96]:
clf.cov


Out[96]:
array([[[  4.6770461 ,  -4.95926696],
        [ -4.95926696,  10.6028186 ]],

       [[ 10.38915026,   6.57125645],
        [  6.57125645,   8.53793398]]])

In [57]:
import kmeans
reload(kmeans)
n_centers = 2
y_pred_kmeans, centers, obj_val_seq = kmeans.kmeans(X, n_centers)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(X[y_pred_kmeans==0, 0], X[y_pred_kmeans==0, 1], color='r', marker='x')
ax.scatter(X[y_pred_kmeans==1, 0], X[y_pred_kmeans==1, 1], color='b', marker='x')
fig.show()


+++ Iter 5 change less than 0.0