In [1]:
import numpy as np
import math
In [2]:
from sklearn import mixture
In [3]:
np.random.seed(1)
g = mixture.GMM(n_components=2)
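Note: GMM is the legacy mixture API (deprecated in scikit-learn 0.18, removed in 0.20). On a recent scikit-learn the equivalent setup looks roughly like the sketch below; weights_ and means_ keep their names, covars_ becomes covariances_, and per-sample log-likelihoods come from score_samples() instead of score(). This cell is a sketch only and is not used by the rest of the notebook.
In [ ]:
# Modern-API sketch (scikit-learn >= 0.18)
from sklearn.mixture import GaussianMixture
g_modern = GaussianMixture(n_components=2)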
In [4]:
# Generate random observations with two modes centered on 0 and 10 to use for training.
obs = np.concatenate((np.random.randn(100, 1), 10 + np.random.randn(300, 1)))
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 8.0)  # adjust to your screen
In [6]:
# plot
print np.shape(obs)
plt.plot(obs, '+')
Out[6]:
In [7]:
# fit the GMM to the observations
g.fit(obs)
Out[7]:
In [8]:
# what are the weights of the gaussians (rounded)? With 100 of the 400 samples
# in the first mode, we should recover weights near 0.25 and 0.75.
np.round(g.weights_, 2)
Out[8]:
In [9]:
# what are the means of the gaussians?
np.round(g.means_, 2)
Out[9]:
In [10]:
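# what are the covariances of the gaussians? (the data are 1-D, so these are just variances)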
np.round(g.covars_, 2)
Out[10]:
In [11]:
# what gaussian likely generated the following observations?
g.predict([[0], [2], [9], [10]])
Out[11]:
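predict() gives the hard assignment; the posterior probability of each component is also available. A quick check on the same points:
In [ ]:
# posterior responsibilities for the same observations
np.round(g.predict_proba([[0], [2], [9], [10]]), 2)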
In [12]:
# log prob of observations
np.round(g.score([[0], [2], [9], [10]]), 2)
Out[12]:
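The values score() returns here are per-sample log-likelihoods under the mixture density, log p(x) = log(sum_k w_k * N(x; mu_k, sigma_k^2)). A hand computation to verify (a sketch, assuming scipy is available; manual_log_prob is our own helper, not part of sklearn):
In [ ]:
from scipy.stats import norm
def manual_log_prob(x, weights, means, covars):
    # log p(x) = log(sum_k w_k * N(x; mu_k, sigma_k^2)) for this 1-D mixture
    x = np.asarray(x).ravel()
    density = sum(w * norm.pdf(x, loc=m, scale=math.sqrt(c))
                  for w, m, c in zip(weights, means.ravel(), covars.ravel()))
    return np.log(density)
np.round(manual_log_prob([0, 2, 9, 10], g.weights_, g.means_, g.covars_), 2)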
In [13]:
# Refit the model on new data (the model configuration is unchanged), this time with an even split between the two modes.
g.fit(20 * [[0]] + 20 * [[10]])
Out[13]:
In [14]:
np.round(g.weights_, 2)
Out[14]:
In [15]:
# Author: Ron Weiss <ronweiss@gmail.com>, Gael Varoquaux
# License: BSD 3 clause
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 8.0)  # adjust to your screen
In [16]:
from sklearn import datasets
from sklearn.cross_validation import StratifiedKFold
from sklearn.externals.six.moves import xrange
from sklearn.mixture import GMM
In [17]:
def make_ellipses(gmm, ax):
    for n, color in enumerate('rgb'):
        v, w = np.linalg.eigh(gmm._get_covars()[n][:2, :2])
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        v *= 9
        ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1],
                                  180 + angle, color=color)
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(0.5)
        ax.add_artist(ell)
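make_ellipses reads each fitted 2-D covariance as an ellipse: the eigenvectors give the orientation and the eigenvalues the axis scales. A standalone illustration of the same decomposition on a made-up covariance matrix (a sketch; the matrix is hypothetical):
In [ ]:
cov = np.array([[2.0, 0.8],
                [0.8, 1.0]])  # hypothetical 2-D covariance
v, w = np.linalg.eigh(cov)    # v: eigenvalues, w: eigenvectors
u = w[0] / np.linalg.norm(w[0])
print 'axis scales:', np.round(v, 2)
print 'orientation (degrees):', round(180 * np.arctan2(u[1], u[0]) / np.pi, 1)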
In [18]:
iris = datasets.load_iris()
# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, n_folds=4)
# Only take the first fold.
train_index, test_index = next(iter(skf))
X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]
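On a recent scikit-learn, the cross_validation module is gone; the same first-fold split would be written roughly as below (a sketch, scikit-learn >= 0.18, not used by the rest of the notebook):
In [ ]:
# Modern-API sketch of the same stratified first-fold split
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=4)
train_index, test_index = next(skf.split(iris.data, iris.target))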
In [24]:
print np.shape(X_train), np.shape(y_train)
print X_train
print y_train
In [29]:
n_classes = 3
# Try GMMs using different types of covariances.
classifiers = dict((covar_type, GMM(n_components=n_classes,
                                    covariance_type=covar_type,
                                    init_params='wc', n_iter=20))
                   for covar_type in ['spherical', 'diag', 'tied', 'full'])
print classifiers.items()[0]
n_classifiers = 4
plt.figure(figsize=(3 * n_classifiers / 2, 6))
plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05, left=.01, right=.99)
In [28]:
for index, (name, classifier) in enumerate(classifiers.items()):
    # Since we have class labels for the training data, we can
    # initialize the GMM parameters in a supervised manner.
    classifier.means_ = np.array([X_train[y_train == i].mean(axis=0)
                                  for i in xrange(n_classes)])
    # Train the other parameters using the EM algorithm.
    classifier.fit(X_train)
    h = plt.subplot(2, n_classifiers / 2, index + 1)
    make_ellipses(classifier, h)
    for n, color in enumerate('rgb'):
        data = iris.data[iris.target == n]
        plt.scatter(data[:, 0], data[:, 1], 0.8, color=color,
                    label=iris.target_names[n])
    # Plot the test data with crosses
    for n, color in enumerate('rgb'):
        data = X_test[y_test == n]
        plt.plot(data[:, 0], data[:, 1], 'x', color=color)
    y_train_pred = classifier.predict(X_train)
    train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
    plt.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
             transform=h.transAxes)
    y_test_pred = classifier.predict(X_test)
    test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
    plt.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
             transform=h.transAxes)
    plt.xticks(())
    plt.yticks(())
    plt.title(name)
plt.legend(loc='lower right', prop=dict(size=12))
plt.show()
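The four covariance types trade flexibility against the number of free parameters each component must estimate, which is one reason their train/test accuracies differ. A quick per-component count for the d = 4 iris features ('tied' fits one full matrix shared by all components):
In [ ]:
d = 4  # number of iris features
print 'spherical:', 1                 # one variance per component
print 'diag:     ', d                 # one variance per feature
print 'full:     ', d * (d + 1) / 2   # full covariance; 'tied' has the same count, shared once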