In [25]:
import numpy as np
import pandas as pd
import scipy
import sklearn
import sklearn.datasets
%pylab inline
In [26]:
def sample_from_gaussian(mean, covariance, n_samples, rng=None):
    """Draw ``n_samples`` points from a multivariate normal distribution.

    Parameters
    ----------
    mean : array-like
        Mean vector of the distribution.
    covariance : array-like
        Covariance matrix of the distribution.
    n_samples : int
        Number of points to draw.
    rng : object, optional
        Any object exposing ``multivariate_normal(mean, cov, size=...)``,
        e.g. ``np.random.RandomState(seed)`` or ``np.random.default_rng(seed)``.
        When omitted, NumPy's global random state (``np.random``) is used.

    Returns
    -------
    numpy.ndarray
        Array with one sampled point per row (``n_samples`` rows).
    """
    # `scipy.random` was merely an alias of `numpy.random` and is not present
    # in current SciPy releases, so go to NumPy directly.
    sampler = np.random if rng is None else rng
    return sampler.multivariate_normal(mean, covariance, size=(n_samples,))
def sample_unit_variance_gaussian(mean, n_samples):
    """Sample ``n_samples`` points from a Gaussian with identity covariance.

    ``mean`` is converted to a NumPy array; its first-axis length determines
    the size of the identity covariance matrix handed to
    ``sample_from_gaussian``.
    """
    center = np.array(mean)
    n_dims = center.shape[0]
    unit_covariance = np.identity(n_dims)
    return sample_from_gaussian(
        mean=center,
        covariance=unit_covariance,
        n_samples=n_samples,
    )
In [27]:
# Seed NumPy's global RNG so the sampled point clouds (and anything derived
# from them further down) are the same on every Restart & Run All.
SEED = 0
np.random.seed(SEED)

N_labeled = 1000      # points per class in the labeled set
N_unlabeled = 10000   # points per class in the "unlabeled" set

# "Unlabeled" set: two unit-variance blobs centred at (0, 5) and (0, -5).
# Their true classes (+1 / -1) are still recorded in `unlabeled_labels`.
unlabeled_data = np.concatenate([
    sample_unit_variance_gaussian((0, 5), N_unlabeled),
    sample_unit_variance_gaussian((0, -5), N_unlabeled),
])
unlabeled_labels = [1] * N_unlabeled + [-1] * N_unlabeled

# Labeled set: two unit-variance blobs centred at (5, 0) and (-5, 0),
# i.e. separated along the other axis than the unlabeled blobs.
labeled_data = np.concatenate([
    sample_unit_variance_gaussian((5, 0), N_labeled),
    sample_unit_variance_gaussian((-5, 0), N_labeled),
])
labeled_labels = [1] * N_labeled + [-1] * N_labeled
In [28]:
# Overlay both point clouds on the same axes: column 0 is plotted on the
# horizontal axis, column 1 on the vertical axis.
plt.scatter(labeled_data[:, 0], labeled_data[:, 1])
plt.scatter(unlabeled_data[:, 0], unlabeled_data[:, 1])
Out[28]:
In [29]:
# Write both sets in svmlight format with one-based feature indices:
# the unlabeled set goes to the '.svm.t' file, the labeled set to '.svm'.
for split_X, split_y, out_file in (
    (unlabeled_data, unlabeled_labels, 'gmm_degenerate.svm.t'),
    (labeled_data, labeled_labels, 'gmm_degenerate.svm'),
):
    sklearn.datasets.dump_svmlight_file(split_X, split_y, out_file, zero_based=False)
In [30]:
import sklearn
import sklearn.svm
In [23]:
# Fit a linear SVM on the labeled set only.
# 'squared_hinge' is the current name of the loss formerly spelled 'l2';
# the old alias is rejected by modern scikit-learn.
svm = sklearn.svm.LinearSVC(loss='squared_hinge', dual=True)
svm.fit(labeled_data, labeled_labels)
Out[23]:
In [24]:
# Visualise the fitted classifier's decision regions together with every
# sampled point (labeled and unlabeled), coloured by its class.
X = np.concatenate([labeled_data, unlabeled_data])
y = np.concatenate([labeled_labels, unlabeled_labels])

# Create a mesh to plot in: a regular grid with step `h` covering the
# bounding box of all points, padded by 1 on each side.
h = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

plt.figure(figsize=(12, 12))

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
Z = svm.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

# Plot also the data points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)

# The data are synthetic 2-D Gaussians, so use neutral feature names
# (the previous 'Sepal ...' labels were left over from an iris example).
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.show()
In [ ]: