In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

Kernel Density Estimation


In [ ]:
from sklearn.datasets import make_blobs

# Small synthetic data set: 30 samples with a single feature. The fixed
# random_state makes the draw reproducible. The second return value is
# bound to y.
X, y = make_blobs(n_samples=30, n_features=1, random_state=1)

In [ ]:
# Histogram of the samples, with the individual points drawn as a row of
# markers at y = -1 (below the bars). The y ticks are hidden.
plt.hist(X);
plt.scatter(X.ravel(), np.full(len(X), -1.0))
plt.yticks([])

In [ ]:
from sklearn.neighbors import KernelDensity

# Build the estimator with bandwidth 1, then fit it on the samples in place.
# The trailing semicolon keeps the estimator repr out of the cell output.
kde = KernelDensity(bandwidth=1)
kde.fit(X);

In [ ]:
# Evaluate the fitted model on 100 evenly spaced points spanning the data
# range padded by 2 on each side. score_samples expects a 2-D input, hence
# the reshape to a single column; its output is exponentiated before
# plotting (presumably log-densities -- confirm against the sklearn docs).
line = np.linspace(X.min() - 2, X.max() + 2, num=100)
line_scores = kde.score_samples(line.reshape(-1, 1))
line_scores = np.exp(line_scores)
plt.plot(line, line_scores)
# Draw the samples on a twin y-axis so they do not affect the density scale.
plt.twinx().scatter(X.ravel(), np.ones(X.shape[0]))

In [ ]:
# Score of the fitted model on the same samples it was fitted on
# (per the scikit-learn docs this is the total log-likelihood -- confirm).
kde.score(X)

In [ ]:
# Refit on the same samples with a narrower bandwidth (0.2), rebinding `kde`.
# The trailing semicolon keeps the estimator repr out of the cell output.
kde = KernelDensity(bandwidth=0.2)
kde.fit(X);

In [ ]:
# Same evaluation as for the previous model, but on a finer grid of 1000
# points spanning the data range padded by 2 on each side.
line = np.linspace(X.min() - 2, X.max() + 2, num=1000)
line_scores = kde.score_samples(line.reshape(-1, 1))
line_scores = np.exp(line_scores)
plt.plot(line, line_scores)
# Draw the samples on a twin y-axis so they do not affect the density scale.
plt.twinx().scatter(X.ravel(), np.ones(X.shape[0]))

In [ ]:
# Score of the refitted model on the samples it was fitted on. Scoring on
# the fitting data is not a held-out estimate.
kde.score(X)

In [ ]:
# GridSearchCV lives in sklearn.model_selection (available since 0.18);
# the old sklearn.grid_search module was removed in scikit-learn 0.20.
from sklearn.model_selection import GridSearchCV

# Candidate bandwidths: 20 values log-spaced between 10**-1 and 10**1.
param_grid = {'bandwidth': np.logspace(-1, 1, 20)}
# 10-fold cross-validation. No targets are passed to fit and no scoring is
# given, so (per the scikit-learn docs) candidates are compared using the
# estimator's own score method on the held-out folds.
grid = GridSearchCV(KernelDensity(), param_grid, cv=10)
grid.fit(X)

In [ ]:
# Parameter setting (here: the bandwidth) selected by the grid search.
grid.best_params_

In [ ]:
# Density of the best estimator found by the grid search, evaluated on the
# grid `line` defined in an earlier cell.
line_scores = grid.best_estimator_.score_samples(line.reshape(-1, 1))
line_scores = np.exp(line_scores)
plt.plot(line, line_scores)
# Draw the samples on a twin y-axis (at y = -1 on that axis).
plt.twinx().scatter(X.ravel(), np.full(len(X), -1.0))

PCA as a probabilistic model


In [ ]:
from sklearn.datasets import make_low_rank_matrix
from sklearn.decomposition import PCA

# Synthetic matrix with 100 features and an effective rank of 10, generated
# with a fixed seed. NOTE: this rebinds X, replacing the 1-D samples used in
# the kernel density cells above.
X = make_low_rank_matrix(effective_rank=10, n_features=100, random_state=0)

In [ ]:
# Fit a 20-component PCA and score it on the data it was fitted on.
pca = PCA(n_components=20)
pca.fit(X)
pca.score(X)

In [ ]:
# Same as the 20-component fit, but with 50 components; `pca` is rebound.
pca = PCA(n_components=50)
pca.fit(X)
pca.score(X)

In [ ]:
# validation_curve lives in sklearn.model_selection (available since 0.18);
# the old sklearn.learning_curve module was removed in scikit-learn 0.20.
from sklearn.model_selection import validation_curve

# Candidate numbers of components: 2, 4, ..., 38.
param_range = range(2, 40, 2)
# PCA is unsupervised, so None is passed for y. Each n_components value is
# evaluated with 10-fold cross-validation; the two returned score arrays are
# for the training and the held-out folds respectively.
training_scores, validation_scores = validation_curve(
    PCA(), X, None, param_name="n_components", param_range=param_range, cv=10)

In [ ]:
# `figures` is not visible in this file -- presumably a helper module shipped
# alongside the notebook (confirm). The helper receives the parameter range
# and the two score arrays produced by validation_curve.
from figures import plot_validation_curve
plot_validation_curve(param_range, training_scores, validation_scores)

In [ ]:


In [ ]: