In [35]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

EstimatorCV Objects for Efficient Parameter Search

Recursive Feature Elimination


In [36]:
from sklearn.datasets import load_iris
# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module was deprecated in 0.18 and removed in 0.20.
from sklearn.model_selection import train_test_split

# Load the iris data and hold out a test set; the fixed seed keeps the split reproducible.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)

In [37]:
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE

# Recursive feature elimination wrapped around a logistic regression (C=100),
# configured to keep two features.
feature_elimination_lr = RFE(
    estimator=LogisticRegression(C=100),
    n_features_to_select=2,
)

In [38]:
# fit() returns the fitted selector itself, so the held-out score can be
# taken directly from the same expression.
feature_elimination_lr.fit(X_train, y_train).score(X_test, y_test)


Out[38]:
0.94736842105263153

In [39]:
# GridSearchCV lives in sklearn.model_selection; the former sklearn.grid_search
# module was deprecated in 0.18 and removed in 0.20.
from sklearn.model_selection import GridSearchCV

# Search over how many features RFE keeps (1 through 4) with 5-fold CV.
# An explicit list is passed because some scikit-learn releases may reject a
# Python 3 `range` object as a grid value.
param_grid = {'n_features_to_select': list(range(1, 5))}
grid_search = GridSearchCV(feature_elimination_lr, param_grid, cv=5)
grid_search.fit(X_train, y_train)
# Score of the search (using the selected setting) on the held-out test set.
grid_search.score(X_test, y_test)


Out[39]:
0.97368421052631582

In [40]:
# Show the n_features_to_select value picked by the grid search over RFE.
grid_search.best_params_


Out[40]:
{'n_features_to_select': 4}

In [41]:
from sklearn.feature_selection import RFECV

# RFECV is the cross-validated counterpart of RFE: it chooses the number of
# features itself, so no outer GridSearchCV is wrapped around it here.
rfecv = RFECV(estimator=LogisticRegression(C=100))
rfecv.fit(X_train, y_train)
rfecv.score(X_test, y_test)


Out[41]:
0.97368421052631582

In [42]:
# Number of features RFECV settled on after fitting.
rfecv.n_features_


Out[42]:
4

Efficient hyper-parameter selection for Lasso


In [43]:
from sklearn.datasets import make_regression

# Synthetic noisy regression problem; data generation and split are both seeded.
# NOTE: this rebinds X_train / X_test / y_train / y_test from the iris section.
X, y = make_regression(random_state=0, noise=60)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    random_state=0,
)

In [44]:
from sklearn.linear_model import Lasso, LassoCV

# Baseline: Lasso left at its default alpha.
lasso = Lasso()
lasso.fit(X_train, y_train)
print("lasso score with default alpha: %f" % lasso.score(X_test, y_test))

# LassoCV selects alpha itself while fitting.
lassocv = LassoCV()
lassocv.fit(X_train, y_train)
print("lasso score with automatic alpha: %f" % lassocv.score(X_test, y_test))


lasso score with default alpha: 0.691687
lasso score with automatic alpha: 0.776175

In [45]:
# Generic alternative to LassoCV: grid-search alpha over 20 log-spaced
# candidates between 1e-5 and 10.
# NOTE: this rebinds `grid_search`, which previously held the RFE search.
grid_search = GridSearchCV(
    estimator=Lasso(),
    param_grid={'alpha': np.logspace(start=-5, stop=1, num=20)},
).fit(X_train, y_train)
print("lasso score with grid-searched alpha: %f" % grid_search.score(X_test, y_test))


lasso score with grid-searched alpha: 0.755467

In [46]:
# Compare the alpha chosen by each approach.
# NOTE(review): the recorded GridSearchCV result (10.0) sits on the upper edge
# of the searched grid, so a wider grid may be worth trying.
print("best alpha found by LassoCV: %f" % (lassocv.alpha_,))
print("best alpha found by GridSearchCV: %f" % (grid_search.best_params_['alpha'],))


best alpha found by LassoCV: 8.042118
best alpha found by GridSearchCV: 10.000000

In [47]:
# Time a single Lasso fit at the default alpha, as the baseline cost.
%timeit Lasso().fit(X_train, y_train)


100 loops, best of 3: 3.44 ms per loop

In [48]:
# Time LassoCV, which includes its built-in alpha selection.
%timeit LassoCV().fit(X_train, y_train)


10 loops, best of 3: 159 ms per loop

In [49]:
# Time the full grid search over alpha; this refits the existing
# `grid_search` object (the Lasso search defined earlier) on each run.
%timeit grid_search.fit(X_train, y_train)


1 loops, best of 3: 524 ms per loop

In [ ]: