In [1]:
%matplotlib inline

In [2]:
# Performs an exhaustive search through a parameter grid using
# scikit-learn's built-in methods. Also walks through using randomized
# optimization as an alternative to brute-force search.

In [3]:
from sklearn.datasets import make_classification

In [4]:
X, y = make_classification(1000, n_features=5)
from sklearn.linear_model import LogisticRegression

In [5]:
lr = LogisticRegression(class_weight='auto')

In [6]:
# Specify the parameters that we want to search. For GridSearchCV,
# we can simply list the values we care about, but for
# RandomizedSearchCV, we need to specify a distribution over the
# same space from which to sample.

In [7]:
lr.fit(X, y)


Out[7]:
LogisticRegression(C=1.0, class_weight='auto', dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr',
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0)

In [8]:
grid_search_params = {'penalty': ['l1', 'l2'],
                      'C': [1,2,3,4]}

In [9]:
import scipy.stats as st
import numpy as np

In [10]:
random_search_params = {'penalty': ['l1', 'l2'],
                        'C': st.randint(1,4)}
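
In [ ]:
# A quick sanity check (not part of the original run): scipy's randint
# excludes the upper bound, so randint(1, 4) only draws C values from
# {1, 2, 3} -- use randint(1, 5) if C=4 should be reachable, as it is
# in the grid above.
random_search_params['C'].rvs(10)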

In [11]:
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV

In [12]:
gs = GridSearchCV(lr, grid_search_params)

In [13]:
gs.fit(X, y)


Out[13]:
GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight='auto', dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr',
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'penalty': ['l1', 'l2'], 'C': [1, 2, 3, 4]},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [14]:
# We want to find the optimal set of parameters. We can also look
# at the marginal performance of the grid search (sketched after
# the scores below).

In [15]:
gs.grid_scores_


Out[15]:
[mean: 0.95600, std: 0.00148, params: {'penalty': 'l1', 'C': 1},
 mean: 0.95600, std: 0.00148, params: {'penalty': 'l2', 'C': 1},
 mean: 0.95700, std: 0.00136, params: {'penalty': 'l1', 'C': 2},
 mean: 0.95600, std: 0.00148, params: {'penalty': 'l2', 'C': 2},
 mean: 0.95700, std: 0.00136, params: {'penalty': 'l1', 'C': 3},
 mean: 0.95600, std: 0.00148, params: {'penalty': 'l2', 'C': 3},
 mean: 0.95700, std: 0.00136, params: {'penalty': 'l1', 'C': 4},
 mean: 0.95600, std: 0.00148, params: {'penalty': 'l2', 'C': 4}]
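
In [ ]:
# A rough sketch of the marginal performance mentioned above (an
# assumed aggregation, not part of the original run): average the
# mean validation scores for each penalty across all values of C.
# Each grid_scores_ entry is (parameters, mean score, fold scores).
for penalty in ['l1', 'l2']:
    scores = [s[1] for s in gs.grid_scores_ if s[0]['penalty'] == penalty]
    print('%s: %.5f' % (penalty, np.mean(scores)))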

In [16]:
# inspect the mean validation score of a single grid_scores_ entry:
gs.grid_scores_[1][1]


Out[16]:
0.95599999999999996

In [17]:
max(gs.grid_scores_, key=lambda x: x[1])


Out[17]:
mean: 0.95700, std: 0.00136, params: {'penalty': 'l1', 'C': 2}
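
In [ ]:
# Equivalently, the fitted GridSearchCV object exposes the winning
# configuration directly; this carries the same information as
# taking the max above.
gs.best_score_, gs.best_params_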

In [ ]:
# The parameters found above are the best choices for our
# logistic regression classifier.
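
In [ ]:
# The randomized search itself was never run above; this is a minimal
# sketch of how it could look, reusing the distribution defined
# earlier (n_iter=5 is an arbitrary choice, not from the original).
rs = RandomizedSearchCV(lr, random_search_params, n_iter=5)
rs.fit(X, y)
rs.best_score_, rs.best_params_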