In [1]:
%matplotlib inline
In [2]:
# Performs an exhaustive search through a hyperparameter space using
# scikit-learn's built-in methods. Also walks through randomized
# search as an alternative to brute-force search.
In [3]:
from sklearn.datasets import make_classification
In [4]:
X, y = make_classification(1000, n_features=5)
from sklearn.linear_model import LogisticRegression
In [5]:
lr = LogisticRegression(class_weight='balanced')
In [6]:
# Specify the parameters we want to search. For GridSearchCV,
# we can just list the values we care about, but for
# RandomizedSearchCV, we need to specify a distribution over
# the same space from which to sample.
In [7]:
lr.fit(X, y)
Out[7]:
In [8]:
grid_search_params = {'penalty': ['l1', 'l2'],
                      'C': [1, 2, 3, 4]}
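In [ ]:
# Illustrative check, not part of the original run: the grid above
# enumerates 2 penalties x 4 values of C = 8 candidate settings, each
# of which GridSearchCV will refit once per CV fold.
from sklearn.grid_search import ParameterGrid
len(list(ParameterGrid(grid_search_params)))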
In [9]:
import scipy.stats as st
import numpy as np
In [10]:
random_search_params = {'penalty': ['l1', 'l2'],
                        'C': st.randint(1, 5)}  # samples 1..4, matching the grid above
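In [ ]:
# Quick sanity check (illustrative, not in the original run): draws from
# st.randint(1, 5) fall in {1, 2, 3, 4}, the same values the grid enumerates.
st.randint(1, 5).rvs(10)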
In [11]:
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
In [12]:
gs = GridSearchCV(lr, grid_search_params)
In [13]:
gs.fit(X, y)
Out[13]:
In [14]:
# We want to find the optimal set of parameters. We can also look
# at the score of each individual point in the grid.
In [15]:
gs.grid_scores_
Out[15]:
In [16]:
# Each entry of grid_scores_ is (parameters, mean validation score,
# per-fold scores); index into the mean score of a single grid point:
gs.grid_scores_[1][1]
Out[16]:
In [17]:
max(gs.grid_scores_, key=lambda x: x[1])
Out[17]:
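In [ ]:
# GridSearchCV also exposes the winner directly; these attributes should
# agree with the max over grid_scores_ taken above.
gs.best_params_, gs.best_score_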
In [ ]:
# The parameter combination found above is the best choice for our
# logistic regression classifier.
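In [ ]:
# A minimal sketch of the randomized alternative mentioned at the top
# (not executed in the original): RandomizedSearchCV samples n_iter
# parameter settings from the distributions instead of enumerating
# every combination; n_iter=6 is an illustrative choice.
rs = RandomizedSearchCV(lr, random_search_params, n_iter=6)
rs.fit(X, y)
max(rs.grid_scores_, key=lambda x: x[1])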