In [1]:
from evolutionary_search import EvolutionaryAlgorithmSearchCV
import sklearn.datasets
import numpy as np
import pandas as pd
# Load the scikit-learn digits dataset; X is taken from its "data" entry.
data = sklearn.datasets.load_digits()
X = data["data"]
# Make it a 2-class problem by only classifying the digit "5" vs the rest.
# A single vectorized comparison labels every "5" sample 1 and all others 0,
# without looping over the array in Python and without first binding `y` to
# the raw multi-class target.
y = (data["target"] == 5).astype(int)
# Last expression of the cell: show both shapes as a sanity check.
X.shape, y.shape
Out[1]:
In [2]:
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
Parameter grid: 625 parameter combinations (1 kernel × 25 values of C × 25 values of gamma)
In [3]:
# Search space shared by every search in this notebook: one kernel and
# 25 log-spaced candidates between 1e-9 and 1e9 for each of C and gamma.
paramgrid = dict(
    kernel=["rbf"],
    C=np.logspace(-9, 9, num=25, base=10),
    gamma=np.logspace(-9, 9, num=25, base=10),
)
# Number of combinations = product of the per-parameter candidate counts.
print("Size: ", int(np.prod([len(paramgrid[key]) for key in ("kernel", "C", "gamma")])))
In [4]:
cv = GridSearchCV(estimator=SVC(),
param_grid=paramgrid,
scoring="accuracy",
cv=StratifiedKFold(n_splits=2),
verbose=1)
%time cv.fit(X, y)
Out[4]:
In [5]:
# Best mean cross-validated accuracy found by the grid search fitted above,
# together with the parameter combination that achieved it.
cv.best_score_, cv.best_params_
Out[5]:
An example of the "canonical" `cv_results_` table in sklearn:
In [6]:
# Turn the grid search's cv_results_ into a DataFrame and show the
# best-scoring rows first (head() keeps the top five).
(
    pd.DataFrame(cv.cv_results_)
    .sort_values(by="mean_test_score", ascending=False)
    .head()
)
Out[6]:
Same parameter space, but only test 250 random combinations.
In [7]:
cv = RandomizedSearchCV(estimator=SVC(),
param_distributions=paramgrid,
n_iter=250,
scoring="accuracy",
cv=StratifiedKFold(n_splits=2),
verbose=1)
%time cv.fit(X, y)
Out[7]:
In [8]:
# Best mean cross-validated accuracy found by the randomized search fitted
# above, together with the parameter combination that achieved it.
cv.best_score_, cv.best_params_
Out[8]:
An example of the "canonical" `cv_results_` table in sklearn:
In [9]:
# Turn the randomized search's cv_results_ into a DataFrame and show the
# best-scoring rows first (head() keeps the top five).
(
    pd.DataFrame(cv.cv_results_)
    .sort_values(by="mean_test_score", ascending=False)
    .head()
)
Out[9]:
Again the same parameter space, this time optimized with the evolutionary algorithm for 10 generations.
In [10]:
if __name__=="__main__":
#pool = Pool(4)
cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
params=paramgrid,
scoring="accuracy",
cv=StratifiedKFold(n_splits=2),
verbose=True,
population_size=50,
gene_mutation_prob=0.10,
tournament_size=3,
generations_number=10)
#pmap = pool.map)
%time cv.fit(X, y)
In [11]:
# Best score and parameter combination reported by the evolutionary search
# fitted above. NOTE(review): the attribute names mirror scikit-learn's search
# API, so presumably they carry the same meaning -- confirm against the
# evolutionary_search documentation.
cv.best_score_, cv.best_params_
Out[11]:
Our `cv_results_` table (note: it includes all individuals, with their mean, max, min, and std test scores).
In [12]:
# Turn the evolutionary search's cv_results_ into a DataFrame and show the
# best-scoring rows first (head() keeps the top five).
(
    pd.DataFrame(cv.cv_results_)
    .sort_values(by="mean_test_score", ascending=False)
    .head()
)
Out[12]:
In [ ]: