In [1]:
%matplotlib inline
import seaborn as sns
In [2]:
sns.set_context('notebook', font_scale=1.5)
An image classification example

- opencv and scikit-image for feature extraction
- keras for feature augmentation
- scikit-learn for classification
- flask for the web application

scikit-learn classes

- fit
- transform
- predict
- fit_transform for transformations
- fit_predict for clustering
- score for classification and regression
- get_params
- set_params
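As a minimal sketch of that shared API (using scikit-learn's bundled iris loader rather than the CSV below, and a LogisticRegression chosen purely for illustration): every estimator is configured in its constructor, learns from data with fit, and is then queried with transform, predict, or score.

from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = load_iris(return_X_y=True)

scaler_demo = StandardScaler()                      # a transformer
X_scaled_demo = scaler_demo.fit_transform(X_demo)   # fit + transform in one call

model_demo = LogisticRegression(max_iter=1000)      # a classifier
model_demo.fit(X_scaled_demo, y_demo)               # learn coefficients
model_demo.predict(X_scaled_demo[:3])               # predicted class labels
model_demo.score(X_scaled_demo, y_demo)             # mean accuracy
model_demo.get_params()                             # inspect hyperparameters
model_demo.set_params(C=0.5)                        # change them in place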
In [3]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
In [4]:
import pandas as pd
In [5]:
iris = pd.read_csv('iris.csv')
iris.head()
Out[5]:
In [6]:
sns.pairplot(iris, hue='Species')
pass
In [7]:
X = iris.iloc[:, :4].values
y = iris.iloc[:, 4].astype('category').cat.codes.values
In [8]:
X[:3]
Out[8]:
In [9]:
y[:3]
Out[9]:
In [10]:
poly = PolynomialFeatures(2)
X_poly = poly.fit_transform(X)
In [11]:
X_poly[:3]
Out[11]:
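To see which degree-2 terms PolynomialFeatures generated (a bias column, the original four columns, and all squares and pairwise products), the transformer can report its output feature names. A sketch assuming scikit-learn >= 1.0, where get_feature_names_out is available; older releases expose get_feature_names instead.

poly.get_feature_names_out(iris.columns[:4])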
In [12]:
scaler = StandardScaler()
X_poly_scaled = scaler.fit_transform(X_poly)
In [13]:
X_poly_scaled[:3]
Out[13]:
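The fitted scaler keeps the per-column statistics it will reuse on any new data; mean_ and scale_ are its standard fitted attributes, shown here for the first few columns.

scaler.mean_[:5], scaler.scale_[:5]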
In [14]:
selector = VarianceThreshold(threshold=0.1)
X_new = selector.fit_transform(X_poly_scaled)
In [15]:
X_poly_scaled.shape, X_new.shape
Out[15]:
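One column is dropped, and get_support shows which: after standardization every non-constant column has unit variance, so the casualty should be the constant bias term added by PolynomialFeatures.

selector.get_support()   # boolean mask of the columns that were kept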
In [16]:
X_train, X_test, y_train, y_test = train_test_split(X_new, y, random_state=1)
In [17]:
X_train[:3]
Out[17]:
In [18]:
y_train[:3]
Out[18]:
In [19]:
X_test[:3]
Out[19]:
In [20]:
y_test[:3]
Out[20]:
In [21]:
alphas = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
clf = RidgeClassifierCV(alphas=alphas, cv=5)
In [22]:
clf.fit(X_train, y_train)
Out[22]:
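After fitting, RidgeClassifierCV records the regularization strength that its internal cross-validation preferred:

clf.alpha_   # the selected value from the alphas list above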
In [23]:
y_pred = clf.predict(X_test)
In [24]:
print(classification_report(y_test, y_pred))
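A confusion matrix is a useful complement to the per-class precision and recall above; this sketch only adds one extra import from sklearn.metrics.

from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)   # rows: true classes, columns: predicted classes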
In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
In [26]:
pipe = Pipeline([
('polynomial_features', PolynomialFeatures(2)),
('standard_scaler', StandardScaler()),
('feature_selection', VarianceThreshold(threshold=0.1)),
('classification', clf)
])
In [27]:
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
print(classification_report(y_test, y_pred))
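Because the pipeline bundles preprocessing and classification into a single estimator, it can also be cross-validated as a unit; each fold then refits the transformers on its own training split, so nothing leaks from the held-out part. A sketch:

from sklearn.model_selection import cross_val_score

cross_val_score(pipe, X, y, cv=5)   # one accuracy score per fold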
In [28]:
params = {'n_estimators': [5, 10, 25], 'max_depth': [1, 3, None]}
rf = RandomForestClassifier()
clf2 = GridSearchCV(rf, params, cv=5, n_jobs=-1)
In [29]:
pipe2 = Pipeline([
('polynomial_features', PolynomialFeatures(2)),
('feature_selection', VarianceThreshold(threshold=0.1)),
('classification', clf2)
])
In [30]:
pipe2.fit(X_train, y_train)
y_pred2 = pipe2.predict(X_test)
print(classification_report(y_test, y_pred2))
In [31]:
classifier = pipe2.named_steps['classification']
classifier.best_params_
Out[31]:
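Alongside best_params_, the fitted grid search also reports the mean cross-validated score of that combination and keeps the refitted estimator (GridSearchCV refits on the full training data by default):

classifier.best_score_, classifier.best_estimator_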