In [12]:
%matplotlib notebook
import random
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [ ]:
class Estimator(object):
    """Base API object: anything that can learn from data via ``fit``.

    This mirrors the scikit-learn estimator contract: hyperparameters
    are managed with get_params/set_params, and learned state is
    produced by fit.
    """

    def get_params(self):
        """Return a dictionary of all hyperparameters."""
        pass

    def set_params(self, **params):
        """Set hyperparameters on this instance."""
        pass

    def fit(self, X, y=None, **kwargs):
        """Fit the estimator from data.

        Mutates the state of ``self`` and returns ``self`` so that
        calls can be chained (e.g. ``est.fit(X).predict(X)``).
        """
        return self
In [ ]:
class Predictor(Estimator):
    """An estimator that can also make predictions for new data."""

    def predict(self, X, **kwargs):
        """Predict a target value for each row in X.

        The original stub returned an undefined name (``y_hat``), which
        raised a confusing NameError when called; raising
        NotImplementedError makes the abstract contract explicit.
        """
        raise NotImplementedError("subclasses must implement predict()")
In [ ]:
class Model(Predictor):
    """A predictor whose quality can be evaluated with a single score."""

    def score(self, X, y=None, **kwargs):
        """Return a score between 0 and 1.

        The original stub returned an undefined name (``score``), which
        raised a confusing NameError when called; raising
        NotImplementedError makes the abstract contract explicit.
        """
        raise NotImplementedError("subclasses must implement score()")
In [ ]:
class Transformer(Estimator):
    """An estimator that transforms input data into a new representation."""

    def transform(self, X, **kwargs):
        """Transform X and return a new X'.

        The original stub returned an undefined name (``X_prime``),
        which raised a confusing NameError when called; raising
        NotImplementedError makes the abstract contract explicit.
        """
        raise NotImplementedError("subclasses must implement transform()")
In [5]:
# Load the scikit-learn digits dataset: 1797 8x8 grayscale images of
# handwritten digits (0-9), exposed both as flattened 64-feature rows
# (data.data) and as 8x8 arrays (data.images).
from sklearn import datasets
data = datasets.load_digits()
In [26]:
def show_instance(instance):
    """Render a single digit image on a small matplotlib axes.

    ``instance`` is a 2D array (e.g. one entry of ``data.images``);
    the axes object is returned so callers can annotate it further.
    """
    fig, ax = plt.subplots(figsize=(3, 3))
    ax.imshow(instance, cmap=plt.cm.gray_r, interpolation='nearest')
    return ax

# Pick one image at random and display it
instance = random.choice(data.images)
_ = show_instance(instance)
In [58]:
from sklearn.model_selection import train_test_split as tts
# Identify our data (feature matrix) and target (label vector)
X = data.data
y = data.target
# Create random train and test splits, holding out 20% for testing.
# NOTE(review): no random_state is set, so the split differs on every
# run — consider pinning one for reproducibility.
splits = tts(X, y, test_size=0.2)
X_train, X_test, y_train, y_test = splits
In [60]:
# Report the shapes of the train/test splits
for label, (X_split, y_split) in (("Training", (X_train, y_train)),
                                  ("Testing", (X_test, y_test))):
    print("{} Data X: {}, y: {}".format(label, X_split.shape, y_split.shape))
In [101]:
# Train a support vector machine classifier on the training split and
# report its accuracy on the held-out test split.
from sklearn.svm import SVC

model = SVC()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
model
Out[101]:
In [82]:
def predict_random(model, X=X, y=y):
    """Predict the label of one randomly chosen instance and display it.

    Picks a random row of ``X``, shows the matching image from
    ``data.images``, prints the model's prediction next to the true
    label, and returns the matplotlib axes for further annotation.
    """
    # random.randrange(n) is half-open, so idx is always a valid index.
    # The original random.randint(0, len(X)) is inclusive on both ends
    # and could return len(X), raising IndexError.
    idx = random.randrange(len(X))
    g = show_instance(data.images[idx])
    yhat = model.predict([X[idx]])[0]
    ytrue = y[idx]
    print("Prediction: {} Actual: {} ({})".format(yhat, ytrue, model.__class__.__name__))
    return g
In [84]:
# Spot-check the fitted classifier on one random instance
predict_random(model)
Out[84]:
In [100]:
# Train a random forest classifier on the same split and report its
# accuracy on the held-out test data for comparison with the SVC.
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
model
Out[100]:
In [86]:
# Spot-check the random forest on one random instance
predict_random(model)
Out[86]:
In [102]:
from sklearn.metrics import classification_report
# Per-class precision, recall, and F1 for the most recently fitted
# model on the held-out test split
y_hat = model.predict(X_test)
print(classification_report(y_test, y_hat))
In [93]:
import time
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
In [94]:
def fit_model(model, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test):
    """Fit ``model`` on the training split and report timing and score.

    Prints the model's class name, fit duration, test-set score, and
    hyperparameters. Returns the score so callers can collect results
    (callers that ignore the return value are unaffected).
    """
    # perf_counter is monotonic and high-resolution, making it the
    # correct clock for measuring durations (time.time can jump if the
    # system clock is adjusted).
    start = time.perf_counter()
    model.fit(X_train, y_train)
    duration = time.perf_counter() - start
    score = model.score(X_test, y_test)
    print("{} fit in {:0.2f} seconds score: {:0.4f}".format(
        model.__class__.__name__, duration, score))
    print(model.get_params())
    return score
In [99]:
# Compare several classifier families — plus three KNN neighborhood
# sizes — on the same train/test split using fit_model.
models = (
LogisticRegression(),
SVC(),
RandomForestClassifier(),
Perceptron(),
KNeighborsClassifier(),
KNeighborsClassifier(n_neighbors=15),
KNeighborsClassifier(n_neighbors=2),
GaussianNB(),
MultinomialNB(),
)
# Fit and report each model; the bare print() separates the reports.
for model in models:
    fit_model(model)
    print()