In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
np.set_printoptions(precision=2)
In [ ]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
In [ ]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train, y_train)
In [ ]:
print(lr.score(X_test, y_test))
In [ ]:
pred = lr.predict(X_test)
In [ ]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, pred))
Layout of the binary confusion matrix as returned by scikit-learn (rows are true classes, columns are predicted classes, with class 1 treated as the positive class):
True Negative (TN) | False Positive (FP)
False Negative (FN) | True Positive (TP)
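For a binary problem the four counts can also be unpacked directly from the matrix with ravel(); a quick sketch:
In [ ]:
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
print("TN: %d  FP: %d  FN: %d  TP: %d" % (tn, fp, fn, tp))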
In [ ]:
from sklearn.metrics import classification_report
print(classification_report(y_test, pred))
In [ ]:
from sklearn.metrics import precision_score, f1_score
print("precision: %f f1_score: %f" % (precision_score(y_test, pred), f1_score(y_test, pred)))
In [ ]:
from sklearn.metrics import roc_auc_score, average_precision_score, log_loss
probs = lr.predict_proba(X_test)[:, 1]
print("area under the roc_curve: %f" % roc_auc_score(y_test, probs))
print("average precision: %f" % average_precision_score(y_test, probs))
print("log loss: %f" % log_loss(y_test, probs))
In [ ]:
from sklearn.metrics import get_scorer_names
print(get_scorer_names())
In [ ]:
from sklearn.model_selection import cross_val_score
cross_val_score(LogisticRegression(), X, y)
In [ ]:
print("Accuracy scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="accuracy"))
print("F1 scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="f1"))
print("AUC scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="roc_auc"))
print("Log loss scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="log_loss"))
In [ ]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': np.logspace(start=-3, stop=3, num=10)}
grid_search = GridSearchCV(LogisticRegression(), param_grid, scoring="neg_log_loss")
grid_search.fit(X, y)
In [ ]:
grid_search.cv_results_
In [ ]:
grid_search.best_params_
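To see how the scoring metric varied over the grid, the mean cross-validated score for each value of C can be pulled out of cv_results_ and plotted; a minimal sketch:
In [ ]:
# mean test score (negative log loss) for each value of C tried above
mean_scores = grid_search.cv_results_['mean_test_score']
plt.semilogx(param_grid['C'], mean_scores)
plt.xlabel("C")
plt.ylabel("mean CV score (neg_log_loss)")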
In [ ]:
def my_accuracy_scoring(est, X, y):
    return np.mean(est.predict(X) == y)
print(cross_val_score(LogisticRegression(), X, y))
print(cross_val_score(LogisticRegression(), X, y, scoring=my_accuracy_scoring))
In [ ]:
from sklearn.metrics import fbeta_score
fbeta_score(y_test, pred, beta=10)
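Larger beta values weight recall more heavily, while smaller values weight precision more heavily; a quick comparison over a few beta values illustrates this:
In [ ]:
for beta in [0.1, 1, 10]:
    print("beta=%.1f  fbeta: %f" % (beta, fbeta_score(y_test, pred, beta=beta)))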
In [ ]:
from sklearn.metrics import make_scorer
my_fbeta_scorer = make_scorer(fbeta_score, beta=10)
print(cross_val_score(LogisticRegression(), X, y, scoring=my_fbeta_scorer))
In [ ]:
def my_sparse_scoring(est, X, y):
    return np.mean(est.predict(X) == y) - np.mean(est.coef_ != 0)
In [ ]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
grid = GridSearchCV(LinearSVC(C=.01, dual=False),
                    param_grid={'penalty': ['l1', 'l2']},
                    scoring=my_sparse_scoring)
grid.fit(X, y)
print(grid.best_params_)
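Since the custom scoring function rewards sparse coefficient vectors, it is worth checking how sparse the refitted best model actually is; a minimal sketch using the refitted best_estimator_:
In [ ]:
# count nonzero coefficients of the model selected by the sparsity-aware scorer
best_svc = grid.best_estimator_
print("nonzero coefficients: %d / %d" % (np.sum(best_svc.coef_ != 0), best_svc.coef_.size))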