In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
np.set_printoptions(precision=2)
In [ ]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
In [ ]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train, y_train)
In [ ]:
print(lr.score(X_test, y_test))
In [ ]:
pred = lr.predict(X_test)
In [ ]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, pred))
Layout of the binary confusion matrix as returned by scikit-learn (rows are true classes, columns are predicted classes, with class 1 treated as the positive class):
True Negative (TN) | False Positive (FP)
False Negative (FN) | True Positive (TP)
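For a binary problem the four counts can also be unpacked directly from the matrix with ravel(); a quick sketch:
In [ ]:
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
print("TN: %d  FP: %d  FN: %d  TP: %d" % (tn, fp, fn, tp))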
In [ ]:
from sklearn.metrics import classification_report
print(classification_report(y_test, pred))
In [ ]:
from sklearn.metrics import precision_score, f1_score
print("precision: %f f1_score: %f" % (precision_score(y_test, pred), f1_score(y_test, pred)))
In [ ]:
from sklearn.metrics import roc_auc_score, average_precision_score, log_loss
probs = lr.predict_proba(X_test)[:, 1]
print("area under the roc_curve: %f" % roc_auc_score(y_test, probs))
print("average precision: %f" % average_precision_score(y_test, probs))
print("log loss: %f" % log_loss(y_test, probs))
In [ ]:
from sklearn.metrics import get_scorer_names
print(get_scorer_names())
In [ ]:
from sklearn.model_selection import cross_val_score
cross_val_score(LogisticRegression(), X, y)
In [ ]:
print("Accuracy scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="accuracy"))
print("F1 scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="f1"))
print("AUC scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="roc_auc"))
print("Log loss scoring: %s" % cross_val_score(LogisticRegression(), X, y, scoring="log_loss"))
In [ ]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': np.logspace(start=-3, stop=3, num=10)}
grid_search = GridSearchCV(LogisticRegression(), param_grid, scoring="neg_log_loss")
grid_search.fit(X, y)
In [ ]:
grid_search.cv_results_
In [ ]:
grid_search.best_params_
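To see how the scoring metric varied over the grid, the mean cross-validated score for each value of C can be pulled out of cv_results_ and plotted; a minimal sketch:
In [ ]:
# mean test score (negative log loss) for each value of C tried above
mean_scores = grid_search.cv_results_['mean_test_score']
plt.semilogx(param_grid['C'], mean_scores)
plt.xlabel("C")
plt.ylabel("mean CV score (neg_log_loss)")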
In [ ]:
def my_accuracy_scoring(est, X, y):
    return np.mean(est.predict(X) == y)
print(cross_val_score(LogisticRegression(), X, y))
print(cross_val_score(LogisticRegression(), X, y, scoring=my_accuracy_scoring))
In [ ]:
from sklearn.metrics import fbeta_score
fbeta_score(y_test, pred, beta=10)
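Larger beta values weight recall more heavily, while smaller values weight precision more heavily; a quick comparison over a few beta values illustrates this:
In [ ]:
for beta in [0.1, 1, 10]:
    print("beta=%.1f  fbeta: %f" % (beta, fbeta_score(y_test, pred, beta=beta)))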
In [ ]:
from sklearn.metrics import make_scorer
my_fbeta_scorer = make_scorer(fbeta_score, beta=10)
print(cross_val_score(LogisticRegression(), X, y, scoring=my_fbeta_scorer))
In [ ]:
def my_sparse_scoring(est, X, y):
    return np.mean(est.predict(X) == y) - np.mean(est.coef_ != 0)
In [ ]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
grid = GridSearchCV(LinearSVC(C=.01, dual=False),
                    param_grid={'penalty': ['l1', 'l2']},
                    scoring=my_sparse_scoring)
grid.fit(X, y)
print(grid.best_params_)
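Since the custom scoring function rewards sparse coefficient vectors, it is worth checking how sparse the refitted best model actually is; a minimal sketch using the refitted best_estimator_:
In [ ]:
# count nonzero coefficients of the model selected by the sparsity-aware scorer
best_svc = grid.best_estimator_
print("nonzero coefficients: %d / %d" % (np.sum(best_svc.coef_ != 0), best_svc.coef_.size))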