linear_model.LogisticRegression
svm.LinearSVC
In [1]:
import sklearn
import mglearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
In [3]:
X, y = mglearn.datasets.make_forge()
In [10]:
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for model, ax in zip([LinearSVC(), LogisticRegression()], axes):
    clf = model.fit(X, y)
    mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5,
                                    ax=ax, alpha=0.7)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title("{}".format(clf.__class__.__name__))
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")
axes[0].legend(loc=4)
Out[10]:
[figure: decision boundaries of LinearSVC and LogisticRegression on the forge dataset]
In [11]:
mglearn.plots.plot_linear_svc_regularization()
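The plot above shows LinearSVC decision boundaries for several values of the regularization parameter C. To make the trade-off concrete, here is a minimal sketch (reusing the forge data from above; the C values are illustrative, not from the original cells) comparing training accuracy as C grows:

In [ ]:
# Smaller C means stronger regularization (simpler model);
# larger C tries harder to classify every training point correctly.
for C in [0.001, 1, 100]:
    svc = LinearSVC(C=C, max_iter=10000).fit(X, y)
    print("C={:<7} training accuracy: {:.2f}".format(C, svc.score(X, y)))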
In [19]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
In [20]:
print(cancer.keys())
In [21]:
print(cancer['target_names'])
In [22]:
print(cancer['feature_names'])
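For context, a quick sketch (not one of the original cells) counts the samples in each class using np.bincount:

In [ ]:
# Class balance of the breast cancer dataset
print({n: int(v) for n, v in zip(cancer.target_names, np.bincount(cancer.target))})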
In [25]:
type(cancer)
Out[25]:
sklearn.utils.Bunch
In [26]:
cancer.data.shape
Out[26]:
(569, 30)
In [28]:
cancer_df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
cancer_df.head()
Out[28]:
[table: first five rows of the 569 x 30 feature DataFrame]
In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42)
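As a sanity check (a sketch added here, not an original cell), the stratified split preserves the class balance in both partitions:

In [ ]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("train class balance:", np.bincount(y_train) / len(y_train))
print("test class balance:", np.bincount(y_test) / len(y_test))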
In [17]:
logreg = LogisticRegression().fit(X_train, y_train)
In [18]:
print("Training set score: {:.3f}".format(logreg.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg.score(X_test, y_test)))
The default setting C=1 provides good performance on both the training and the test set.
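Note that recent scikit-learn versions may emit lbfgs convergence warnings when fitting this unscaled data; a minimal workaround sketch (an assumption about your installed version, not part of the original) raises max_iter:

In [ ]:
# Sketch: give lbfgs more iterations so it converges on the unscaled features
logreg_long = LogisticRegression(max_iter=10000).fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg_long.score(X_train, y_train)))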
In [31]:
logreg100 = LogisticRegression(C=100).fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg100.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg100.score(X_test, y_test)))
In [32]:
logreg001 = LogisticRegression(C=0.01).fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg001.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg001.score(X_test, y_test)))
In [35]:
plt.plot(logreg.coef_.T, 'o', label="C=1")
plt.plot(logreg100.coef_.T, '^', label="C=100")
plt.plot(logreg001.coef_.T, 'v', label="C=0.01")
plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
plt.hlines(0, 0, cancer.data.shape[1])
plt.ylim(-5, 5)
plt.xlabel("Coefficient Index")
plt.ylabel("Coefficient Magnitude")
plt.legend()
Out[35]:
[figure: coefficient magnitudes of the L2-penalized models for C=0.01, 1, and 100]
In [37]:
for C, marker in zip([0.01, 1, 100], ['v', 'o', '^']):
    # recent scikit-learn requires the liblinear solver for penalty="l1"
    lr_l1 = LogisticRegression(C=C, penalty="l1",
                               solver="liblinear").fit(X_train, y_train)
    print("Training accuracy of L1 logreg with C={:.3f}: {:.2f}".format(
        C, lr_l1.score(X_train, y_train)))
    print("Test accuracy of L1 logreg with C={:.3f}: {:.2f}".format(
        C, lr_l1.score(X_test, y_test)))
    plt.plot(lr_l1.coef_.T, marker, label="C={:.3f}".format(C))
plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
plt.hlines(0, 0, cancer.data.shape[1])
plt.xlabel("Coefficient Index")
plt.ylabel("Coefficient Magnitude")
plt.ylim(-5, 5)
plt.legend()
Out[37]:
[figure: coefficient magnitudes of the L1-penalized models for C=0.01, 1, and 100]
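Because the L1 penalty drives coefficients exactly to zero, it effectively performs feature selection. A minimal sketch (not an original cell) counts the surviving features for the last model fitted in the loop above (lr_l1, C=100):

In [ ]:
print("Nonzero coefficients:", int(np.sum(lr_l1.coef_ != 0)),
      "of", lr_l1.coef_.size)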