In [27]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn import svm, datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt

# Fetch scikit-learn's bundled breast-cancer dataset; the returned object
# exposes the feature matrix as `.data` and the labels as `.target`.
breast_cancer = load_breast_cancer()

# Sanity check: show the shape of the label vector.
print(np.shape(breast_cancer.target))


(569,)

In [52]:
# Draw a reproducible subsample of 200 rows to work with.
#
# Sampling is done WITHOUT replacement: with replace=True the same row can be
# drawn several times, and a later train/test split may then put copies of one
# row on both sides, leaking test data into training and inflating the score.
# A seeded generator makes the subsample identical on every fresh run.
rng = np.random.default_rng(42)
random_indices = rng.choice(breast_cancer.target.shape[0], size=200, replace=False)

# Features and labels for the selected rows (same indices, so they stay aligned).
X = breast_cancer.data[random_indices]
y = breast_cancer.target[random_indices]

In [63]:
# Hold out 25% of the rows for evaluation. The fixed seed makes the split
# reproducible; stratifying on y keeps the class ratio the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Any classifier that supports predict_proba() could be used here.
# The features are not scaled, and with the default cap of 100 iterations the
# solver commonly stops before converging on such data, so raise the cap.
clf = LogisticRegression(max_iter=10000)
clf.fit(X_train, y_train)

y_preds = clf.predict_proba(X_test)

# predict_proba returns one column per class; column 0 holds the scores for
# class 0, so take column 1 as the score of the positive class.
preds = y_preds[:, 1]

# fpr = false-positive rate, tpr = true-positive rate at each threshold.
# The thresholds are not needed for the plot; a named throwaway is used
# instead of `_` so IPython's last-output variable is not shadowed.
fpr, tpr, _thresholds = metrics.roc_curve(y_test, preds)

# Area under the ROC curve (trapezoidal rule over the fpr/tpr points).
auc_score = metrics.auc(fpr, tpr)

# Build the figure explicitly at its final size instead of resizing afterwards.
fig, ax = plt.subplots(figsize=(7, 5))
ax.set_title('ROC Curve')
ax.plot(fpr, tpr, label='AUC = {:.2f}'.format(auc_score))

# Diagonal reference line: where chance-level scores lie
# (i.e. just flipping a coin).
ax.plot([0, 1], [0, 1], 'r--')

# Small margin around the unit square so the curve does not sit on the frame.
ax.set_xlim([-0.1, 1.1])
ax.set_ylim([-0.1, 1.1])
ax.set_ylabel('True Positive Rate')
ax.set_xlabel('False Positive Rate')
ax.legend(loc='lower right', fontsize=14)

plt.show()



In [ ]: