In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, \
recall_score, f1_score, roc_auc_score, roc_curve, confusion_matrix
from sklearn.model_selection import train_test_split
%matplotlib inline
In [ ]:
def plot_roc_curve(roc_auc, fpr, tpr):
    """Draw a ROC curve together with a diagonal "random" reference line.

    Args:
        roc_auc: AUROC value (float); shown in the legend label formatted
            to two decimal places.
        fpr: Array of false positive rates (output of roc_curve()).
        tpr: Array of true positive rates (output of roc_curve()).

    Returns:
        None. The figure is shown with plt.show().
    """
    line_width = 2

    plt.figure(figsize=(8, 6))

    # The model curve is drawn before the diagonal so the legend lists
    # the two entries in that order.
    curve_label = 'ROC curve (AUROC = %0.2f)' % roc_auc
    plt.plot(fpr, tpr, color='orange', lw=line_width, label=curve_label)
    plt.plot(
        [0, 1],
        [0, 1],
        color='navy',
        lw=line_width,
        linestyle='--',
        label='random',
    )

    # Titles and axis labels.
    plt.title('Receiver operating characteristic example')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')

    # Axis ranges; the y-axis upper limit is 1.05 rather than 1.0.
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])

    plt.grid()
    plt.legend(loc="lower right")
    plt.show()
In [ ]:
# Read the exercise dataset from its relative path, print its dimensions,
# and end on df.head() so the first rows are previewed when this runs as a
# notebook cell.
dataset_path = '../data/exercise_dataset_LU11.csv'
df = pd.read_csv(dataset_path)
print('Shape:', df.shape)
df.head()
In [ ]:
# The first column of df is used as the target; every remaining column is a
# feature. 33% of the rows are held out for testing, and random_state is
# fixed so the same split is produced on every run.
features = df.iloc[:, 1:]
target = df.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=42,
)
In [ ]:
# Code here:
In [ ]:
# Code here:
In [ ]:
# Code here for accuracy score and AUROC:
In [ ]:
# Code here for ROC curve:
# Call plot_roc_curve():