In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("./../..")
In [2]:
%reload_ext yellowbrick
%matplotlib inline
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.cross_validation import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import precision_recall_curve
from yellowbrick.style.palettes import get_color_cycle, PALETTES
from yellowbrick.style.colors import resolve_colors
from yellowbrick.base import ModelVisualizer
from yellowbrick.classifier import ThresholdVisualizer, thresholdviz
In [3]:
# Retrieve Data Set
# The file is read with header=None, so pandas assigns integer column labels;
# column 57 is renamed to 'is_spam' and used as the target below.
# The rename is chained (not in-place) so re-running this cell is idempotent.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data',
    header=None,
).rename(columns={57: 'is_spam'})

# Build the classifier and get the predictions
# `alpha` (the smoothing parameter) must be passed by keyword: recent
# scikit-learn releases make estimator constructor arguments keyword-only, so
# the positional form `BernoulliNB(3)` raises a TypeError there.
model = BernoulliNB(alpha=3)

# Feature matrix: every column except the target; target vector: 'is_spam'.
X = df.drop('is_spam', axis=1)
y = df['is_spam']
In [4]:
# Build the threshold visualizer around the (unfitted) classifier, then fit it
# on the full data set and render the figure in a single call.
# NOTE(review): no random seed is set here; if the trials involve random
# splits, the plot may differ between runs -- confirm.
viz = ThresholdVisualizer(
    model,
    n_trials=100,
    title="Spam vs Ham Thresholds",
    quantiles=(0.10, 0.5, 0.9),
)
viz.fit_show(X, y)
Out[4]:
In [5]:
# Function-style call on the same model and data, using the function's default
# settings (presumably the quick-method counterpart of ThresholdVisualizer --
# confirm against the yellowbrick API).
thresholdviz(model, X, y)
Out[5]:
In [ ]:
In [ ]:
In [ ]: