notebook.community

Edit and run



In [1]:

    
%matplotlib inline 

import matplotlib.pyplot as plt 

import pandas as pd

import sklearn.datasets
from sklearn.model_selection import train_test_split as tts

from sklearn.ensemble import RandomForestClassifier

from yellowbrick.classifier import ClassBalance
from yellowbrick.classifier import ClassPredictionError
from yellowbrick.classifier import ClassificationReport

from ipywidgets import interact









    



/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d



In [2]:

    
import os

from yellowbrick.download import download_all

## The path to the test data sets
FIXTURES  = os.path.join(os.getcwd(), "data")

## Dataset loading mechanisms
datasets = {
    "occupancy": os.path.join(FIXTURES, "occupancy", "occupancy.csv"),
}


def load_data(name, download=True):
    """
    Loads and wrangles the passed in dataset by name.
    If download is specified, this method will download any missing files.
    """

    # Get the path from the datasets
    path = datasets[name]

    # Check if the data exists, otherwise download or raise
    if not os.path.exists(path):
        if download:
            download_all()
        else:
            raise ValueError((
                "'{}' dataset has not been downloaded, "
                "use the download.py module to fetch datasets"
            ).format(name))


    # Return the data frame
    return pd.read_csv(path)



In [3]:

    
# Load the classification data set
data = load_data("occupancy")

# Specify the features of interest and the classes of the target
features = ["temperature", "relative humidity", "light", "C02", "humidity"]
classes = ["unoccupied", "occupied"]

# Extract the numpy arrays from the data frame
X = data[features].as_matrix()
y = data.occupancy.as_matrix()

# Create the train and test data
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)









    



/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel_launcher.py:9: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
  if __name__ == '__main__':
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel_launcher.py:10: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
  # Remove the CWD from sys.path while we load stuff.



In [4]:

    
models = {
    cls.__name__: cls
    for cls in (ClassBalance, ClassPredictionError, ClassificationReport)
}

@interact(model=list(models.keys()))

def graph_classifers(model="ClassBalance"):
    _, axes = plt.subplots(ncols=1, figsize=(15,5))  
        
    viz = models[model](RandomForestClassifier(), classes=classes)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.finalize()
    
    return axes



In [ ]: