In [1]:
import os
import sys
import pandas as pd
# Root directory of the example datasets, relative to the working directory;
# normpath drops the trailing slash and uses the host's path separator.
DATA = os.path.normpath("../data/")
# Add the parent directory to the module search path.
# NOTE(review): presumably so packages checked out one level up can be
# imported by later cells — confirm.
sys.path.append("..")
def load_data(name):
    """
    Read the CSV file of a named dataset into a DataFrame.

    The file is looked up at ``<DATA>/<name>/<name>.csv``, i.e. ``name`` is
    used both as the sub-directory of ``DATA`` and as the stem of the file.

    Parameters
    ----------
    name : str
        Name of the dataset to load.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` returns for that path.
    """
    filename = name + ".csv"
    csv_path = os.path.join(DATA, name, filename)
    return pd.read_csv(csv_path)
In [2]:
# Load the "occupancy" dataset; load_data reads <DATA>/occupancy/occupancy.csv.
data = load_data(name="occupancy")
In [4]:
from yellowbrick.classifier import ConfusionMatrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as tts

# Fixed seed so the train/test split (and therefore the plotted matrix) is
# identical on every Restart & Run All.
RANDOM_STATE = 42

# NOTE(review): "C02" is spelled with a zero, not the letter O — confirm this
# matches the header of the occupancy CSV.
features = ["temperature", "relative humidity", "light", "C02", "humidity"]

# Extract the numpy arrays from the data frame.
# as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0;
# to_numpy() is the supported replacement.
X = data[features].to_numpy()
y = data["occupancy"].to_numpy()

# Hold out 20% of the rows for scoring the fitted model.
X_train, X_test, y_train, y_test = tts(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Wrap a logistic-regression estimator in the confusion-matrix visualizer,
# fit it on the training split, score it on the held-out split, then render.
oz = ConfusionMatrix(LogisticRegression(), size=(1080, 720))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()
In [ ]: