In [1]:
import numpy as np
import sys
if "../" not in sys.path:
sys.path.append("../")
import pandas as pd
from sklearn.cross_validation import train_test_split
from supervised.naive_bayes import GaussianNB
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
In [2]:
# Read the diabetes CSV; the file is loaded without a header row, so columns
# are labelled by position.
df = pd.read_csv('../dataset/pima-indians-diabetes.csv', header=None)

# Every column except the last is used as a feature; the last one is the target.
feature_cols, target_col = df.columns[:-1], df.columns[-1]
X, y = df[feature_cols], df[target_col]

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=2046,
)
In [3]:
# Instantiate the project's GaussianNB classifier (from supervised.naive_bayes)
# and fit it on the training split.
model = GaussianNB()
model.fit(X_train, y_train)
In [4]:
# Predict labels for the held-out test features with the fitted model.
y_pred = model.predict(X_test)
In [5]:
# accuracy: fraction of test rows whose predicted label equals the true label
is_correct = y_test == y_pred
is_correct.mean()
# For results of other algorithms on this dataset, see
# http://www.is.umk.pl/projects/datasets.html#Diabetes
Out[5]: