In [1]:
import numpy as np
import sys
if "../" not in sys.path:
  sys.path.append("../")

import pandas as pd
from sklearn.cross_validation import train_test_split

from supervised.naive_bayes import GaussianNB
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2


/Users/mac/anaconda/envs/py35/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
# Load the diabetes CSV; the file has no header row, so columns are numbered.
df = pd.read_csv('../dataset/pima-indians-diabetes.csv', header=None)

# Every column except the last is used as a feature; the last one is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 33% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=2046,
)

In [3]:
# Fit the project's Gaussian naive Bayes implementation on the training split.
# NOTE(review): fit() is called as a statement rather than chained, so nothing
# here depends on its return value.
model = GaussianNB()
model.fit(X_train, y_train)

In [4]:
# Run the fitted model on the held-out features; the result is scored against y_test.
y_pred = model.predict(X_test)

In [5]:
# Accuracy: fraction of held-out rows whose prediction equals the true label.
# Both sides are converted to plain arrays so the comparison is positional:
# y_test keeps the shuffled row labels from the split, and comparing it with a
# differently-indexed pandas Series would otherwise raise or misalign.
np.mean(np.asarray(y_test) == np.asarray(y_pred))

# Results of other algorithms on this dataset, for comparison:
# http://www.is.umk.pl/projects/datasets.html#Diabetes


Out[5]:
0.76377952755905509