In [1]:
# If the imports below fail with a numpy error saying that numpy.dtype has changed, reinstall pandas manually with the following command
# pip install --no-binary pandas -I pandas
import seaborn as sns
import pandas as pd

In [2]:
# Load the iris dataset from the local seaborn-data submodule.
# sns.load_dataset('iris') was tried first but fails with an SSL error,
# so the CSV is read from disk instead.
iris = pd.read_csv(filepath_or_buffer='seaborn-data/iris.csv')

In [3]:
# Preview the first five rows to check the columns that were loaded
iris.head(n=5)


Out[3]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

In [4]:
# Feature matrix: every column of iris except the 'species' label
X_iris = iris.drop(labels=['species'], axis='columns')

In [6]:
# Show the shape of the feature matrix as (rows, columns);
# the 'species' column has already been dropped from X_iris
X_iris.shape


Out[6]:
(150, 4)

In [7]:
# Target vector: the 'species' column holds the label we want to predict
Y_iris = iris.loc[:, 'species']

In [8]:
# Show the shape of the label vector; it is one-dimensional because
# Y_iris is the single 'species' column of iris
Y_iris.shape


Out[8]:
(150,)

In [10]:
# Scikit-Learn Estimator API
# 1. Choose the model class
from sklearn.naive_bayes import GaussianNB
# 2. Instantiate the model (no arguments are passed, so defaults are used)
model = GaussianNB()
# 3. Fit the model to the features and labels; the value returned by fit()
#    is the cell's last expression, so the notebook displays it
model.fit(X=X_iris, y=Y_iris)


Out[10]:
GaussianNB(priors=None)

In [ ]: