In [ ]:
from sklearn import datasets
from sklearn import metrics 
from sklearn.naive_bayes import GaussianNB 
# Load the iris dataset that ships with scikit-learn.
dataset = datasets.load_iris()

# Fit a Gaussian Naive Bayes model to the data; fit() hands back the
# estimator, so construction and fitting are chained.
model = GaussianNB().fit(dataset.data, dataset.target)
print(model)

# Make predictions on the same samples the model was fitted on.
predicted = model.predict(dataset.data)
expected = dataset.target

# Summarize the fit of the model.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

Let's get back to the saga of Leo and Kate: the Titanic dataset.


In [ ]:
import pandas as pd
%matplotlib inline
import numpy as np

In [ ]:
# Read the Titanic records from the CSV file under the relative data/ directory.
titanic = pd.read_csv("data/titanic.csv")

In [ ]:
# Encode sex as a 0/1 indicator column (1 = 'female'). A vectorized comparison
# replaces the row-by-row apply; every value other than 'female' maps to 0.
titanic['sex_female'] = (titanic['sex'] == 'female').astype('int64')

In [ ]:
# Build the modelling frame: the survival label, one indicator column per
# pclass value, and the female indicator. dtype=int keeps the class
# indicators as integer 0/1 columns; pandas >= 2.0 would otherwise emit bool
# columns, and mixing those with the integer sex_female column produces an
# object array when the frame is later converted with np.asarray.
dataset = titanic[['survived']].join(
    [pd.get_dummies(titanic['pclass'], prefix="pclass", dtype=int),
     titanic['sex_female']]
)

In [ ]:
# Drop pclass_1st to avoid the dummy variable trap: the pclass indicators
# always sum to one, so pclass_1st is implied by the other two and is left
# out as the reference level.
x = np.asarray(dataset[['pclass_2nd', 'pclass_3rd', 'sex_female']])
y = np.asarray(dataset['survived'])

In [ ]:
# Fit the GaussianNB instance created in the first cell (previously fitted on
# iris) on the Titanic feature matrix x and survival labels y.
model.fit(x,y)

In [ ]:
# Predict on the Titanic feature matrix and keep the true labels alongside
# the predictions for comparison.
predicted = model.predict(x)
expected = y

In [ ]:
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    """Print evaluation metrics for a classifier's predictions on ``X``.

    ``clf.predict(X)`` is called once and the predictions are compared with
    ``y`` using the ``metrics`` module imported at the top of the notebook.

    Parameters
    ----------
    X : feature matrix passed unchanged to ``clf.predict``.
    y : true labels that the predictions are compared against.
    clf : object exposing a ``predict(X)`` method.
    show_accuracy : bool, default True
        Print the accuracy score formatted to three decimals.
    show_classification_report : bool, default True
        Print the classification report.
    show_confussion_matrix : bool, default True
        Print the confusion matrix. The misspelled parameter name is kept so
        existing keyword callers continue to work.

    Returns
    -------
    None. The function only prints.
    """
    y_pred = clf.predict(X)
    # Each message is built as one string and passed as a single argument so
    # print(...) renders the same text whether print is the Python 3 function
    # or the Python 2 statement.
    if show_accuracy:
        print("Accuracy:{0:.3f}\n".format(metrics.accuracy_score(y, y_pred)))
    if show_classification_report:
        print("Classification report")
        print("{0}\n".format(metrics.classification_report(y, y_pred)))
    if show_confussion_matrix:
        print("Confusion matrix")
        print("{0}\n".format(metrics.confusion_matrix(y, y_pred)))

In [ ]:
# Report accuracy, classification report and confusion matrix (all enabled by
# default) for model's predictions on x against the labels y.
measure_performance(x,y,model)

In [ ]: