In [ ]:
from sklearn import datasets
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
# Load the bundled iris dataset (features in .data, class labels in .target).
dataset = datasets.load_iris()

# Create a Gaussian Naive Bayes classifier and train it on every sample;
# fit() returns the estimator itself, so construction and training chain.
model = GaussianNB().fit(dataset.data, dataset.target)
print(model)

# Predict on the same samples the model was trained on and keep the true
# labels alongside for comparison.
predicted = model.predict(dataset.data)
expected = dataset.target

# Summarize the fit: per-class report first, then the confusion matrix.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
In [ ]:
import pandas as pd
%matplotlib inline
import numpy as np
In [ ]:
# Load the Titanic passenger table; the path is relative to the current
# working directory.
titanic = pd.read_csv("data/titanic.csv")
In [ ]:
# Encode sex as a 0/1 indicator column: 1 where the value is exactly
# 'female', 0 for anything else (including missing values). The comparison
# runs on the whole column at once instead of calling a lambda per row.
titanic['sex_female'] = (titanic['sex'] == 'female').astype(int)
In [ ]:
# Assemble the modelling frame: the 'survived' column, one indicator column
# per distinct 'pclass' value (column names prefixed with "pclass"), and the
# 'sex_female' indicator, joined onto the rows of `titanic`.
dataset = titanic[['survived']].join([
    pd.get_dummies(titanic['pclass'], prefix="pclass"),
    titanic['sex_female'],
])
In [ ]:
# NOTE(review): the comment previously here said to drop pclass_1st to avoid
# the dummy variable trap, but all three pclass indicator columns are
# selected below. Confirm which was intended before changing the feature list.
# x: feature array (three pclass indicators + sex_female); y: 'survived' labels.
x = np.asarray(dataset[['pclass_1st','pclass_2nd','pclass_3rd','sex_female']])
y = np.asarray(dataset['survived'])
In [ ]:
# Train `model` on the Titanic feature array and labels.
# NOTE(review): `model` appears to be the same GaussianNB instance that was
# fit on the iris data earlier in this file; presumably this call replaces
# that earlier fit -- confirm that reusing the instance is intended.
model.fit(x,y)
In [ ]:
# True labels and the model's predictions for the same rows that were passed
# to fit(), i.e. no held-out data is used for this comparison.
expected = y
predicted = model.predict(x)
In [ ]:
def measure_performance(X, y, clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    """Print evaluation metrics for a classifier on a labelled data set.

    Predictions are obtained from ``clf.predict(X)`` and compared with ``y``
    using functions from the module-level ``metrics`` import.

    Args:
        X: Samples handed to ``clf.predict``.
        y: True labels the predictions are compared against.
        clf: Object exposing a ``predict(X)`` method.
        show_accuracy: When true, print the accuracy score with 3 decimals.
        show_classification_report: When true, print the output of
            ``metrics.classification_report``.
        show_confussion_matrix: When true, print the output of
            ``metrics.confusion_matrix``. The misspelled parameter name is
            kept so existing keyword callers continue to work.

    Returns:
        None. All results are written to standard output.
    """
    y_pred = clf.predict(X)

    # Each print call takes a single pre-formatted string so the output is
    # the same on Python 2 and Python 3. The trailing " \n" reproduces what
    # the former `print value, "\n"` statements emitted (space, newline,
    # then print's own newline).
    if show_accuracy:
        print("Accuracy:{0:.3f} \n".format(metrics.accuracy_score(y, y_pred)))

    if show_classification_report:
        print("Classification report")
        print("{0} \n".format(metrics.classification_report(y, y_pred)))

    if show_confussion_matrix:
        print("Confusion matrix")
        print("{0} \n".format(metrics.confusion_matrix(y, y_pred)))
In [ ]:
# Report accuracy, classification report and confusion matrix (all flags left
# at their defaults) for `model` on the Titanic arrays x and y.
measure_performance(x,y,model)
In [ ]: