In [29]:
import pandas as pd
%matplotlib inline
from sklearn import datasets
from pandas.tools.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import tree
In [2]:
iris = datasets.load_iris()
In [3]:
x = iris.data[:,2:]
y = iris.target
In [5]:
plt.figure(2, figsize=(8, 6))
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.CMRmap)
plt.xlabel('Petal length (cm)')
plt.ylabel('Petal width (cm)')
Out[5]:
In [8]:
dt = tree.DecisionTreeClassifier()
dt = dt.fit(x,y)
In [30]:
from sklearn.cross_validation import train_test_split
In [13]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.5,train_size=0.5)
dt = dt.fit(x_train,y_train)
In [31]:
from sklearn import metrics
import numpy as np
In [16]:
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
y_pred=clf.predict(X)
if show_accuracy:
print("Accuracy:{0:.5f}".format(metrics.accuracy_score(y, y_pred)),"\n")
if show_classification_report:
print("Classification report")
print(metrics.classification_report(y,y_pred),"\n")
if show_confussion_matrix:
print("Confusion matrix")
print(metrics.confusion_matrix(y,y_pred),"\n")
measure_performance(x_test,y_test,dt)
In [17]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,train_size=0.75)
dt = dt.fit(x_train,y_train)
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
y_pred=clf.predict(X)
if show_accuracy:
print("Accuracy:{0:.75f}".format(metrics.accuracy_score(y, y_pred)),"\n")
if show_classification_report:
print("Classification report")
print(metrics.classification_report(y,y_pred),"\n")
if show_confussion_matrix:
print("Confusion matrix")
print(metrics.confusion_matrix(y,y_pred),"\n")
measure_performance(x_test,y_test,dt)
datasets.load_breast_cancer()) and perform basic exploratory analysis. What attributes to we have? What are we trying to predict?For context of the data, see the documentation here: https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29
In [32]:
breast = datasets.load_breast_cancer()
breast.data
Out[32]:
In [33]:
x = breast.data[:,:] # the attributes
y = breast.target
plt.figure(2, figsize=(8, 6))
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.CMRmap)
Out[33]:
In [ ]:
In [34]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.5,train_size=0.5)
dt = dt.fit(x_train,y_train)
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
y_pred=clf.predict(X)
if show_accuracy:
print("Accuracy:{0:.5f}".format(metrics.accuracy_score(y, y_pred)),"\n")
if show_classification_report:
print("Classification report")
print(metrics.classification_report(y,y_pred),"\n")
if show_confussion_matrix:
print("Confusion matrix")
print(metrics.confusion_matrix(y,y_pred),"\n")
measure_performance(x_test,y_test,dt)
In [35]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,train_size=0.75)
dt = dt.fit(x_train,y_train)
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
y_pred=clf.predict(X)
if show_accuracy:
print("Accuracy:{0:.75f}".format(metrics.accuracy_score(y, y_pred)),"\n")
if show_classification_report:
print("Classification report")
print(metrics.classification_report(y,y_pred),"\n")
if show_confussion_matrix:
print("Confusion matrix")
print(metrics.confusion_matrix(y,y_pred),"\n")
measure_performance(x_test,y_test,dt)
In [ ]: