In [1]:
import pandas as pd
%matplotlib inline

In [2]:
from sklearn import datasets
from pandas.tools.plotting import scatter_matrix

In [3]:
import matplotlib.pyplot as plt

In [4]:
iris = datasets.load_iris() # load iris data set

In [5]:
x = iris.data[:,2:] # the attributes
y = iris.target # the target variable

In [6]:
from sklearn import tree

In [7]:
dt = tree.DecisionTreeClassifier()

In [8]:
dt = dt.fit(x,y)

Now what?


In [9]:
from sklearn.cross_validation import train_test_split

In [10]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.33,train_size=0.66)

In [11]:
dt = dt.fit(x_train,y_train)

In [12]:
from sklearn import metrics

In [13]:
import numpy as np

In [14]:
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    y_pred=clf.predict(X)
    if show_accuracy:
        print("Accuracy:{0:.3f}".format(metrics.accuracy_score(y, y_pred)),"\n")
    if show_classification_report:
        print("Classification report")
        print(metrics.classification_report(y,y_pred),"\n")
    if show_confussion_matrix:
        print("Confusion matrix")
        print(metrics.confusion_matrix(y,y_pred),"\n")

In [15]:
measure_performance(x_test,y_test,dt) #measure on the test data (rather than train)


Accuracy:0.980 

Classification report
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        16
          1       0.95      1.00      0.97        18
          2       1.00      0.94      0.97        16

avg / total       0.98      0.98      0.98        50
 

Confusion matrix
[[16  0  0]
 [ 0 18  0]
 [ 0  1 15]] 


In [16]:
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(iris.target_names))
    plt.xticks(tick_marks, iris.target_names, rotation=45)
    plt.yticks(tick_marks, iris.target_names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [17]:
y_pred = dt.fit(x_train, y_train).predict(x_test) #generate a prediction based on the model created to output a predicted y

In [18]:
cm = metrics.confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)
print('Confusion matrix, without normalization')
print(cm)
plt.figure()
plot_confusion_matrix(cm)


Confusion matrix, without normalization
[[16  0  0]
 [ 0 18  0]
 [ 0  1 15]]

In [ ]: