In [1]:
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np

In [2]:
iris = datasets.load_iris()

In [3]:
x = iris.data[:,2:] # the attributes
y = iris.target # the target variable

In [14]:
from sklearn.ensemble import RandomForestClassifier

In [15]:
dt = tree.DecisionTreeClassifier()

In [6]:
dt = dt.fit(x,y)

In [7]:
from sklearn.cross_validation import train_test_split

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,train_size=0.75)

In [20]:
dt = dt.fit(x_train,y_train)

In [21]:
from sklearn import metrics

In [22]:
import numpy as np

In [23]:
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    y_pred=clf.predict(X)
    if show_accuracy:
        print("Accuracy:{0:.3f}".format(metrics.accuracy_score(y, y_pred)),"\n")
    if show_classification_report:
        print("Classification report")
        print(metrics.classification_report(y,y_pred),"\n")
    if show_confussion_matrix:
        print("Confusion matrix")
        print(metrics.confusion_matrix(y,y_pred),"\n")

In [24]:
measure_performance(x_test,y_test,dt)


Accuracy:0.921 

Classification report
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.82      1.00      0.90        14
          2       1.00      0.77      0.87        13

avg / total       0.93      0.92      0.92        38
 

Confusion matrix
[[11  0  0]
 [ 0 14  0]
 [ 0  3 10]] 


In [25]:
forest=RandomForestClassifier(n_estimators=5)

In [ ]:


In [26]:
forest.fit(x_train, y_train)


Out[26]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [27]:
print("Acurracy on Training Set: %f" % forest.score(x_train, y_train))
print("Acurracy on Test Set: %f" % forest.score(x_test, y_test))


Acurracy on Training Set: 0.991071
Acurracy on Test Set: 0.921053

In [ ]: