In [1]:
import pandas as pd
%matplotlib inline
from sklearn import cross_validation
from sklearn import datasets
from sklearn import tree
from sklearn import metrics
import numpy as np
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
In [2]:
# Load scikit-learn's bundled iris dataset; later cells read its .data and .target.
iris = datasets.load_iris()
In [3]:
# Keep every row but only the feature columns from index 2 onward
# (for the standard iris data these should be petal length and petal width —
# confirm via iris.feature_names).
x = iris.data[:,2:]
# Class labels, one per row of x.
y = iris.target
In [4]:
# 75/25 train/test split, stratified on the labels and seeded so the same rows
# land in each split on every run.
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20; sklearn.model_selection.train_test_split is the replacement —
# confirm which version this notebook targets.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    x,
    y,
    test_size=0.25,
    train_size=0.75,
    stratify=y,
    random_state=42
)
In [5]:
# Random forest of 100 trees; random_state is fixed so repeated runs build the same ensemble.
forest = RandomForestClassifier(n_estimators=100, random_state=0)
# Fit on the training split. As the last expression of the cell, the result is echoed as the cell output.
forest.fit(X_train, y_train)
Out[5]:
In [8]:
# Report the forest's score on the rows it was fitted on, then on the held-out rows.
print(
    "The Accuracy of the training set currently is: %f"
    % (forest.score(X_train, y_train),)
)
print(
    "The Accuracy of the test set currently is: %f"
    % (forest.score(X_test, y_test),)
)
In [9]:
# Seed the tree the same way the forest is seeded (random_state=0).
# scikit-learn randomly permutes the candidate features at each split, so an
# unseeded tree can differ between runs when several splits score equally.
dt = tree.DecisionTreeClassifier(random_state=0)
In [10]:
# Use the same split settings as the random-forest cell (stratified on y,
# random_state=42). This makes the decision tree train and score on the same
# rows as the forest, so the two models are compared like for like and the
# result is reproducible. It also means that rebinding y_train / y_test here
# leaves them consistent with X_train / X_test from the earlier cell.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x, y, stratify=y, random_state=42, test_size=0.25, train_size=0.75
)
In [11]:
# Fit the decision tree on the training split. Assigning the result back to dt
# (scikit-learn's fit() returns the estimator) also keeps the cell from echoing it.
dt = dt.fit(x_train,y_train)
In [15]:
def performance_depicter(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    """Print evaluation metrics for a fitted classifier on one data set.

    Predictions are obtained once with ``clf.predict(X)`` and compared with
    ``y`` using the functions in ``sklearn.metrics``.

    Parameters
    ----------
    X : feature matrix handed to ``clf.predict``.
    y : true labels that the predictions are compared against.
    clf : fitted estimator exposing a ``predict`` method.
    show_accuracy : when true, print the accuracy score with three decimals.
    show_classification_report : when true, print the classification report.
    show_confussion_matrix : when true, print the confusion matrix
        (the parameter name keeps its original spelling for callers).

    Returns
    -------
    None. All results are written to standard output.
    """
    predicted = clf.predict(X)

    if show_accuracy:
        accuracy = metrics.accuracy_score(y, predicted)
        print("Accuracy:{0:.3f}".format(accuracy),"\n")

    if show_classification_report:
        # Heading goes out before the report is computed, as in the original flow.
        print("Classification report")
        report = metrics.classification_report(y, predicted)
        print(report,"\n")

    if show_confussion_matrix:
        print("Confusion matrix")
        matrix = metrics.confusion_matrix(y, predicted)
        print(matrix,"\n")
In [16]:
# Decision-tree metrics on the rows it was fitted on.
performance_depicter(x_train,y_train,dt)
In [17]:
# Decision-tree metrics on the held-out test rows.
performance_depicter(x_test,y_test,dt)
I think the Random Forest model appears to be less accurate on the training data set. The original decision tree appears to be more precise overall.
In [ ]: