Using the readings, try to create a RandomForestClassifier for the iris dataset. Using a 25/75 training/test split, compare the results with the original decision tree model and describe the result to the best of your ability in your PR.
In [45]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_moons
from sklearn import cross_validation
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
from sklearn import tree
In [4]:
# Load scikit-learn's bundled iris dataset; its `.data` and `.target` attributes are used below.
iris = datasets.load_iris()
In [22]:
# Preview only the first five rows; displaying the whole feature array floods the
# notebook output without adding information.
iris.data[:5]
Out[22]:
In [27]:
x = iris.data[:,2:] # the attributes: every row, but only the feature columns from index 2 onward
y = iris.target # the target variable the classifiers below are fitted to predict
In [29]:
# First pass: split the data with a fixed seed, passing the labels as `stratify`,
# then fit a small 5-tree random forest on the training portion.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x,
    y,
    stratify=y,
    random_state=42,
)
forest = RandomForestClassifier(random_state=2, n_estimators=5)
forest.fit(x_train, y_train)
Out[29]:
In [31]:
# Preview only the first five test rows rather than dumping the full array.
x_test[:5]
Out[31]:
In [8]:
# NOTE(review): this expression builds a new, unfitted estimator and only displays it;
# the result is not bound to any name. It looks like the repr printed by an earlier
# `fit` call pasted back into a code cell — confirm whether this cell is still needed.
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
max_depth=None, max_features='auto', max_leaf_nodes=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=1,
oob_score=False, random_state=2, verbose=0, warm_start=False)
Out[8]:
In [35]:
# Final forest: 75% of the samples go to training and 25% to testing, with a fixed
# seed so the split is repeatable; then fit a 100-tree forest on the training part.
# NOTE(review): the task statement reads "25/75 training/test"; this cell trains on
# the 75% portion — confirm that is the intended direction.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x,
    y,
    train_size=0.75,
    test_size=0.25,
    random_state=0,
)
forest = RandomForestClassifier(random_state=0, n_estimators=100)
forest.fit(x_train, y_train)
Out[35]:
In [36]:
# Score the fitted forest on the data it was trained on, then on the held-out data;
# a large gap between the two numbers would point to overfitting.
train_accuracy = forest.score(x_train, y_train)
print("accuracy on training set: %f" % train_accuracy)
test_accuracy = forest.score(x_test, y_test)
print("accuracy on test set: %f" % test_accuracy)
Decision Tree Validation
In [39]:
# Baseline decision tree to compare against the random forest. The seed is fixed so
# the fitted tree (and therefore the reported metrics) is the same on every run;
# scikit-learn's tree builder is randomized when no random_state is given.
dt = tree.DecisionTreeClassifier(random_state=0)
In [41]:
# Re-create the split with the same sizes and seed (random_state=0) that the
# 100-tree random forest was trained with, so both models are fitted and scored on
# identical samples and a rerun reproduces the same numbers. Without a seed each run
# drew a different split, making the tree/forest comparison unreliable.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(x,y,test_size=0.25,train_size=0.75,random_state=0)
In [42]:
# Fit the decision tree on the training split; the value returned by fit() is bound
# back to `dt`, which is the classifier evaluated in the cells below.
dt = dt.fit(x_train,y_train)
In [43]:
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    """Print evaluation metrics for a fitted classifier on one dataset.

    Predictions are obtained once via ``clf.predict(X)`` and compared with ``y``
    using the functions in ``sklearn.metrics``.

    Parameters:
        X: feature rows handed to ``clf.predict``.
        y: true labels that the predictions are compared against.
        clf: any object exposing ``predict(X)``.
        show_accuracy: when true, print the accuracy score with three decimals.
        show_classification_report: when true, print the classification report.
        show_confussion_matrix: when true, print the confusion matrix
            (parameter name kept as spelled, since callers may pass it by keyword).

    Returns:
        None; all results are written to standard output.
    """
    predicted = clf.predict(X)

    if show_accuracy:
        accuracy = metrics.accuracy_score(y, predicted)
        print("Accuracy:{0:.3f}".format(accuracy), "\n")

    if show_classification_report:
        print("Classification report")
        report = metrics.classification_report(y, predicted)
        print(report, "\n")

    if show_confussion_matrix:
        print("Confusion matrix")
        matrix = metrics.confusion_matrix(y, predicted)
        print(matrix, "\n")
In [46]:
# Report the decision tree's accuracy, classification report and confusion matrix on the held-out test split.
measure_performance(x_test,y_test,dt)
In [47]:
# Same report on the training split, for comparison with the test-split figures.
measure_performance(x_train,y_train,dt)
In [ ]: