notebook.community

Edit and run



In [1]:

    
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np



In [2]:

    
iris = datasets.load_iris()



In [3]:

    
x = iris.data[:,2:] # the attributes
y = iris.target # the target variable



In [14]:

    
from sklearn.ensemble import RandomForestClassifier



In [15]:

    
dt = tree.DecisionTreeClassifier()



In [6]:

    
dt = dt.fit(x,y)



In [7]:

    
from sklearn.cross_validation import train_test_split



In [19]:

    
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,train_size=0.75)



In [20]:

    
dt = dt.fit(x_train,y_train)



In [21]:

    
from sklearn import metrics



In [22]:

    
import numpy as np



In [23]:

    
def measure_performance(X,y,clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    y_pred=clf.predict(X)
    if show_accuracy:
        print("Accuracy:{0:.3f}".format(metrics.accuracy_score(y, y_pred)),"\n")
    if show_classification_report:
        print("Classification report")
        print(metrics.classification_report(y,y_pred),"\n")
    if show_confussion_matrix:
        print("Confusion matrix")
        print(metrics.confusion_matrix(y,y_pred),"\n")



In [24]:

    
measure_performance(x_test,y_test,dt)









    



Accuracy:0.921 

Classification report
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.82      1.00      0.90        14
          2       1.00      0.77      0.87        13

avg / total       0.93      0.92      0.92        38
 

Confusion matrix
[[11  0  0]
 [ 0 14  0]
 [ 0  3 10]]



In [25]:

    
forest=RandomForestClassifier(n_estimators=5)



In [ ]:



In [26]:

    
forest.fit(x_train, y_train)









    Out[26]:





RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)



In [27]:

    
print("Acurracy on Training Set: %f" % forest.score(x_train, y_train))
print("Acurracy on Test Set: %f" % forest.score(x_test, y_test))









    



Acurracy on Training Set: 0.991071
Acurracy on Test Set: 0.921053



In [ ]: