notebook.community

Edit and run



In [7]:

    
import random

import numpy as np
from sklearn import datasets
from sklearn import metrics
from sklearn import tree
from sklearn.model_selection import cross_val_score



In [8]:

    
# Fetch the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Features: every row, dropping the first two columns (keep index 2 onward).
# Labels: the class index of each sample.
x, y = iris.data[:, 2:], iris.target



In [10]:

    
# Shuffle features and labels together so each sample keeps its own label.
# Seeding the generator first means a fresh "Restart & Run All" produces the
# same ordering, and therefore the same fold scores, every time.
random.seed(0)
linked_data = list(zip(x, y))
random.shuffle(linked_data)

# Unzip back into two parallel sequences. Note that x and y are now tuples
# (not NumPy arrays), so `+` on them concatenates instead of adding.
x, y = zip(*linked_data)



In [11]:

    
fold = 5
list_of_scores = []

# Manual k-fold cross-validation: each of the `fold` contiguous slices of the
# data takes one turn as the test set while the remainder is used for training.
n_samples = len(x)
for i in range(fold):
    # Integer arithmetic keeps the fold boundaries exact, so the folds tile the
    # data with no gaps or overlaps even when n_samples is not a multiple of
    # `fold`. (Computing int(len(x) / fold * i) depends on float rounding, and
    # under Python 2 integer division it silently drops the remainder samples.)
    start = n_samples * i // fold
    stop = n_samples * (i + 1) // fold

    x_test = x[start:stop]
    y_test = y[start:stop]

    # Training data is everything outside [start, stop). Converting to lists
    # makes `+` concatenate whether x / y are tuples, lists or NumPy arrays.
    x_train = list(x[stop:]) + list(x[:start])
    y_train = list(y[stop:]) + list(y[:start])

    # Fit a fresh tree on the training folds. The fixed random_state makes the
    # fitted tree, and therefore the score, repeatable between runs.
    dt = tree.DecisionTreeClassifier(random_state=0).fit(x_train, y_train)

    # Predict the held-out fold and record the accuracy of that prediction.
    y_pred = dt.predict(x_test)
    score = metrics.accuracy_score(y_test, y_pred)
    list_of_scores.append(score)

# Mean accuracy over all folds.
average_score = sum(list_of_scores) / len(list_of_scores)
print(average_score)









    



0.953333333333



In [122]:

    
# Now compare the manual result with scikit-learn's built-in cross-validation



In [136]:

    
# Run scikit-learn's own cross-validation with the same estimator and the same
# number of folds as the manual loop, then average the per-fold accuracies.
# `cross_val_score` comes from `sklearn.model_selection` (imported in the first
# cell); the old `sklearn.cross_validation` module no longer exists in current
# scikit-learn releases, so importing from it raises ImportError.
scores = cross_val_score(dt, x, y, cv=fold)
np.mean(scores)









    Out[136]:





0.94000000000000006



In [ ]: