In [7]:
import numpy as np
import random
from sklearn import metrics
from sklearn import datasets
from sklearn import tree

In [8]:
# Load the iris data set that ships with scikit-learn.
iris = datasets.load_iris()

# Features: every row, feature columns from index 2 onward.
# Labels: the class targets that accompany those rows.
x, y = iris.data[:, 2:], iris.target

In [10]:
# Shuffle features and labels together so every sample keeps its own label.
# Seed the generator first: without a fixed seed the fold assignment (and
# therefore every score computed from it) changes on each run.
random.seed(0)

linked_data = list(zip(x, y))
random.shuffle(linked_data)

# zip(*...) un-pairs the shuffled samples; note that x and y are tuples
# (not NumPy arrays) from this point on.
x, y = zip(*linked_data)

In [11]:
fold = 5
list_of_scores = []

# Manual k-fold cross-validation: each pass holds out one contiguous slice of
# the samples as the test set, trains a decision tree on the rest, and records
# the accuracy on the held-out slice.
for i in range(fold):
    # Integer arithmetic keeps the fold boundaries exact: the stop of fold i is
    # always the start of fold i + 1, so every sample is tested exactly once
    # even when len(x) is not a multiple of fold.
    start = len(x) * i // fold
    stop = len(x) * (i + 1) // fold

    x_test = x[start:stop]
    y_test = y[start:stop]

    # list(...) makes `+` a concatenation whatever sequence type x and y are;
    # on NumPy arrays a bare `+` would be element-wise addition instead.
    x_train = list(x[stop:]) + list(x[:start])
    y_train = list(y[stop:]) + list(y[:start])

    # Make a fit using the training data (fixed random_state for repeatable trees)
    dt = tree.DecisionTreeClassifier(random_state=0).fit(x_train, y_train)

    # Make a y prediction based on the test data
    y_pred = dt.predict(x_test)

    # Compare the accuracy of prediction
    score = metrics.accuracy_score(y_test, y_pred)
    list_of_scores.append(score)

# Mean accuracy over all folds
average_score = sum(list_of_scores) / len(list_of_scores)
print(average_score)


0.953333333333

In [122]:
# Compare the manual k-fold result above with scikit-learn's built-in cross_val_score.

In [136]:
# `sklearn.cross_validation` was deprecated and later removed from
# scikit-learn; `cross_val_score` now lives in `sklearn.model_selection`.
from sklearn.model_selection import cross_val_score

# Score a fresh, unfitted tree instead of reusing whichever fitted model the
# manual loop left behind in `dt`. With an integer `cv` and a classifier,
# scikit-learn builds stratified folds, so this figure is a sanity check and
# is not expected to match the manual estimate exactly.
scores = cross_val_score(tree.DecisionTreeClassifier(random_state=0), x, y, cv=5)
np.mean(scores)


Out[136]:
0.94000000000000006

In [ ]: