In [438]:
import pandas as pd
%matplotlib inline
from sklearn import datasets, tree, metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
In [439]:
iris = datasets.load_iris()
In [440]:
iris.keys()
Out[440]:
In [441]:
x = iris.data[:,2:]
y = iris.target
In [442]:
from random import shuffle
# Shuffle x and y in unison via a shared, shuffled index list
x_shuf = []
y_shuf = []
index_shuf = list(range(len(x)))
shuffle(index_shuf)
for i in index_shuf:
    x_shuf.append(x[i])
    y_shuf.append(y[i])
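As an aside, the same paired shuffle can be done in one step with numpy's permutation (an illustrative alternative, assuming x and y are the numpy arrays loaded above; not part of the original run):
In [ ]:
# Shuffle features and targets together with a single random permutation
perm = np.random.permutation(len(x))
x_shuf_np = x[perm]
y_shuf_np = y[perm]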
In [443]:
#Splitting the lists
In [444]:
list_chunk_length = int(len(x_shuf)/5)
In [445]:
# Helper: split list l into consecutive chunks of length n
def chunks(l, n):
    n = max(1, n)
    return [l[i:i + n] for i in range(0, len(l), n)]
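A quick sanity check of chunks() on a toy list (illustrative only):
In [ ]:
# chunks() returns consecutive slices of length n; the last chunk is shorter
# when the list length is not a multiple of n
chunks(list(range(10)), 3)
# -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]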
In [446]:
x_chunks = chunks(x_shuf, list_chunk_length)
y_chunks = chunks(y_shuf, list_chunk_length)
In [447]:
dt = tree.DecisionTreeClassifier()
In [448]:
x_test = x_chunks[0] # the attributes
y_test = y_chunks[0] # the target variable
In [449]:
x_train = sum(x_chunks[1:], [])
y_train = sum(y_chunks[1:], [])
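For reference, sum(list_of_lists, []) simply concatenates the sub-lists; itertools.chain does the same and is the more common idiom (an equivalent sketch, not part of the original run):
In [ ]:
# Equivalent flattening of the training chunks with itertools.chain
from itertools import chain
x_train_alt = list(chain.from_iterable(x_chunks[1:]))
y_train_alt = list(chain.from_iterable(y_chunks[1:]))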
In [450]:
dt = dt.fit(x_train, y_train)
In [451]:
y_pred = dt.predict(x_test)
In [452]:
Accuracy_score = metrics.accuracy_score(y_test, y_pred)
In [453]:
print("Accuracy:{0:.3f}".format(metrics.accuracy_score(y_test, y_pred)),"\nClassification report:")
print(metrics.classification_report(y_test,y_pred),"\n")
print(metrics.confusion_matrix(y_test,y_pred),"\n")
In [454]:
Accuracy_score
Out[454]:
In [455]:
#test = list1.pop(0)
In [456]:
#list1.append(test)
In [457]:
#list1
In [458]:
dt = tree.DecisionTreeClassifier()
Average_list = []
# Rotate through the chunks so every chunk is used as the test set exactly once.
# (The loop variable deliberately avoids the names x and y so the full data set
# is not shadowed for the cross_val_score cell further down.)
for _ in range(len(x_chunks)):
    # Popping the first chunk off the list to use as the test set
    x_test = x_chunks.pop(0)
    # Making one list out of the remaining chunks
    x_train = sum(x_chunks, [])
    # Adding the popped chunk back onto the end of the original list again. We need it.
    x_chunks.append(x_test)
    # Popping the first chunk of targets off the list
    y_test = y_chunks.pop(0)
    y_train = sum(y_chunks, [])
    # Popping it back on again
    y_chunks.append(y_test)
    # Fitting on the training folds
    dt = dt.fit(x_train, y_train)
    # Predicting on the held-out fold
    y_pred = dt.predict(x_test)
    # Getting the accuracy score
    Accuracy_score = metrics.accuracy_score(y_test, y_pred)
    # Collecting the per-fold scores
    Average_list.append(Accuracy_score)
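As a side note, the same "use each chunk once as the test fold" rotation could be written with collections.deque; this is only an illustrative sketch of an alternative, not part of the original run.
In [ ]:
# Illustrative alternative: rotate the chunk lists with a deque instead of pop/append
from collections import deque
xq, yq = deque(x_chunks), deque(y_chunks)
alt_scores = []
for _ in range(len(xq)):
    x_tr = sum(list(xq)[1:], [])   # everything except the front chunk
    y_tr = sum(list(yq)[1:], [])
    clf = tree.DecisionTreeClassifier().fit(x_tr, y_tr)
    alt_scores.append(metrics.accuracy_score(yq[0], clf.predict(xq[0])))
    xq.rotate(-1)                  # move the front chunk to the back
    yq.rotate(-1)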
In [459]:
#The average score:
sum(Average_list) / 5
Out[459]:
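Equivalently, the average can be computed with numpy so the number of folds is not hard-coded (illustrative only):
In [ ]:
# Same mean, derived from however many fold scores were collected
np.mean(Average_list)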
In [460]:
from sklearn.model_selection import cross_val_score
In [461]:
scores = cross_val_score(dt, x, y, cv=5)
In [462]:
import numpy as np
In [463]:
np.mean(scores)
Out[463]:
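For a quick sanity check, the per-fold scores and their spread can be printed as well (an illustrative sketch, not part of the original output):
In [ ]:
# Per-fold accuracies from cross_val_score plus mean and standard deviation
print("Fold scores:", scores)
print("Mean: {0:.3f}, Std: {1:.3f}".format(np.mean(scores), np.std(scores)))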
In [425]:
# My testing material
list1 = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
list2 = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
for x, y in zip(list1, list2):
    test = list1.pop(0)
    print(test)
    list1.append(test)
    print(list1)
    test2 = list2.pop(0)
    print(test2)
    list2.append(test2)
    print(list2)