Use the pseudocode you came up with in class to write your own 5-fold cross-validation function that splits the data set into


In [438]:
import pandas as pd
%matplotlib inline
from sklearn import datasets, tree, metrics
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
import numpy as np

In [439]:
iris = datasets.load_iris()

In [440]:
iris.keys()


Out[440]:
dict_keys(['target', 'DESCR', 'data', 'feature_names', 'target_names'])

Don't forget to shuffle the input before assigning to the splits


In [441]:
x = iris.data[:,2:]
y = iris.target

In [442]:
from random import shuffle
# Given list1 and list2
x_shuf = []
y_shuf = []
index_shuf = list(range(len(x)))
shuffle(index_shuf)
for i in index_shuf:
    x_shuf.append(x[i])
    y_shuf.append(y[i])

In [443]:
#Splitting the lists

In [444]:
list_chunk_length = int(len(x_shuf)/5)

In [445]:
#Function 
def chunks(l, n):
    n = max(1, n)
    return [l[i:i + n] for i in range(0, len(l), n)]

In [446]:
x_chunks = chunks(x_shuf, list_chunk_length)
y_chunks = chunks(y_shuf, list_chunk_length)

 You can use the fit


In [447]:
dt = tree.DecisionTreeClassifier()

In [448]:
x_test = x_chunks[0] # the attributes
y_test = y_chunks[0] # the target variable

In [449]:
x_train = sum(x_chunks[1:], [])
y_train = sum(y_chunks[1:], [])

In [450]:
dt = dt.fit(x_train,y_train)

 Predicting and scoring


In [451]:
y_pred=dt.predict(x_test)

In [452]:
Accuracy_score = metrics.accuracy_score(y_test, y_pred)

In [453]:
print("Accuracy:{0:.3f}".format(metrics.accuracy_score(y_test, y_pred)),"\nClassification report:")
print(metrics.classification_report(y_test,y_pred),"\n")
print(metrics.confusion_matrix(y_test,y_pred),"\n")


Accuracy:0.933 
Classification report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        14
          1       0.88      0.88      0.88         8
          2       0.88      0.88      0.88         8

avg / total       0.93      0.93      0.93        30
 

[[14  0  0]
 [ 0  7  1]
 [ 0  1  7]] 


In [454]:
Accuracy_score


Out[454]:
0.93333333333333335

Looping through the whole process?


In [455]:
#test = list1.pop(0)

In [456]:
#list1.append(test)

In [457]:
#list1

In [458]:
dt = tree.DecisionTreeClassifier()

Average_list = []

for x, y in zip(x_chunks, y_chunks):
    
    #Popping first item off the list
    x_test = x_chunks.pop(0)
    
    #Making one list out of many
    x_train = sum(x_chunks, [])
    
    #Adding the popped item back on to the orgininal list again. We need it.
    x_chunks.append(x_test)
    
    #Popping first item off the list
    y_test = y_chunks.pop(0)
    y_train = sum(y_chunks, [])
    
    #Popping it back on again
    y_chunks.append(y_test)
    
    #fitting training
    dt = dt.fit(x_train,y_train)
    
    #Predicting
    y_pred=dt.predict(x_test)
    
    #Getting the accurancy score
    Accuracy_score = metrics.accuracy_score(y_test, y_pred)
    
    #Creating a list of averages:
    Average_list.append(Accuracy_score)

In [459]:
#The average score:
sum(Average_list) / 5


Out[459]:
0.95333333333333337

Testing against the computer model


In [460]:
from sklearn.cross_validation import cross_val_score

In [461]:
scores = cross_val_score(dt,x,y,cv=5)

In [462]:
import numpy as np

In [463]:
np.mean(scores)


Out[463]:
0.94999999999999996

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [425]:
#My testing material

list1 =[[1, 2, 3, 4, 5], [6, 7, 8, 9 , 10], [11, 12, 13, 14, 15]]
list2 =[[1, 2, 3, 4, 5], [6, 7, 8, 9 , 10], [11, 12, 13, 14, 15]]

for x, y in zip(list1, list2):
    
    test = list1.pop(0)
    print(test)
    list1.append(test)
    print(list1)

    
    test2 = list2.pop(0)
    print(test2)
    list2.append(test2)
    print(list2)


[1, 2, 3, 4, 5]
[[6, 7, 8, 9, 10], [11, 12, 13, 14, 15], [1, 2, 3, 4, 5]]
[1, 2, 3, 4, 5]
[[6, 7, 8, 9, 10], [11, 12, 13, 14, 15], [1, 2, 3, 4, 5]]
[6, 7, 8, 9, 10]
[[11, 12, 13, 14, 15], [1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
[6, 7, 8, 9, 10]
[[11, 12, 13, 14, 15], [1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
[11, 12, 13, 14, 15]
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
[11, 12, 13, 14, 15]
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]