In [2]:
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import tree
from sklearn.utils import shuffle
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
import random

In [3]:
# Load scikit-learn's iris dataset; the cells below read its `.data`
# (feature matrix) and `.target` (class labels) attributes.
iris = datasets.load_iris()

In [4]:
# Keep only feature columns 2 onward (the two petal measurements) as inputs,
# and the class labels as the prediction target.
x, y = iris.data[:, 2:], iris.target

In [5]:
# Glue the labels on as a third column so that features and labels stay
# aligned when the rows are shuffled. The stacked array is floating point,
# which is why the labels print as 0., 1., 2. below.
input_dataset = np.column_stack((x, y))

In [6]:
# Sanity check: rows are [feature 0, feature 1, label] and are still grouped by class.
# NOTE(review): this prints all 150 rows; input_dataset[:5] would be enough for a spot check.
input_dataset


Out[6]:
array([[ 1.4,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.3,  0.2,  0. ],
       [ 1.5,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.7,  0.4,  0. ],
       [ 1.4,  0.3,  0. ],
       [ 1.5,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.5,  0.1,  0. ],
       [ 1.5,  0.2,  0. ],
       [ 1.6,  0.2,  0. ],
       [ 1.4,  0.1,  0. ],
       [ 1.1,  0.1,  0. ],
       [ 1.2,  0.2,  0. ],
       [ 1.5,  0.4,  0. ],
       [ 1.3,  0.4,  0. ],
       [ 1.4,  0.3,  0. ],
       [ 1.7,  0.3,  0. ],
       [ 1.5,  0.3,  0. ],
       [ 1.7,  0.2,  0. ],
       [ 1.5,  0.4,  0. ],
       [ 1. ,  0.2,  0. ],
       [ 1.7,  0.5,  0. ],
       [ 1.9,  0.2,  0. ],
       [ 1.6,  0.2,  0. ],
       [ 1.6,  0.4,  0. ],
       [ 1.5,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.6,  0.2,  0. ],
       [ 1.6,  0.2,  0. ],
       [ 1.5,  0.4,  0. ],
       [ 1.5,  0.1,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.5,  0.1,  0. ],
       [ 1.2,  0.2,  0. ],
       [ 1.3,  0.2,  0. ],
       [ 1.5,  0.1,  0. ],
       [ 1.3,  0.2,  0. ],
       [ 1.5,  0.2,  0. ],
       [ 1.3,  0.3,  0. ],
       [ 1.3,  0.3,  0. ],
       [ 1.3,  0.2,  0. ],
       [ 1.6,  0.6,  0. ],
       [ 1.9,  0.4,  0. ],
       [ 1.4,  0.3,  0. ],
       [ 1.6,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.5,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 4.7,  1.4,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 4.9,  1.5,  1. ],
       [ 4. ,  1.3,  1. ],
       [ 4.6,  1.5,  1. ],
       [ 4.5,  1.3,  1. ],
       [ 4.7,  1.6,  1. ],
       [ 3.3,  1. ,  1. ],
       [ 4.6,  1.3,  1. ],
       [ 3.9,  1.4,  1. ],
       [ 3.5,  1. ,  1. ],
       [ 4.2,  1.5,  1. ],
       [ 4. ,  1. ,  1. ],
       [ 4.7,  1.4,  1. ],
       [ 3.6,  1.3,  1. ],
       [ 4.4,  1.4,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 4.1,  1. ,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 3.9,  1.1,  1. ],
       [ 4.8,  1.8,  1. ],
       [ 4. ,  1.3,  1. ],
       [ 4.9,  1.5,  1. ],
       [ 4.7,  1.2,  1. ],
       [ 4.3,  1.3,  1. ],
       [ 4.4,  1.4,  1. ],
       [ 4.8,  1.4,  1. ],
       [ 5. ,  1.7,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 3.5,  1. ,  1. ],
       [ 3.8,  1.1,  1. ],
       [ 3.7,  1. ,  1. ],
       [ 3.9,  1.2,  1. ],
       [ 5.1,  1.6,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 4.5,  1.6,  1. ],
       [ 4.7,  1.5,  1. ],
       [ 4.4,  1.3,  1. ],
       [ 4.1,  1.3,  1. ],
       [ 4. ,  1.3,  1. ],
       [ 4.4,  1.2,  1. ],
       [ 4.6,  1.4,  1. ],
       [ 4. ,  1.2,  1. ],
       [ 3.3,  1. ,  1. ],
       [ 4.2,  1.3,  1. ],
       [ 4.2,  1.2,  1. ],
       [ 4.2,  1.3,  1. ],
       [ 4.3,  1.3,  1. ],
       [ 3. ,  1.1,  1. ],
       [ 4.1,  1.3,  1. ],
       [ 6. ,  2.5,  2. ],
       [ 5.1,  1.9,  2. ],
       [ 5.9,  2.1,  2. ],
       [ 5.6,  1.8,  2. ],
       [ 5.8,  2.2,  2. ],
       [ 6.6,  2.1,  2. ],
       [ 4.5,  1.7,  2. ],
       [ 6.3,  1.8,  2. ],
       [ 5.8,  1.8,  2. ],
       [ 6.1,  2.5,  2. ],
       [ 5.1,  2. ,  2. ],
       [ 5.3,  1.9,  2. ],
       [ 5.5,  2.1,  2. ],
       [ 5. ,  2. ,  2. ],
       [ 5.1,  2.4,  2. ],
       [ 5.3,  2.3,  2. ],
       [ 5.5,  1.8,  2. ],
       [ 6.7,  2.2,  2. ],
       [ 6.9,  2.3,  2. ],
       [ 5. ,  1.5,  2. ],
       [ 5.7,  2.3,  2. ],
       [ 4.9,  2. ,  2. ],
       [ 6.7,  2. ,  2. ],
       [ 4.9,  1.8,  2. ],
       [ 5.7,  2.1,  2. ],
       [ 6. ,  1.8,  2. ],
       [ 4.8,  1.8,  2. ],
       [ 4.9,  1.8,  2. ],
       [ 5.6,  2.1,  2. ],
       [ 5.8,  1.6,  2. ],
       [ 6.1,  1.9,  2. ],
       [ 6.4,  2. ,  2. ],
       [ 5.6,  2.2,  2. ],
       [ 5.1,  1.5,  2. ],
       [ 5.6,  1.4,  2. ],
       [ 6.1,  2.3,  2. ],
       [ 5.6,  2.4,  2. ],
       [ 5.5,  1.8,  2. ],
       [ 4.8,  1.8,  2. ],
       [ 5.4,  2.1,  2. ],
       [ 5.6,  2.4,  2. ],
       [ 5.1,  2.3,  2. ],
       [ 5.1,  1.9,  2. ],
       [ 5.9,  2.3,  2. ],
       [ 5.7,  2.5,  2. ],
       [ 5.2,  2.3,  2. ],
       [ 5. ,  1.9,  2. ],
       [ 5.2,  2. ,  2. ],
       [ 5.4,  2.3,  2. ],
       [ 5.1,  1.8,  2. ]])

In [7]:
# The rows are ordered by class (all 0s, then 1s, then 2s), so the contiguous
# chunks taken later would otherwise each contain a single class. Shuffle the
# rows in place to mix them.
# Seed the global NumPy RNG first so that the fold membership - and therefore
# every score computed below - is the same on each Restart & Run All.
np.random.seed(42)
np.random.shuffle(input_dataset)

In [8]:
# Confirm that the shuffle interleaved the three classes (last column).
# NOTE(review): this prints all 150 rows; input_dataset[:5] would be enough for a spot check.
input_dataset


Out[8]:
array([[ 4.5,  1.5,  1. ],
       [ 5.1,  1.8,  2. ],
       [ 1.3,  0.2,  0. ],
       [ 3.5,  1. ,  1. ],
       [ 1.4,  0.2,  0. ],
       [ 5.5,  1.8,  2. ],
       [ 5.7,  2.1,  2. ],
       [ 4.2,  1.2,  1. ],
       [ 1.6,  0.2,  0. ],
       [ 6. ,  2.5,  2. ],
       [ 5.1,  2.3,  2. ],
       [ 5.8,  1.8,  2. ],
       [ 4.2,  1.3,  1. ],
       [ 1.4,  0.1,  0. ],
       [ 4.9,  1.5,  1. ],
       [ 1.5,  0.2,  0. ],
       [ 4.8,  1.8,  2. ],
       [ 3.3,  1. ,  1. ],
       [ 1.4,  0.2,  0. ],
       [ 1.3,  0.2,  0. ],
       [ 5.1,  2.4,  2. ],
       [ 4.9,  1.8,  2. ],
       [ 4.9,  1.5,  1. ],
       [ 6.4,  2. ,  2. ],
       [ 5.7,  2.3,  2. ],
       [ 1.7,  0.5,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 4.4,  1.2,  1. ],
       [ 1.3,  0.4,  0. ],
       [ 5.1,  1.9,  2. ],
       [ 5.8,  1.6,  2. ],
       [ 1.5,  0.2,  0. ],
       [ 4.7,  1.5,  1. ],
       [ 4.5,  1.6,  1. ],
       [ 1. ,  0.2,  0. ],
       [ 5. ,  1.5,  2. ],
       [ 5.8,  2.2,  2. ],
       [ 1.5,  0.3,  0. ],
       [ 6.3,  1.8,  2. ],
       [ 5.4,  2.3,  2. ],
       [ 4.1,  1. ,  1. ],
       [ 1.2,  0.2,  0. ],
       [ 4.1,  1.3,  1. ],
       [ 4.5,  1.3,  1. ],
       [ 5.6,  2.1,  2. ],
       [ 4.3,  1.3,  1. ],
       [ 1.4,  0.2,  0. ],
       [ 1.5,  0.1,  0. ],
       [ 6.1,  2.3,  2. ],
       [ 5.5,  2.1,  2. ],
       [ 1.3,  0.3,  0. ],
       [ 4.9,  1.8,  2. ],
       [ 5.6,  2.2,  2. ],
       [ 4.5,  1.7,  2. ],
       [ 1.4,  0.2,  0. ],
       [ 4.7,  1.4,  1. ],
       [ 4.2,  1.5,  1. ],
       [ 3.9,  1.2,  1. ],
       [ 5. ,  1.9,  2. ],
       [ 4.5,  1.5,  1. ],
       [ 5.6,  1.8,  2. ],
       [ 4. ,  1.3,  1. ],
       [ 5.3,  1.9,  2. ],
       [ 4.8,  1.4,  1. ],
       [ 4.8,  1.8,  2. ],
       [ 1.1,  0.1,  0. ],
       [ 3.6,  1.3,  1. ],
       [ 5. ,  1.7,  1. ],
       [ 6.1,  1.9,  2. ],
       [ 1.4,  0.3,  0. ],
       [ 1.6,  0.6,  0. ],
       [ 4.7,  1.6,  1. ],
       [ 4.4,  1.4,  1. ],
       [ 5.9,  2.3,  2. ],
       [ 4.1,  1.3,  1. ],
       [ 6.1,  2.5,  2. ],
       [ 5.1,  2. ,  2. ],
       [ 1.5,  0.1,  0. ],
       [ 4.3,  1.3,  1. ],
       [ 1.4,  0.2,  0. ],
       [ 5.6,  1.4,  2. ],
       [ 1.5,  0.4,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 4.8,  1.8,  1. ],
       [ 5.2,  2. ,  2. ],
       [ 3.5,  1. ,  1. ],
       [ 4.6,  1.4,  1. ],
       [ 6.7,  2. ,  2. ],
       [ 1.5,  0.2,  0. ],
       [ 1.4,  0.2,  0. ],
       [ 1.3,  0.2,  0. ],
       [ 1.2,  0.2,  0. ],
       [ 4.6,  1.3,  1. ],
       [ 6.9,  2.3,  2. ],
       [ 5.3,  2.3,  2. ],
       [ 1.7,  0.3,  0. ],
       [ 1.3,  0.3,  0. ],
       [ 4.6,  1.5,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 4.4,  1.4,  1. ],
       [ 4.2,  1.3,  1. ],
       [ 3.8,  1.1,  1. ],
       [ 1.4,  0.3,  0. ],
       [ 4. ,  1.3,  1. ],
       [ 1.7,  0.2,  0. ],
       [ 4.7,  1.2,  1. ],
       [ 1.5,  0.2,  0. ],
       [ 3.9,  1.4,  1. ],
       [ 1.9,  0.4,  0. ],
       [ 5.6,  2.4,  2. ],
       [ 5.1,  1.5,  2. ],
       [ 5.1,  1.6,  1. ],
       [ 4.7,  1.4,  1. ],
       [ 4. ,  1.3,  1. ],
       [ 1.9,  0.2,  0. ],
       [ 5. ,  2. ,  2. ],
       [ 3.3,  1. ,  1. ],
       [ 5.6,  2.4,  2. ],
       [ 3. ,  1.1,  1. ],
       [ 1.5,  0.4,  0. ],
       [ 4.5,  1.5,  1. ],
       [ 1.4,  0.3,  0. ],
       [ 1.6,  0.2,  0. ],
       [ 5.5,  1.8,  2. ],
       [ 4. ,  1. ,  1. ],
       [ 1.5,  0.2,  0. ],
       [ 3.9,  1.1,  1. ],
       [ 1.5,  0.1,  0. ],
       [ 1.5,  0.4,  0. ],
       [ 3.7,  1. ,  1. ],
       [ 6.7,  2.2,  2. ],
       [ 1.5,  0.1,  0. ],
       [ 5.1,  1.9,  2. ],
       [ 1.5,  0.2,  0. ],
       [ 5.9,  2.1,  2. ],
       [ 1.6,  0.2,  0. ],
       [ 6. ,  1.8,  2. ],
       [ 5.7,  2.5,  2. ],
       [ 1.6,  0.2,  0. ],
       [ 5.2,  2.3,  2. ],
       [ 6.6,  2.1,  2. ],
       [ 1.3,  0.2,  0. ],
       [ 5.4,  2.1,  2. ],
       [ 4. ,  1.2,  1. ],
       [ 4.5,  1.5,  1. ],
       [ 1.6,  0.2,  0. ],
       [ 4.9,  2. ,  2. ],
       [ 1.6,  0.4,  0. ],
       [ 4.4,  1.3,  1. ],
       [ 1.7,  0.4,  0. ]])

In [9]:
# Split the shuffled array back into plain Python lists:
#   list1 -> [column 0, column 1] feature pairs, one per row
#   list2 -> the label (column 2) of the same row, as a float
list1 = [[float(row[0]), float(row[1])] for row in input_dataset]
list2 = [float(row[2]) for row in input_dataset]

In [10]:
# Inspect the feature pairs (same row order as the shuffled array).
# NOTE(review): this prints every row; list1[:5] would be enough for a spot check.
list1


Out[10]:
[[4.5, 1.5],
 [5.1, 1.8],
 [1.3, 0.2],
 [3.5, 1.0],
 [1.4, 0.2],
 [5.5, 1.8],
 [5.7, 2.1],
 [4.2, 1.2],
 [1.6, 0.2],
 [6.0, 2.5],
 [5.1, 2.3],
 [5.8, 1.8],
 [4.2, 1.3],
 [1.4, 0.1],
 [4.9, 1.5],
 [1.5, 0.2],
 [4.8, 1.8],
 [3.3, 1.0],
 [1.4, 0.2],
 [1.3, 0.2],
 [5.1, 2.4],
 [4.9, 1.8],
 [4.9, 1.5],
 [6.4, 2.0],
 [5.7, 2.3],
 [1.7, 0.5],
 [1.4, 0.2],
 [4.4, 1.2],
 [1.3, 0.4],
 [5.1, 1.9],
 [5.8, 1.6],
 [1.5, 0.2],
 [4.7, 1.5],
 [4.5, 1.6],
 [1.0, 0.2],
 [5.0, 1.5],
 [5.8, 2.2],
 [1.5, 0.3],
 [6.3, 1.8],
 [5.4, 2.3],
 [4.1, 1.0],
 [1.2, 0.2],
 [4.1, 1.3],
 [4.5, 1.3],
 [5.6, 2.1],
 [4.3, 1.3],
 [1.4, 0.2],
 [1.5, 0.1],
 [6.1, 2.3],
 [5.5, 2.1],
 [1.3, 0.3],
 [4.9, 1.8],
 [5.6, 2.2],
 [4.5, 1.7],
 [1.4, 0.2],
 [4.7, 1.4],
 [4.2, 1.5],
 [3.9, 1.2],
 [5.0, 1.9],
 [4.5, 1.5],
 [5.6, 1.8],
 [4.0, 1.3],
 [5.3, 1.9],
 [4.8, 1.4],
 [4.8, 1.8],
 [1.1, 0.1],
 [3.6, 1.3],
 [5.0, 1.7],
 [6.1, 1.9],
 [1.4, 0.3],
 [1.6, 0.6],
 [4.7, 1.6],
 [4.4, 1.4],
 [5.9, 2.3],
 [4.1, 1.3],
 [6.1, 2.5],
 [5.1, 2.0],
 [1.5, 0.1],
 [4.3, 1.3],
 [1.4, 0.2],
 [5.6, 1.4],
 [1.5, 0.4],
 [1.4, 0.2],
 [4.8, 1.8],
 [5.2, 2.0],
 [3.5, 1.0],
 [4.6, 1.4],
 [6.7, 2.0],
 [1.5, 0.2],
 [1.4, 0.2],
 [1.3, 0.2],
 [1.2, 0.2],
 [4.6, 1.3],
 [6.9, 2.3],
 [5.3, 2.3],
 [1.7, 0.3],
 [1.3, 0.3],
 [4.6, 1.5],
 [4.5, 1.5],
 [4.4, 1.4],
 [4.2, 1.3],
 [3.8, 1.1],
 [1.4, 0.3],
 [4.0, 1.3],
 [1.7, 0.2],
 [4.7, 1.2],
 [1.5, 0.2],
 [3.9, 1.4],
 [1.9, 0.4],
 [5.6, 2.4],
 [5.1, 1.5],
 [5.1, 1.6],
 [4.7, 1.4],
 [4.0, 1.3],
 [1.9, 0.2],
 [5.0, 2.0],
 [3.3, 1.0],
 [5.6, 2.4],
 [3.0, 1.1],
 [1.5, 0.4],
 [4.5, 1.5],
 [1.4, 0.3],
 [1.6, 0.2],
 [5.5, 1.8],
 [4.0, 1.0],
 [1.5, 0.2],
 [3.9, 1.1],
 [1.5, 0.1],
 [1.5, 0.4],
 [3.7, 1.0],
 [6.7, 2.2],
 [1.5, 0.1],
 [5.1, 1.9],
 [1.5, 0.2],
 [5.9, 2.1],
 [1.6, 0.2],
 [6.0, 1.8],
 [5.7, 2.5],
 [1.6, 0.2],
 [5.2, 2.3],
 [6.6, 2.1],
 [1.3, 0.2],
 [5.4, 2.1],
 [4.0, 1.2],
 [4.5, 1.5],
 [1.6, 0.2],
 [4.9, 2.0],
 [1.6, 0.4],
 [4.4, 1.3],
 [1.7, 0.4]]

In [11]:
# Number of rows per fold when the data is cut into 5 folds (floor division).
# If the row count is not a multiple of 5, chunks() will produce an extra,
# shorter chunk for the remainder.
list_chunk_length = len(list1) // 5
list_chunk_length


Out[11]:
30

In [12]:
# Helper used to cut the dataset into folds.
def chunks(l, n):
    """Cut ``l`` into consecutive slices of at most ``n`` items.

    Parameters
    ----------
    l : sliceable sequence (list, str, ...)
        The sequence to split; it is not modified.
    n : int
        Desired slice length. Values below 1 are treated as 1.

    Returns
    -------
    list
        The slices in order. The last one is shorter when ``len(l)`` is not
        a multiple of the slice length; an empty ``l`` yields ``[]``.
    """
    size = max(1, n)  # guard against a zero or negative step
    pieces = []
    for start in range(0, len(l), size):
        pieces.append(l[start:start + size])
    return pieces

In [13]:
# Cut the feature pairs into consecutive chunks of list_chunk_length rows
# (five chunks of 30 for the 150-row dataset); each chunk is one fold.
# NOTE: this rebinds `x`, which until now held the feature array from iris.data.
x = chunks(list1, list_chunk_length)

In [14]:
# Cut the labels into chunks of the same size, so y[i] holds the labels
# for the rows in x[i].
# NOTE: this rebinds `y`, which until now held the label array from iris.target.
y = chunks(list2, list_chunk_length)

In [15]:
# Example of a chunk: the feature pairs that make up the first fold.
x[0]


Out[15]:
[[4.5, 1.5],
 [5.1, 1.8],
 [1.3, 0.2],
 [3.5, 1.0],
 [1.4, 0.2],
 [5.5, 1.8],
 [5.7, 2.1],
 [4.2, 1.2],
 [1.6, 0.2],
 [6.0, 2.5],
 [5.1, 2.3],
 [5.8, 1.8],
 [4.2, 1.3],
 [1.4, 0.1],
 [4.9, 1.5],
 [1.5, 0.2],
 [4.8, 1.8],
 [3.3, 1.0],
 [1.4, 0.2],
 [1.3, 0.2],
 [5.1, 2.4],
 [4.9, 1.8],
 [4.9, 1.5],
 [6.4, 2.0],
 [5.7, 2.3],
 [1.7, 0.5],
 [1.4, 0.2],
 [4.4, 1.2],
 [1.3, 0.4],
 [5.1, 1.9]]

In [16]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; sklearn.model_selection is its replacement. Try the current location
# first and fall back to the old module only on legacy installs.
# NOTE(review): train_test_split is not used in any cell visible in this notebook.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

In [17]:
# Decision tree used for both the manual folds and cross_val_score below.
# random_state is pinned because, per the scikit-learn docs, the tree breaks
# ties between equally good splits at random; without a fixed seed the fitted
# tree (and the reported accuracies) can change from run to run.
dt = tree.DecisionTreeClassifier(random_state=42)

In [34]:
# Barney's solution: manual k-fold cross-validation. Each chunk takes one turn
# as the test fold while all the remaining chunks are concatenated into the
# training set.
#
# The folds are selected by index rather than by pop()/append() on `x` and `y`,
# so the lists are never mutated while being iterated over and re-running this
# cell always starts from the same state.

# One accuracy per fold (the name is historical); averaged in the next cell.
list_of_average_scores = []

for fold_index in range(len(x)):
    # Held-out fold.
    x_test = x[fold_index]
    y_test = y[fold_index]

    # Every other chunk, flattened into one list of rows. The chunks that
    # follow the test fold come first, then the ones before it.
    x_train = [row
               for chunk in x[fold_index + 1:] + x[:fold_index]
               for row in chunk]
    y_train = [label
               for chunk in y[fold_index + 1:] + y[:fold_index]
               for label in chunk]

    dt = dt.fit(x_train, y_train)  # Fitting
    y_pred = dt.predict(x_test)  # Predicting
    accuracy_score = metrics.accuracy_score(y_test, y_pred)  # Accuracy on the held-out fold
    list_of_average_scores.append(accuracy_score)

In [35]:
# Mean accuracy over the manual folds (each list entry is a single fold's
# accuracy, despite the list's name).
np.mean(list_of_average_scores)


Out[35]:
0.94000000000000006

In [36]:
# Now repeat the evaluation with scikit-learn's built-in cross-validation helper

In [37]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; sklearn.model_selection is its replacement. Try the current location
# first and fall back to the old module only on legacy installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

In [38]:
# Let scikit-learn run 5-fold cross-validation on the same features and labels.
# Per the scikit-learn docs, an integer `cv` with a classifier selects
# stratified folds, so the splits are not the same as the contiguous chunks
# used in the manual loop and the two mean scores need not match exactly.
scores = cross_val_score(estimator=dt, X=list1, y=list2, cv=5)

In [39]:
# Mean accuracy over the folds evaluated by cross_val_score.
np.mean(scores)


Out[39]:
0.94666666666666666

In [ ]: