In [6]:
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np
from random import shuffle
from sklearn.metrics import accuracy_score
In [2]:
iris = datasets.load_iris()  # load the iris data set through sklearn.datasets
In [3]:
# List the keys exposed by the loaded dataset object.
iris.keys()
Out[3]:
In [4]:
# Show the entry stored under 'target_names'.
iris['target_names']
Out[4]:
In [5]:
# Show the entry stored under 'target'.
iris['target']
Out[5]:
In [6]:
# Show the entry stored under 'data' (this displays the whole array).
iris['data']
Out[6]:
In [7]:
# Attributes: every row, keeping only the columns from index 2 onward.
x = iris['data'][:, 2:]
# Target variable for those rows.
y = iris['target']
In [8]:
# Print each attribute row next to its target value.
for features, label in zip(x, y):
    print(features, label)
In [9]:
# Inspect the target array.
y
Out[9]:
In [17]:
# shuffling data (which is X), and target (which is Y) and adding into two seperate lists
shuf_x = []
shuf_y = []
shuf_index = list(range(len(x)))
shuffle(shuf_index)
for i in shuf_index:
shuf_x.append(x[i])
shuf_y.append(y[i])
In [18]:
# Size of one fold: a fifth of the shuffled attribute rows, rounded down.
chunk_length = len(shuf_x) // 5
chunk_length
Out[18]:
In [19]:
# Size of one fold: a fifth of the shuffled target values, rounded down.
chunk_length = len(shuf_y) // 5
chunk_length
Out[19]:
In [20]:
def chunks(l, num):
    """Split ``l`` into consecutive slices of ``num`` items each.

    Args:
        l: A sequence that supports ``len`` and slicing.
        num: Desired slice length; values below 1 are treated as 1.

    Returns:
        A list of slices in their original order. The last slice is shorter
        when ``len(l)`` is not a multiple of ``num``; an empty ``l`` gives
        an empty list.
    """
    # Guard against a zero or negative step, which range() would reject
    # or which would produce no slices at all.
    num = max(1, num)
    return [l[i:i + num] for i in range(0, len(l), num)]
In [21]:
# Split the shuffled targets into pieces of chunk_length items.
chunk_y = chunks(shuf_y, chunk_length)
In [22]:
# Split the shuffled attribute rows into pieces of chunk_length items.
chunk_x = chunks(shuf_x, chunk_length)
In [23]:
# Decision tree classifier; random_state is fixed so that repeated runs
# build the same tree from the same training data.
dt = tree.DecisionTreeClassifier(random_state=42)
In [30]:
# Manual k-fold cross-validation: each chunk serves once as the test set
# while all remaining chunks together form the training set.
# Folds are selected by index, so chunk_x / chunk_y are never mutated and the
# global x / y arrays are not overwritten by loop variables.
Average_list = []
for fold in range(len(chunk_x)):
    # Held-out chunk for this round
    x_test = chunk_x[fold]
    y_test = chunk_y[fold]
    # Every other chunk, flattened into one training list
    x_train = [row for j, part in enumerate(chunk_x) if j != fold for row in part]
    y_train = [label for j, part in enumerate(chunk_y) if j != fold for label in part]
    # Fitting on the training data
    dt = dt.fit(x_train, y_train)
    # Predicting the held-out chunk
    y_pred = dt.predict(x_test)
    # Getting the accuracy score for this fold
    Accuracy_score = accuracy_score(y_test, y_pred)
    # Collecting the per-fold accuracies
    Average_list.append(Accuracy_score)
In [31]:
# Print the list of accuracy scores stored in Average_list.
print(Average_list)
In [2]:
# cross_val_score lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
# The fallback keeps the notebook working on very old installations.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
In [7]:
# Load the iris data set again for the cross_val_score part of the notebook.
iris = datasets.load_iris()
In [8]:
# Attributes: every row, keeping only the columns from index 2 onward.
x = iris['data'][:, 2:]
# Target variable for those rows.
y = iris['target']
In [9]:
# Decision tree classifier; random_state is fixed so that repeated runs
# give the same cross-validation scores.
dt = tree.DecisionTreeClassifier(random_state=42)
In [10]:
# Fit the tree on the complete x / y data.
# NOTE(review): cross_val_score presumably refits its own copy of the
# estimator per fold, so this fit may not be needed for the scores below — confirm.
dt = dt.fit(x,y)
In [11]:
# http://scikit-learn.org/stable/modules/cross_validation.html#computing-cross-validated-metrics
# We pass in the decision tree classifier `dt` together with our x and y
# values and get back an array of scores, one per fold (cv=5 -> 5 values).
scores = cross_val_score(dt, x, y, cv=5)
In [12]:
# Display the array returned by cross_val_score.
scores
Out[12]:
In [13]:
import numpy as np
In [14]:
# Average of the cross-validation scores.
scores.mean()
Out[14]:
In [ ]: