In [438]:
import pandas as pd
%matplotlib inline
from sklearn import datasets, tree, metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
In [439]:
iris = datasets.load_iris()
In [440]:
iris.keys()
Out[440]:
In [441]:
x = iris.data[:,2:]
y = iris.target
In [442]:
from random import shuffle
# Shuffle x and y in unison via a shared, shuffled index list
x_shuf = []
y_shuf = []
index_shuf = list(range(len(x)))
shuffle(index_shuf)
for i in index_shuf:
    x_shuf.append(x[i])
    y_shuf.append(y[i])
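As an aside, the same paired shuffle can be done in one step with numpy's permutation (an illustrative alternative, assuming x and y are the numpy arrays loaded above; not part of the original run):
In [ ]:
# Shuffle features and targets together with a single random permutation
perm = np.random.permutation(len(x))
x_shuf_np = x[perm]
y_shuf_np = y[perm]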
In [443]:
#Splitting the lists
In [444]:
list_chunk_length = int(len(x_shuf)/5)
In [445]:
# Helper: split list l into consecutive chunks of length n
def chunks(l, n):
    n = max(1, n)
    return [l[i:i + n] for i in range(0, len(l), n)]
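A quick sanity check of chunks() on a toy list (illustrative only):
In [ ]:
# chunks() returns consecutive slices of length n; the last chunk is shorter
# when the list length is not a multiple of n
chunks(list(range(10)), 3)
# -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]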
In [446]:
x_chunks = chunks(x_shuf, list_chunk_length)
y_chunks = chunks(y_shuf, list_chunk_length)
In [447]:
dt = tree.DecisionTreeClassifier()
In [448]:
x_test = x_chunks[0] # the attributes
y_test = y_chunks[0] # the target variable
In [449]:
x_train = sum(x_chunks[1:], [])
y_train = sum(y_chunks[1:], [])
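For reference, sum(list_of_lists, []) simply concatenates the sub-lists; itertools.chain does the same and is the more common idiom (an equivalent sketch, not part of the original run):
In [ ]:
# Equivalent flattening of the training chunks with itertools.chain
from itertools import chain
x_train_alt = list(chain.from_iterable(x_chunks[1:]))
y_train_alt = list(chain.from_iterable(y_chunks[1:]))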
In [450]:
dt = dt.fit(x_train, y_train)
In [451]:
y_pred = dt.predict(x_test)
In [452]:
Accuracy_score = metrics.accuracy_score(y_test, y_pred)
In [453]:
print("Accuracy:{0:.3f}".format(metrics.accuracy_score(y_test, y_pred)),"\nClassification report:")
print(metrics.classification_report(y_test,y_pred),"\n")
print(metrics.confusion_matrix(y_test,y_pred),"\n")
In [454]:
Accuracy_score
Out[454]:
In [455]:
#test = list1.pop(0)
In [456]:
#list1.append(test)
In [457]:
#list1
In [458]:
dt = tree.DecisionTreeClassifier()
Average_list = []
# Rotate through the chunks so every chunk is used as the test set exactly once.
# (The loop variable deliberately avoids the names x and y so the full data set
# is not shadowed for the cross_val_score cell further down.)
for _ in range(len(x_chunks)):
    # Popping the first chunk off the list to use as the test set
    x_test = x_chunks.pop(0)
    # Making one list out of the remaining chunks
    x_train = sum(x_chunks, [])
    # Adding the popped chunk back onto the end of the original list again. We need it.
    x_chunks.append(x_test)
    # Popping the first chunk of targets off the list
    y_test = y_chunks.pop(0)
    y_train = sum(y_chunks, [])
    # Popping it back on again
    y_chunks.append(y_test)
    # Fitting on the training folds
    dt = dt.fit(x_train, y_train)
    # Predicting on the held-out fold
    y_pred = dt.predict(x_test)
    # Getting the accuracy score
    Accuracy_score = metrics.accuracy_score(y_test, y_pred)
    # Collecting the per-fold scores
    Average_list.append(Accuracy_score)
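As a side note, the same "use each chunk once as the test fold" rotation could be written with collections.deque; this is only an illustrative sketch of an alternative, not part of the original run.
In [ ]:
# Illustrative alternative: rotate the chunk lists with a deque instead of pop/append
from collections import deque
xq, yq = deque(x_chunks), deque(y_chunks)
alt_scores = []
for _ in range(len(xq)):
    x_tr = sum(list(xq)[1:], [])   # everything except the front chunk
    y_tr = sum(list(yq)[1:], [])
    clf = tree.DecisionTreeClassifier().fit(x_tr, y_tr)
    alt_scores.append(metrics.accuracy_score(yq[0], clf.predict(xq[0])))
    xq.rotate(-1)                  # move the front chunk to the back
    yq.rotate(-1)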
In [459]:
#The average score:
sum(Average_list) / 5
Out[459]:
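Equivalently, the average can be computed with numpy so the number of folds is not hard-coded (illustrative only):
In [ ]:
# Same mean, derived from however many fold scores were collected
np.mean(Average_list)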
In [460]:
from sklearn.model_selection import cross_val_score
In [461]:
scores = cross_val_score(dt, x, y, cv=5)
In [462]:
import numpy as np
In [463]:
np.mean(scores)
Out[463]:
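For a quick sanity check, the per-fold scores and their spread can be printed as well (an illustrative sketch, not part of the original output):
In [ ]:
# Per-fold accuracies from cross_val_score plus mean and standard deviation
print("Fold scores:", scores)
print("Mean: {0:.3f}, Std: {1:.3f}".format(np.mean(scores), np.std(scores)))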
In [425]:
# My testing material
list1 = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
list2 = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
for x, y in zip(list1, list2):
    test = list1.pop(0)
    print(test)
    list1.append(test)
    print(list1)
    test2 = list2.pop(0)
    print(test2)
    list2.append(test2)
    print(list2)