In [1]:
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
In [2]:
# Load scikit-learn's bundled iris dataset; later cells read its .data and
# .target attributes.
iris = datasets.load_iris()
In [3]:
# Features: every row, columns from index 2 onward.  Labels: the target vector.
# NOTE(review): columns 2+ are presumably the petal measurements — confirm
# against iris.feature_names.
x, y = iris.data[:, 2:], iris.target
In [4]:
# Shuffle samples and labels together so every (features, label) pair stays
# aligned after the reordering.
import random

# A private, seeded generator makes the shuffle (and every score derived from
# it) reproducible after a kernel restart without touching the global
# random state.
SHUFFLE_SEED = 0

z = list(zip(x, y))
random.Random(SHUFFLE_SEED).shuffle(z)
# Unzip back into two parallel tuples: x holds the feature rows, y the labels.
x, y = zip(*z)
In [354]:
# Manual k-fold cross-validation of a decision tree on the shuffled samples.
# Every sample is used for testing exactly once and for training in all the
# other folds; `scores` ends up with one accuracy value per fold.
scores = []
numberoffolds = 5

# Arrays make the index-based selection below work whether x / y arrive as
# tuples, lists or ndarrays.
features = np.asarray(x)
labels = np.asarray(y)

# np.array_split spreads any remainder over the first folds, so the folds
# cover every sample even when len(x) is not divisible by numberoffolds.
fold_indices = np.array_split(np.arange(len(features)), numberoffolds)

for i, test_idx in enumerate(fold_indices):
    # hold out fold i as test data and train on all remaining folds
    train_idx = np.concatenate(fold_indices[:i] + fold_indices[i + 1:])
    x_test, y_test = features[test_idx], labels[test_idx]
    x_train, y_train = features[train_idx], labels[train_idx]

    dt = tree.DecisionTreeClassifier().fit(x_train, y_train)

    # run on test data
    y_pred = dt.predict(x_test)
    score = metrics.accuracy_score(y_test, y_pred)
    # append this fold's accuracy to the list
    scores.append(score)
In [355]:
# Display the per-fold accuracy values collected by the cross-validation loop.
scores
Out[355]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [115]:
# Split x into `numberoffolds` consecutive chunks.
# Note: this rebinds z, which the shuffle cell used for the zipped (x, y) pairs.
z=np.array_split(x,numberoffolds)
In [129]:
# Inspect the chunk at index 3 (the fourth one).
z[3]
Out[129]:
In [122]:
# int(numberoffolds/2)+1: evaluates to 3 when numberoffolds is 5.
# NOTE(review): presumably the 1-based position of the middle fold — confirm.
middle=int(numberoffolds/2)+1
In [124]:
# Every chunk except the middle one.  `middle` is a 1-based position, so the
# held-out chunk sits at index middle-1: keep everything before it and
# everything from index `middle` onward.  (Starting the second slice at
# middle+1 would drop two chunks instead of one.)
z[0:middle-1]+z[middle:numberoffolds]
Out[124]:
In [199]:
# Split x into 5 consecutive chunks (same call as the earlier split cell, with
# the fold count written as a literal).
z=np.array_split(x,5)
In [310]:
# Join column 0 of the first two chunks into one flat array.
# NOTE(review): this rebinds y, which earlier cells use for the class labels;
# re-run the label-loading cells before using y as labels again.
y=np.append(z[0][:,0],z[1][:,0])
In [317]:
# Concatenate column 0 of every chunk in z into one flat array.
# Column 0 of the first chunk seeds the accumulator.
initarray = z[0][:, 0]
allsubarrays = initarray
for count in range(1, len(z)):
    presentsubarray = z[count][:, 0]
    # np.append returns a new array, so the result has to be re-assigned to
    # carry the accumulated values into the next iteration.
    allsubarrays = np.append(allsubarrays, presentsubarray)
# NumPy has no `extend`; the accumulated array is already the final result.
allsubarraysfinal = allsubarrays
In [319]:
# Display allsubarrays for inspection.
allsubarrays
Out[319]:
In [ ]:
In [338]:
# For each chunk of z in turn: hold that chunk out as the test split and pool
# all remaining chunks as the training split.
# NOTE(review): column 0 is used as the input and column 1 as the target, as
# in the original draft.  If z was built with np.array_split(x, ...), column 1
# is a second feature rather than the class label — confirm the intended
# layout.
for i in range(len(z)):
    held_out = z[i]
    # every chunk except the held-out one, stacked row-wise
    remaining = np.concatenate([chunk for j, chunk in enumerate(z) if j != i])
    x_test = held_out[:, 0]
    y_test = held_out[:, 1]
    x_train = remaining[:, 0]
    y_train = remaining[:, 1]
In [331]:
# Inspect the test split left bound by the most recently run fold loop.
x_test
Out[331]:
In [ ]:
In [ ]:
In [246]:
Out[246]:
In [52]:
# Draft k-fold loop, repaired so it runs end to end: each pass holds out one
# contiguous fold for testing, trains a decision tree on the rest, prints the
# fold's accuracy and records it in score_list.
feature_rows = np.asarray(x)
label_values = np.asarray(y)
n_samples = len(feature_rows)
score_list = []

for count in range(numberoffolds):
    # Integer fold boundaries: multiplying before dividing keeps them exact,
    # and the last fold always ends at n_samples so no sample is dropped.
    start = count * n_samples // numberoffolds
    stop = (count + 1) * n_samples // numberoffolds

    x_test = feature_rows[start:stop]
    y_test = label_values[start:stop]
    # training data is everything outside the held-out slice
    x_train = np.concatenate((feature_rows[:start], feature_rows[stop:]))
    y_train = np.concatenate((label_values[:start], label_values[stop:]))

    dt = tree.DecisionTreeClassifier().fit(x_train, y_train)
    # predict with the fitted estimator (not the `tree` module)
    y_pred = dt.predict(x_test)
    score = metrics.accuracy_score(y_test, y_pred)
    print(score)
    # append this fold's score to the list
    score_list.append(score)
In [ ]:
In [ ]: