In [1]:
import pandas as pd
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics

In [2]:
iris = datasets.load_iris()

In [3]:
x=iris.data[:,2:]
y = iris.target

In [4]:
z = list(zip(x, y))
import random
random.shuffle(z)

x, y = zip(*z)

In [354]:
scores = []
numberoffolds = 5
subarray_size = len(x)/numberoffolds - 1

for i in range(1,numberoffolds):
    x_test = x[int(subarray_size * (i-1)): int(subarray_size * i)]
    y_test = y[int(subarray_size * (i-1)): int(subarray_size * i)]

    #hold out a different portion as training data
    x_train = x[0:int(subarray_size * (i-1))] + x[int(subarray_size * i):int(len(x)-1)]
    y_train = y[0:int(subarray_size * (i-1))] + y[int(subarray_size * i):int(len(x)-1)]


    dt = tree.DecisionTreeClassifier().fit(x_train,y_train)

    #run on test data
    y_pred = dt.predict(x_test)
    score = metrics.accuracy_score(y_test, y_pred)

    #return score, append it to list
    scores.append(score)

In [355]:
scores


Out[355]:
[0.96551724137931039, 1.0, 0.96551724137931039, 0.89655172413793105]

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [115]:
z=np.array_split(x,numberoffolds)

In [129]:
z[3]


Out[129]:
array([[ 6.1,  1.9],
       [ 1.4,  0.3],
       [ 1.4,  0.2],
       [ 3.7,  1. ],
       [ 1.4,  0.2],
       [ 1.6,  0.6],
       [ 3.9,  1.1],
       [ 4.7,  1.4],
       [ 1.5,  0.2],
       [ 5.9,  2.3],
       [ 1.5,  0.4],
       [ 5.9,  2.1],
       [ 4.5,  1.5],
       [ 4. ,  1.3],
       [ 1.9,  0.2],
       [ 5.5,  2.1],
       [ 6.1,  2.5],
       [ 5.7,  2.3],
       [ 1.7,  0.3],
       [ 4.5,  1.5],
       [ 5. ,  1.7],
       [ 1.4,  0.2],
       [ 6.6,  2.1],
       [ 5.6,  2.2],
       [ 1.3,  0.2],
       [ 5.5,  1.8],
       [ 4.2,  1.5],
       [ 6.7,  2.2],
       [ 4.1,  1.3],
       [ 4.6,  1.4]])

In [122]:
middle=int(numberoffolds/2)+1

In [124]:
z[0:middle-1]+z[middle+1:numberoffolds]


Out[124]:
[array([[ 6.3,  1.8],
        [ 5.4,  2.3],
        [ 1. ,  0.2],
        [ 1.5,  0.2],
        [ 4.9,  1.5],
        [ 1.3,  0.4],
        [ 1.7,  0.2],
        [ 4.5,  1.5],
        [ 5.1,  2. ],
        [ 5.3,  2.3],
        [ 1.4,  0.3],
        [ 3.6,  1.3],
        [ 4.9,  2. ],
        [ 6.9,  2.3],
        [ 1.9,  0.4],
        [ 4.8,  1.8],
        [ 1.4,  0.3],
        [ 5.1,  1.8],
        [ 4. ,  1.3],
        [ 1.5,  0.4],
        [ 3.3,  1. ],
        [ 4.7,  1.4],
        [ 1.7,  0.4],
        [ 5.1,  1.6],
        [ 4.8,  1.4],
        [ 1.4,  0.2],
        [ 5.1,  2.3],
        [ 1.4,  0.2],
        [ 4.2,  1.3],
        [ 4.9,  1.5]]), array([[ 5.1,  2.4],
        [ 1.5,  0.2],
        [ 4.4,  1.2],
        [ 1.2,  0.2],
        [ 3. ,  1.1],
        [ 1.5,  0.2],
        [ 4.3,  1.3],
        [ 4.4,  1.4],
        [ 5.6,  1.4],
        [ 5. ,  1.5],
        [ 5.8,  1.8],
        [ 1.6,  0.2],
        [ 4.4,  1.4],
        [ 1.6,  0.4],
        [ 4.6,  1.3],
        [ 1.1,  0.1],
        [ 6. ,  2.5],
        [ 4.8,  1.8],
        [ 4. ,  1.3],
        [ 1.6,  0.2],
        [ 1.6,  0.2],
        [ 1.2,  0.2],
        [ 4.9,  1.8],
        [ 4.7,  1.5],
        [ 4.1,  1.3],
        [ 1.3,  0.2],
        [ 5.4,  2.1],
        [ 1.4,  0.1],
        [ 1.5,  0.1],
        [ 6.7,  2. ]]), array([[ 5.7,  2.5],
        [ 1.5,  0.4],
        [ 3.3,  1. ],
        [ 4. ,  1. ],
        [ 4.7,  1.2],
        [ 5.6,  2.4],
        [ 6.4,  2. ],
        [ 5.8,  1.6],
        [ 1.5,  0.3],
        [ 5.6,  2.4],
        [ 1.3,  0.2],
        [ 4.7,  1.6],
        [ 1.3,  0.2],
        [ 1.7,  0.5],
        [ 4.5,  1.5],
        [ 5.1,  1.9],
        [ 5.6,  1.8],
        [ 4.2,  1.3],
        [ 1.5,  0.1],
        [ 4.3,  1.3],
        [ 5.6,  2.1],
        [ 4.9,  1.8],
        [ 1.6,  0.2],
        [ 3.8,  1.1],
        [ 4.6,  1.5],
        [ 5.2,  2.3],
        [ 5.7,  2.1],
        [ 5. ,  1.9],
        [ 4.2,  1.2],
        [ 3.5,  1. ]])]

In [199]:
z=np.array_split(x,5)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
c:\users\harsha devulapalli\appdata\local\programs\python\python35\lib\site-packages\numpy\lib\shape_base.py in array_split(ary, indices_or_sections, axis)
    398     try:
--> 399         Ntotal = ary.shape[axis]
    400     except AttributeError:

AttributeError: 'int' object has no attribute 'shape'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-199-69a0316d9ea1> in <module>()
----> 1 z=np.array_split(x,5)

c:\users\harsha devulapalli\appdata\local\programs\python\python35\lib\site-packages\numpy\lib\shape_base.py in array_split(ary, indices_or_sections, axis)
    399         Ntotal = ary.shape[axis]
    400     except AttributeError:
--> 401         Ntotal = len(ary)
    402     try:
    403         # handle scalar case.

TypeError: object of type 'int' has no len()

In [310]:
y=np.append(z[0][:,0],z[1][:,0]])


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-310-97117e770334> in <module>()
----> 1 y=np.append(z[0][:,0],z[1][:,0],z[2][:,0],z[3][:,0],z[4][:,0])

TypeError: append() takes from 2 to 3 positional arguments but 5 were given

In [317]:
initarray=z[0][:,0]
for count in range(0,4):
    presentsubarray=z[count][:,0]
    allsubarrays=np.append(z[count][:,0],z[count+1][:,0])
    allsubarraysfinal=np.extend(allsubarrays)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-317-885bec082831> in <module>()
      3     presentsubarray=z[count][:,0]
      4     allsubarrays=np.append(z[count][:,0],z[count+1][:,0])
----> 5     allsubarraysfinal=np.extend(allsubarrays)

AttributeError: module 'numpy' has no attribute 'extend'

In [319]:
allsubarrays


Out[319]:
array([ 6.3,  5.4,  1. ,  1.5,  4.9,  1.3,  1.7,  4.5,  5.1,  5.3,  1.4,
        3.6,  4.9,  6.9,  1.9,  4.8,  1.4,  5.1,  4. ,  1.5,  3.3,  4.7,
        1.7,  5.1,  4.8,  1.4,  5.1,  1.4,  4.2,  4.9,  5.1,  1.5,  4.4,
        1.2,  3. ,  1.5,  4.3,  4.4,  5.6,  5. ,  5.8,  1.6,  4.4,  1.6,
        4.6,  1.1,  6. ,  4.8,  4. ,  1.6,  1.6,  1.2,  4.9,  4.7,  4.1,
        1.3,  5.4,  1.4,  1.5,  6.7])

In [ ]:


In [338]:
for i in range(len(z)):
    x_test=z[i][:,0]
    y_test=z[i][:,1]
            
    x_train=z[][:,0]
    y_train=z[int(i)][:,1]


  File "<ipython-input-338-872b1c4809c3>", line 5
    x_train=z[][:,0]
              ^
SyntaxError: invalid syntax

In [331]:
x_test


Out[331]:
array([ 5.7,  1.5,  3.3,  4. ,  4.7,  5.6,  6.4,  5.8,  1.5,  5.6,  1.3,
        4.7,  1.3,  1.7,  4.5,  5.1,  5.6,  4.2,  1.5,  4.3,  5.6,  4.9,
        1.6,  3.8,  4.6,  5.2,  5.7,  5. ,  4.2,  3.5])

In [ ]:


In [ ]:


In [246]:



Out[246]:
5.4000000000000004

In [52]:
count=0
for i in range(numberoffolds):
    if count >=0 and count <=numberoffolds:
        x_train = x[0][int((count/numberoffolds)*len(x)):(int((count+1/numberoffolds)*len(x))-1)]
        print(x_train)
        y_train = y[int((count/numberoffolds)*len(x)):(int((count+1/numberoffolds)*len(x))-1)][1]
        x_test =  z[0]
        y_test =  

        dt = tree.DecisionTreeClassifier().fit(x_train,y_train)

        #y_pred= tree.predict(x_test)
        #score = metrics.accuracy_score(y_test, y_pred)
        print(score)
        #return score, append it to list
        #score_list.append(score)
        count=count+1


[ 1.6  0.2]
[]
[]
c:\users\harsha devulapalli\appdata\local\programs\python\python35\lib\site-packages\sklearn\utils\validation.py:386: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and willraise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-52-78949cf3ad3f> in <module>()
      6         y_train = y[int((count/numberoffolds)*len(x)):(int((count+1/numberoffolds)*len(x))-1)][1]
      7         x_test =  x[0][(int((count+1/numberoffolds)*len(x))):len(x)]
----> 8         y_test =  y[(int((count+1/numberoffolds)*len(x))):len(x)][1]
      9 
     10         dt = tree.DecisionTreeClassifier().fit(x_train,y_train)

IndexError: tuple index out of range

In [ ]:


In [ ]: