We can split a dataset in three ways- 1) Using indices from top to bottom 2) Using indices using random numbers using numpy 3) Using text_train split in scikit learn

``````

In [17]:

import pandas as pd

``````
``````

In [18]:

``````
``````

In [19]:

``````
``````

Out[19]:

text-align: right;
}

text-align: left;
}

.dataframe tbody tr th {
vertical-align: top;
}

Unnamed: 0
Sepal.Length
Sepal.Width
Petal.Length
Petal.Width
Species

0
1
5.1
3.5
1.4
0.2
setosa

1
2
4.9
3.0
1.4
0.2
setosa

2
3
4.7
3.2
1.3
0.2
setosa

3
4
4.6
3.1
1.5
0.2
setosa

4
5
5.0
3.6
1.4
0.2
setosa

``````
``````

In [22]:

iris.shape

``````
``````

Out[22]:

(150, 6)

``````
``````

In [23]:

iris=iris.iloc[:,1:]

``````
``````

In [24]:

``````
``````

Out[24]:

text-align: right;
}

text-align: left;
}

.dataframe tbody tr th {
vertical-align: top;
}

Sepal.Length
Sepal.Width
Petal.Length
Petal.Width
Species

0
5.1
3.5
1.4
0.2
setosa

1
4.9
3.0
1.4
0.2
setosa

2
4.7
3.2
1.3
0.2
setosa

3
4.6
3.1
1.5
0.2
setosa

4
5.0
3.6
1.4
0.2
setosa

``````
``````

In [25]:

iris.shape

``````
``````

Out[25]:

(150, 5)

``````
``````

In [26]:

import numpy as np

``````
``````

In [27]:

len(iris)

``````
``````

Out[27]:

150

``````
``````

In [29]:

indices = np.random.permutation(len(iris))

``````
``````

In [30]:

indices

``````
``````

Out[30]:

array([ 70,  47, 123,   3,  14, 141,   5, 136,  89,  79,  62, 117,   2,
135,  40, 109, 145, 137,  87, 102,  23,  39,   0, 142,  54,  33,
61, 125,  51,  74,  76,  12,  59,  75,  42,  90,   9, 106,  50,
72, 110,  21,  43,  71,  20, 118, 126,  48,  24,  22,  37,  36,
98,  69,  60,   7,  57, 113,  92, 129,  63,  38, 133,  82,  11,
104, 146,  93, 116, 131,   4, 103, 127, 107,  94,  58,  80,  77,
19,  99,  78,  86, 130,  73,  96,  52,  85, 140,  31,  26,  27,
35,  97,   1, 111,  16,  34,  68,  49,  81, 128,  55, 120,  17,
65,  83,  32, 144,  10,  84, 139,  44, 112, 124, 105,  88, 138,
53,  30,  66,  15, 148,  64, 114,  45,  29,  67,  91, 147, 122,
18,   6, 115,  41, 121,  95,  56,   8, 108,  13, 100,  28, 101,
46, 149, 134,  25, 119, 143, 132])

``````
``````

In [35]:

from sklearn.linear_model import LogisticRegression

``````
``````

In [38]:

from sklearn import datasets

``````
``````

In [39]:

``````
``````

In [40]:

x,y=iris.data,iris.target

``````
``````

In [42]:

x

``````
``````

Out[42]:

array([[ 5.1,  3.5,  1.4,  0.2],
[ 4.9,  3. ,  1.4,  0.2],
[ 4.7,  3.2,  1.3,  0.2],
[ 4.6,  3.1,  1.5,  0.2],
[ 5. ,  3.6,  1.4,  0.2],
[ 5.4,  3.9,  1.7,  0.4],
[ 4.6,  3.4,  1.4,  0.3],
[ 5. ,  3.4,  1.5,  0.2],
[ 4.4,  2.9,  1.4,  0.2],
[ 4.9,  3.1,  1.5,  0.1],
[ 5.4,  3.7,  1.5,  0.2],
[ 4.8,  3.4,  1.6,  0.2],
[ 4.8,  3. ,  1.4,  0.1],
[ 4.3,  3. ,  1.1,  0.1],
[ 5.8,  4. ,  1.2,  0.2],
[ 5.7,  4.4,  1.5,  0.4],
[ 5.4,  3.9,  1.3,  0.4],
[ 5.1,  3.5,  1.4,  0.3],
[ 5.7,  3.8,  1.7,  0.3],
[ 5.1,  3.8,  1.5,  0.3],
[ 5.4,  3.4,  1.7,  0.2],
[ 5.1,  3.7,  1.5,  0.4],
[ 4.6,  3.6,  1. ,  0.2],
[ 5.1,  3.3,  1.7,  0.5],
[ 4.8,  3.4,  1.9,  0.2],
[ 5. ,  3. ,  1.6,  0.2],
[ 5. ,  3.4,  1.6,  0.4],
[ 5.2,  3.5,  1.5,  0.2],
[ 5.2,  3.4,  1.4,  0.2],
[ 4.7,  3.2,  1.6,  0.2],
[ 4.8,  3.1,  1.6,  0.2],
[ 5.4,  3.4,  1.5,  0.4],
[ 5.2,  4.1,  1.5,  0.1],
[ 5.5,  4.2,  1.4,  0.2],
[ 4.9,  3.1,  1.5,  0.1],
[ 5. ,  3.2,  1.2,  0.2],
[ 5.5,  3.5,  1.3,  0.2],
[ 4.9,  3.1,  1.5,  0.1],
[ 4.4,  3. ,  1.3,  0.2],
[ 5.1,  3.4,  1.5,  0.2],
[ 5. ,  3.5,  1.3,  0.3],
[ 4.5,  2.3,  1.3,  0.3],
[ 4.4,  3.2,  1.3,  0.2],
[ 5. ,  3.5,  1.6,  0.6],
[ 5.1,  3.8,  1.9,  0.4],
[ 4.8,  3. ,  1.4,  0.3],
[ 5.1,  3.8,  1.6,  0.2],
[ 4.6,  3.2,  1.4,  0.2],
[ 5.3,  3.7,  1.5,  0.2],
[ 5. ,  3.3,  1.4,  0.2],
[ 7. ,  3.2,  4.7,  1.4],
[ 6.4,  3.2,  4.5,  1.5],
[ 6.9,  3.1,  4.9,  1.5],
[ 5.5,  2.3,  4. ,  1.3],
[ 6.5,  2.8,  4.6,  1.5],
[ 5.7,  2.8,  4.5,  1.3],
[ 6.3,  3.3,  4.7,  1.6],
[ 4.9,  2.4,  3.3,  1. ],
[ 6.6,  2.9,  4.6,  1.3],
[ 5.2,  2.7,  3.9,  1.4],
[ 5. ,  2. ,  3.5,  1. ],
[ 5.9,  3. ,  4.2,  1.5],
[ 6. ,  2.2,  4. ,  1. ],
[ 6.1,  2.9,  4.7,  1.4],
[ 5.6,  2.9,  3.6,  1.3],
[ 6.7,  3.1,  4.4,  1.4],
[ 5.6,  3. ,  4.5,  1.5],
[ 5.8,  2.7,  4.1,  1. ],
[ 6.2,  2.2,  4.5,  1.5],
[ 5.6,  2.5,  3.9,  1.1],
[ 5.9,  3.2,  4.8,  1.8],
[ 6.1,  2.8,  4. ,  1.3],
[ 6.3,  2.5,  4.9,  1.5],
[ 6.1,  2.8,  4.7,  1.2],
[ 6.4,  2.9,  4.3,  1.3],
[ 6.6,  3. ,  4.4,  1.4],
[ 6.8,  2.8,  4.8,  1.4],
[ 6.7,  3. ,  5. ,  1.7],
[ 6. ,  2.9,  4.5,  1.5],
[ 5.7,  2.6,  3.5,  1. ],
[ 5.5,  2.4,  3.8,  1.1],
[ 5.5,  2.4,  3.7,  1. ],
[ 5.8,  2.7,  3.9,  1.2],
[ 6. ,  2.7,  5.1,  1.6],
[ 5.4,  3. ,  4.5,  1.5],
[ 6. ,  3.4,  4.5,  1.6],
[ 6.7,  3.1,  4.7,  1.5],
[ 6.3,  2.3,  4.4,  1.3],
[ 5.6,  3. ,  4.1,  1.3],
[ 5.5,  2.5,  4. ,  1.3],
[ 5.5,  2.6,  4.4,  1.2],
[ 6.1,  3. ,  4.6,  1.4],
[ 5.8,  2.6,  4. ,  1.2],
[ 5. ,  2.3,  3.3,  1. ],
[ 5.6,  2.7,  4.2,  1.3],
[ 5.7,  3. ,  4.2,  1.2],
[ 5.7,  2.9,  4.2,  1.3],
[ 6.2,  2.9,  4.3,  1.3],
[ 5.1,  2.5,  3. ,  1.1],
[ 5.7,  2.8,  4.1,  1.3],
[ 6.3,  3.3,  6. ,  2.5],
[ 5.8,  2.7,  5.1,  1.9],
[ 7.1,  3. ,  5.9,  2.1],
[ 6.3,  2.9,  5.6,  1.8],
[ 6.5,  3. ,  5.8,  2.2],
[ 7.6,  3. ,  6.6,  2.1],
[ 4.9,  2.5,  4.5,  1.7],
[ 7.3,  2.9,  6.3,  1.8],
[ 6.7,  2.5,  5.8,  1.8],
[ 7.2,  3.6,  6.1,  2.5],
[ 6.5,  3.2,  5.1,  2. ],
[ 6.4,  2.7,  5.3,  1.9],
[ 6.8,  3. ,  5.5,  2.1],
[ 5.7,  2.5,  5. ,  2. ],
[ 5.8,  2.8,  5.1,  2.4],
[ 6.4,  3.2,  5.3,  2.3],
[ 6.5,  3. ,  5.5,  1.8],
[ 7.7,  3.8,  6.7,  2.2],
[ 7.7,  2.6,  6.9,  2.3],
[ 6. ,  2.2,  5. ,  1.5],
[ 6.9,  3.2,  5.7,  2.3],
[ 5.6,  2.8,  4.9,  2. ],
[ 7.7,  2.8,  6.7,  2. ],
[ 6.3,  2.7,  4.9,  1.8],
[ 6.7,  3.3,  5.7,  2.1],
[ 7.2,  3.2,  6. ,  1.8],
[ 6.2,  2.8,  4.8,  1.8],
[ 6.1,  3. ,  4.9,  1.8],
[ 6.4,  2.8,  5.6,  2.1],
[ 7.2,  3. ,  5.8,  1.6],
[ 7.4,  2.8,  6.1,  1.9],
[ 7.9,  3.8,  6.4,  2. ],
[ 6.4,  2.8,  5.6,  2.2],
[ 6.3,  2.8,  5.1,  1.5],
[ 6.1,  2.6,  5.6,  1.4],
[ 7.7,  3. ,  6.1,  2.3],
[ 6.3,  3.4,  5.6,  2.4],
[ 6.4,  3.1,  5.5,  1.8],
[ 6. ,  3. ,  4.8,  1.8],
[ 6.9,  3.1,  5.4,  2.1],
[ 6.7,  3.1,  5.6,  2.4],
[ 6.9,  3.1,  5.1,  2.3],
[ 5.8,  2.7,  5.1,  1.9],
[ 6.8,  3.2,  5.9,  2.3],
[ 6.7,  3.3,  5.7,  2.5],
[ 6.7,  3. ,  5.2,  2.3],
[ 6.3,  2.5,  5. ,  1.9],
[ 6.5,  3. ,  5.2,  2. ],
[ 6.2,  3.4,  5.4,  2.3],
[ 5.9,  3. ,  5.1,  1.8]])

``````
``````

In [43]:

y

``````
``````

Out[43]:

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

``````
``````

In [45]:

from sklearn.cross_validation import train_test_split

``````
``````

C:\Users\KOGENTIX\Anaconda3\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)

``````
``````

In [46]:

X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.8)

``````
``````

In [61]:

iris_X_train2 = iris.data[:-30]
iris_X_test2  = iris.data[-30:]
iris_y_train2 = iris.target[:-30]
iris_y_test2  = iris.target[-30:]

``````
``````

In [62]:

iris_X_train2.shape

``````
``````

Out[62]:

(120, 4)

``````
``````

In [63]:

iris_X_train2

``````
``````

Out[63]:

array([[ 5.1,  3.5,  1.4,  0.2],
[ 4.9,  3. ,  1.4,  0.2],
[ 4.7,  3.2,  1.3,  0.2],
[ 4.6,  3.1,  1.5,  0.2],
[ 5. ,  3.6,  1.4,  0.2],
[ 5.4,  3.9,  1.7,  0.4],
[ 4.6,  3.4,  1.4,  0.3],
[ 5. ,  3.4,  1.5,  0.2],
[ 4.4,  2.9,  1.4,  0.2],
[ 4.9,  3.1,  1.5,  0.1],
[ 5.4,  3.7,  1.5,  0.2],
[ 4.8,  3.4,  1.6,  0.2],
[ 4.8,  3. ,  1.4,  0.1],
[ 4.3,  3. ,  1.1,  0.1],
[ 5.8,  4. ,  1.2,  0.2],
[ 5.7,  4.4,  1.5,  0.4],
[ 5.4,  3.9,  1.3,  0.4],
[ 5.1,  3.5,  1.4,  0.3],
[ 5.7,  3.8,  1.7,  0.3],
[ 5.1,  3.8,  1.5,  0.3],
[ 5.4,  3.4,  1.7,  0.2],
[ 5.1,  3.7,  1.5,  0.4],
[ 4.6,  3.6,  1. ,  0.2],
[ 5.1,  3.3,  1.7,  0.5],
[ 4.8,  3.4,  1.9,  0.2],
[ 5. ,  3. ,  1.6,  0.2],
[ 5. ,  3.4,  1.6,  0.4],
[ 5.2,  3.5,  1.5,  0.2],
[ 5.2,  3.4,  1.4,  0.2],
[ 4.7,  3.2,  1.6,  0.2],
[ 4.8,  3.1,  1.6,  0.2],
[ 5.4,  3.4,  1.5,  0.4],
[ 5.2,  4.1,  1.5,  0.1],
[ 5.5,  4.2,  1.4,  0.2],
[ 4.9,  3.1,  1.5,  0.1],
[ 5. ,  3.2,  1.2,  0.2],
[ 5.5,  3.5,  1.3,  0.2],
[ 4.9,  3.1,  1.5,  0.1],
[ 4.4,  3. ,  1.3,  0.2],
[ 5.1,  3.4,  1.5,  0.2],
[ 5. ,  3.5,  1.3,  0.3],
[ 4.5,  2.3,  1.3,  0.3],
[ 4.4,  3.2,  1.3,  0.2],
[ 5. ,  3.5,  1.6,  0.6],
[ 5.1,  3.8,  1.9,  0.4],
[ 4.8,  3. ,  1.4,  0.3],
[ 5.1,  3.8,  1.6,  0.2],
[ 4.6,  3.2,  1.4,  0.2],
[ 5.3,  3.7,  1.5,  0.2],
[ 5. ,  3.3,  1.4,  0.2],
[ 7. ,  3.2,  4.7,  1.4],
[ 6.4,  3.2,  4.5,  1.5],
[ 6.9,  3.1,  4.9,  1.5],
[ 5.5,  2.3,  4. ,  1.3],
[ 6.5,  2.8,  4.6,  1.5],
[ 5.7,  2.8,  4.5,  1.3],
[ 6.3,  3.3,  4.7,  1.6],
[ 4.9,  2.4,  3.3,  1. ],
[ 6.6,  2.9,  4.6,  1.3],
[ 5.2,  2.7,  3.9,  1.4],
[ 5. ,  2. ,  3.5,  1. ],
[ 5.9,  3. ,  4.2,  1.5],
[ 6. ,  2.2,  4. ,  1. ],
[ 6.1,  2.9,  4.7,  1.4],
[ 5.6,  2.9,  3.6,  1.3],
[ 6.7,  3.1,  4.4,  1.4],
[ 5.6,  3. ,  4.5,  1.5],
[ 5.8,  2.7,  4.1,  1. ],
[ 6.2,  2.2,  4.5,  1.5],
[ 5.6,  2.5,  3.9,  1.1],
[ 5.9,  3.2,  4.8,  1.8],
[ 6.1,  2.8,  4. ,  1.3],
[ 6.3,  2.5,  4.9,  1.5],
[ 6.1,  2.8,  4.7,  1.2],
[ 6.4,  2.9,  4.3,  1.3],
[ 6.6,  3. ,  4.4,  1.4],
[ 6.8,  2.8,  4.8,  1.4],
[ 6.7,  3. ,  5. ,  1.7],
[ 6. ,  2.9,  4.5,  1.5],
[ 5.7,  2.6,  3.5,  1. ],
[ 5.5,  2.4,  3.8,  1.1],
[ 5.5,  2.4,  3.7,  1. ],
[ 5.8,  2.7,  3.9,  1.2],
[ 6. ,  2.7,  5.1,  1.6],
[ 5.4,  3. ,  4.5,  1.5],
[ 6. ,  3.4,  4.5,  1.6],
[ 6.7,  3.1,  4.7,  1.5],
[ 6.3,  2.3,  4.4,  1.3],
[ 5.6,  3. ,  4.1,  1.3],
[ 5.5,  2.5,  4. ,  1.3],
[ 5.5,  2.6,  4.4,  1.2],
[ 6.1,  3. ,  4.6,  1.4],
[ 5.8,  2.6,  4. ,  1.2],
[ 5. ,  2.3,  3.3,  1. ],
[ 5.6,  2.7,  4.2,  1.3],
[ 5.7,  3. ,  4.2,  1.2],
[ 5.7,  2.9,  4.2,  1.3],
[ 6.2,  2.9,  4.3,  1.3],
[ 5.1,  2.5,  3. ,  1.1],
[ 5.7,  2.8,  4.1,  1.3],
[ 6.3,  3.3,  6. ,  2.5],
[ 5.8,  2.7,  5.1,  1.9],
[ 7.1,  3. ,  5.9,  2.1],
[ 6.3,  2.9,  5.6,  1.8],
[ 6.5,  3. ,  5.8,  2.2],
[ 7.6,  3. ,  6.6,  2.1],
[ 4.9,  2.5,  4.5,  1.7],
[ 7.3,  2.9,  6.3,  1.8],
[ 6.7,  2.5,  5.8,  1.8],
[ 7.2,  3.6,  6.1,  2.5],
[ 6.5,  3.2,  5.1,  2. ],
[ 6.4,  2.7,  5.3,  1.9],
[ 6.8,  3. ,  5.5,  2.1],
[ 5.7,  2.5,  5. ,  2. ],
[ 5.8,  2.8,  5.1,  2.4],
[ 6.4,  3.2,  5.3,  2.3],
[ 6.5,  3. ,  5.5,  1.8],
[ 7.7,  3.8,  6.7,  2.2],
[ 7.7,  2.6,  6.9,  2.3],
[ 6. ,  2.2,  5. ,  1.5]])

``````
``````

In [64]:

X_train

``````
``````

Out[64]:

array([[ 5.2,  2.7,  3.9,  1.4],
[ 4.9,  2.4,  3.3,  1. ],
[ 7.7,  2.6,  6.9,  2.3],
[ 5.8,  2.8,  5.1,  2.4],
[ 7.3,  2.9,  6.3,  1.8],
[ 7.7,  3.8,  6.7,  2.2],
[ 6.2,  2.2,  4.5,  1.5],
[ 6.1,  3. ,  4.9,  1.8],
[ 5.1,  3.8,  1.5,  0.3],
[ 6.3,  2.3,  4.4,  1.3],
[ 5.7,  4.4,  1.5,  0.4],
[ 5.5,  2.4,  3.8,  1.1],
[ 6.5,  3. ,  5.2,  2. ],
[ 7.2,  3.6,  6.1,  2.5],
[ 6. ,  2.2,  5. ,  1.5],
[ 5.6,  3. ,  4.5,  1.5],
[ 5. ,  3.6,  1.4,  0.2],
[ 4.8,  3. ,  1.4,  0.3],
[ 6.2,  2.9,  4.3,  1.3],
[ 5.9,  3. ,  5.1,  1.8],
[ 5.4,  3.7,  1.5,  0.2],
[ 5. ,  3.2,  1.2,  0.2],
[ 5.4,  3.4,  1.7,  0.2],
[ 6.7,  3.3,  5.7,  2.5],
[ 5. ,  2.3,  3.3,  1. ],
[ 6. ,  3.4,  4.5,  1.6],
[ 4.9,  3.1,  1.5,  0.1],
[ 5.7,  3.8,  1.7,  0.3],
[ 5.5,  2.4,  3.7,  1. ],
[ 6.3,  2.5,  4.9,  1.5]])

``````
``````

In [65]:

X_train.shape

``````
``````

Out[65]:

(30, 4)

``````
``````

In [66]:

X_train.shape

``````
``````

Out[66]:

(30, 4)

``````
``````

In [67]:

X_test.shape

``````
``````

Out[67]:

(120, 4)

``````
``````

In [68]:

y_train.shape

``````
``````

Out[68]:

(30,)

``````
``````

In [69]:

y_test.shape

``````
``````

Out[69]:

(120,)

``````
``````

In [70]:

np.random.seed(0)
indices = np.random.permutation(len(iris))

``````
``````

In [71]:

iris_X_train = iris.data[indices[:-30]]

``````
``````

In [72]:

iris_y_train = iris.data[indices[:-30]]
iris_X_test  = iris.data[indices[-30:]]
iris_y_test  = iris.target[indices[-30:]]

``````
``````

In [ ]:

``````