We can split a dataset in three ways: 1) using indices from top to bottom, 2) using randomly permuted indices generated with NumPy, 3) using train_test_split in scikit-learn.
In [17]:
import pandas as pd
In [18]:
# Fetch the iris CSV from the Rdatasets repository on GitHub into a DataFrame.
iris_csv_url = "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/datasets/iris.csv"
iris = pd.read_csv(iris_csv_url)
In [19]:
# Preview the first rows of the freshly loaded frame.
iris.head()
Out[19]:
In [22]:
# Check the (rows, columns) dimensions before any column is dropped.
iris.shape
Out[22]:
In [23]:
# Keep every row but drop the first column by position.
# NOTE: this overwrites `iris`, so re-running the cell removes one more column each time.
iris=iris.iloc[:,1:]
In [24]:
# Preview again to confirm the leading column is gone.
iris.head()
Out[24]:
In [25]:
# Dimensions after the positional column drop.
iris.shape
Out[25]:
In [26]:
import numpy as np
In [27]:
# Row count of the frame; the next cell uses it as the size of the index permutation.
len(iris)
Out[27]:
In [29]:
# Shuffle the row positions of the frame (no seed is set here, so the order
# differs from run to run).
indices = np.random.permutation(iris.shape[0])
In [30]:
# Inspect the shuffled positions.
indices
Out[30]:
In [35]:
from sklearn.linear_model import LogisticRegression
In [38]:
from sklearn import datasets
In [39]:
# NOTE: this rebinds `iris`, which until now held the DataFrame read from CSV.
# From here on `iris` is the object returned by load_iris(); later cells read
# its `.data` and `.target` attributes.
iris = datasets.load_iris()
In [40]:
# Bind short aliases for the dataset's data and target arrays.
x = iris.data
y = iris.target
In [42]:
# Display the array bound from iris.data.
x
Out[42]:
In [43]:
# Display the array bound from iris.target.
y
Out[43]:
In [45]:
# train_test_split lives in sklearn.model_selection; the old sklearn.cross_validation
# module was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import train_test_split
In [46]:
# Hold out a fifth of the samples for testing, in line with the 30-row hold-out used
# by the manual splits in this notebook; the previous test_size=0.8 left only 20% of
# the data for training. random_state pins the shuffle so the split is reproducible
# on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
In [61]:
# Positional split with no shuffling: the final 30 rows form the test set and
# everything before them is the training set.
iris_X_train2, iris_X_test2 = iris.data[:-30], iris.data[-30:]
iris_y_train2, iris_y_test2 = iris.target[:-30], iris.target[-30:]
In [62]:
# Dimensions of the positional (unshuffled) training split.
iris_X_train2.shape
Out[62]:
In [63]:
# Display the rows of the positional training split.
iris_X_train2
Out[63]:
In [64]:
# Display the training rows returned by train_test_split, for comparison.
X_train
Out[64]:
In [65]:
# Dimensions of the training rows returned by train_test_split.
X_train.shape
Out[65]:
In [66]:
# Same check as the previous cell (repeated).
X_train.shape
Out[66]:
In [67]:
# Dimensions of the test rows returned by train_test_split.
X_test.shape
Out[67]:
In [68]:
# Dimensions of the training targets returned by train_test_split.
y_train.shape
Out[68]:
In [69]:
# Dimensions of the test targets returned by train_test_split.
y_test.shape
Out[69]:
In [70]:
# Seed NumPy's global RNG so the shuffled order is the same on every run.
np.random.seed(0)
# Permute the sample positions. At this point `iris` is the object returned by
# load_iris(), a dict-like container whose len() counts its keys rather than its
# samples, so the permutation has to be sized from the data array itself.
indices = np.random.permutation(len(iris.data))
In [71]:
# Every shuffled position except the last 30 selects a training row.
train_positions = indices[:-30]
iris_X_train = iris.data[train_positions]
In [72]:
# Training targets must be read from iris.target; indexing iris.data here (as the
# cell originally did) just duplicated the training rows instead of their targets.
iris_y_train = iris.target[indices[:-30]]
# The last 30 shuffled positions form the test set.
iris_X_test = iris.data[indices[-30:]]
iris_y_test = iris.target[indices[-30:]]
In [ ]: