In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [2]:
data = load_iris()

In [3]:
X_df = pd.DataFrame(data['data'], columns=data['feature_names'])
y_s = pd.Series(data['target'])

In [4]:
print(X_df)


     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8

[150 rows x 4 columns]

In [5]:
print(type(X_df))


<class 'pandas.core.frame.DataFrame'>

In [6]:
print(X_df.shape)


(150, 4)

In [7]:
print(y_s)


0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Length: 150, dtype: int64

In [8]:
print(type(y_s))


<class 'pandas.core.series.Series'>

In [9]:
print(y_s.shape)


(150,)

In [10]:
X_train_df, X_test_df, y_train_s, y_test_s = train_test_split(
    X_df, y_s, test_size=0.2, random_state=0, stratify=y_s
)

In [11]:
print(type(X_train_df))


<class 'pandas.core.frame.DataFrame'>

In [12]:
print(X_train_df.shape)


(120, 4)

In [13]:
print(type(X_test_df))


<class 'pandas.core.frame.DataFrame'>

In [14]:
print(X_test_df.shape)


(30, 4)

In [15]:
print(type(y_train_s))


<class 'pandas.core.series.Series'>

In [16]:
print(y_train_s.shape)


(120,)

In [17]:
print(type(y_test_s))


<class 'pandas.core.series.Series'>

In [18]:
print(y_test_s.shape)


(30,)

In [19]:
print(y_train_s.value_counts())


2    40
1    40
0    40
dtype: int64

In [20]:
print(y_test_s.value_counts())


2    10
1    10
0    10
dtype: int64