In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
In [2]:
# Load the iris dataset object from scikit-learn. Later cells read its
# 'data', 'feature_names' and 'target' entries.
data = load_iris()
In [3]:
# Labels: wrap the dataset's 'target' entry in a pandas Series.
y_s = pd.Series(data=data['target'])

# Features: wrap the dataset's 'data' entry in a DataFrame whose columns
# are labelled with the dataset's feature names.
X_df = pd.DataFrame(
    data=data['data'],
    columns=data['feature_names'],
)
In [4]:
# Preview just the first rows rather than printing the whole frame; the
# frame's dimensions are reported separately via X_df.shape.
print(X_df.head())
In [5]:
# Show the container type of the feature table built with pd.DataFrame.
print(type(X_df))
In [6]:
# Show the feature table's dimensions as a (rows, columns) tuple.
print(X_df.shape)
In [7]:
# Print the label Series built from the dataset's 'target' entry.
print(y_s)
In [8]:
# Show the container type of the labels built with pd.Series.
print(type(y_s))
In [9]:
# Show the label Series' dimensions (a one-element shape tuple).
print(y_s.shape)
In [10]:
# Hold out 20% of the rows for testing. The fixed random_state makes the
# split repeatable, and stratify=y_s asks for the label proportions of
# y_s to be kept in both partitions.
X_train_df, X_test_df, y_train_s, y_test_s = train_test_split(
    X_df,
    y_s,
    test_size=0.2,
    random_state=0,
    stratify=y_s,
)
In [11]:
# Show which container type train_test_split returned for the training features.
print(type(X_train_df))
In [12]:
# Show the dimensions of the training feature split.
print(X_train_df.shape)
In [13]:
# Show which container type train_test_split returned for the test features.
print(type(X_test_df))
In [14]:
# Show the dimensions of the test feature split.
print(X_test_df.shape)
In [15]:
# Show which container type train_test_split returned for the training labels.
print(type(y_train_s))
In [16]:
# Show the dimensions of the training label split.
print(y_train_s.shape)
In [17]:
# Show which container type train_test_split returned for the test labels.
print(type(y_test_s))
In [18]:
# Show the dimensions of the test label split.
print(y_test_s.shape)
In [19]:
# Tally the training labels per class. value_counts() orders rows by
# frequency, which gives classes with equal counts no label-based order,
# so sort by class label to get a stable, label-ordered table.
print(y_train_s.value_counts().sort_index())
In [20]:
# Tally the test labels per class. value_counts() orders rows by
# frequency, which gives classes with equal counts no label-based order,
# so sort by class label to get a stable, label-ordered table.
print(y_test_s.value_counts().sort_index())