In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
In [2]:
# Sample data used by the single-array examples: the integers 0..9 (1-D).
a = np.arange(0, 10)
print(a)
In [3]:
# Print the raw return value of a split called with no keyword arguments.
print(train_test_split(a))
In [4]:
# Inspect the type of the object that train_test_split returns.
print(type(train_test_split(a)))
In [5]:
# Count how many pieces train_test_split returns for a single input array.
print(len(train_test_split(a)))
In [6]:
# Unpack the result into a training part and a test part (default sizes).
a_train, a_test = train_test_split(a)
In [7]:
# Training part of the default split.
print(a_train)
In [8]:
# Test part of the default split.
print(a_test)
In [9]:
# test_size given as a float: the fraction of samples assigned to the test part.
a_train, a_test = train_test_split(a, test_size=0.6)
In [10]:
# Training part for test_size=0.6.
print(a_train)
In [11]:
# Test part for test_size=0.6.
print(a_test)
In [12]:
# test_size given as an int: the absolute number of test samples.
a_train, a_test = train_test_split(a, test_size=6)
In [13]:
# Training part for test_size=6.
print(a_train)
In [14]:
# Test part for test_size=6.
print(a_test)
In [15]:
# train_size given as a float: the fraction of samples assigned to the training part.
a_train, a_test = train_test_split(a, train_size=0.6)
In [16]:
# Training part for train_size=0.6.
print(a_train)
In [17]:
# Test part for train_size=0.6.
print(a_test)
In [18]:
# train_size given as an int: the absolute number of training samples.
a_train, a_test = train_test_split(a, train_size=6)
In [19]:
# Training part for train_size=6.
print(a_train)
In [20]:
# Test part for train_size=6.
print(a_test)
In [21]:
# 0.25 of the 10 samples is 2.5, not a whole number; the printed parts below
# show how the sample counts are rounded in that case.
a_train, a_test = train_test_split(a, train_size=0.25)
In [22]:
# Training part for train_size=0.25.
print(a_train)
In [23]:
# Test part for train_size=0.25.
print(a_test)
In [24]:
# Both sizes given as fractions. 0.3 + 0.4 is below 1.0, so the two parts
# together do not have to cover every sample of `a`.
a_train, a_test = train_test_split(a, test_size=0.3, train_size=0.4)
In [25]:
# Training part for test_size=0.3, train_size=0.4.
print(a_train)
In [26]:
# Test part for test_size=0.3, train_size=0.4.
print(a_test)
In [27]:
# Both sizes given as sample counts. 3 + 4 is below the 10 available samples,
# so the two parts together do not have to cover every sample of `a`.
a_train, a_test = train_test_split(a, test_size=3, train_size=4)
In [28]:
# Training part for test_size=3, train_size=4.
print(a_train)
In [29]:
# Test part for test_size=3, train_size=4.
print(a_test)
In [30]:
# Fractional sizes whose sum exceeds 1.0 (0.8 + 0.7 = 1.5) are rejected with a
# ValueError. Run the call for real and print the message, so the cell shows
# the error of the installed version without stopping a "Restart & Run All".
# Nothing is assigned, so a_train / a_test keep their previous values.
try:
    train_test_split(a, test_size=0.8, train_size=0.7)
except ValueError as err:
    print("ValueError:", err)
In [31]:
# Integer sizes whose sum exceeds the number of samples (8 + 7 = 15 > 10) are
# rejected with a ValueError. Run the call for real and print the message, so
# the cell shows the error of the installed version without stopping a
# "Restart & Run All". Nothing is assigned, so a_train / a_test are unchanged.
try:
    train_test_split(a, test_size=8, train_size=7)
except ValueError as err:
    print("ValueError:", err)
In [32]:
# shuffle=False: split the samples without shuffling them first.
a_train, a_test = train_test_split(a, shuffle=False)
In [33]:
# Training part for shuffle=False.
print(a_train)
In [34]:
# Test part for shuffle=False.
print(a_test)
In [35]:
# random_state=0: seed the shuffle so the same split is produced on every run.
a_train, a_test = train_test_split(a, random_state=0)
In [36]:
# Training part for random_state=0.
print(a_train)
In [37]:
# Test part for random_state=0.
print(a_test)
In [38]:
# 10x2 sample matrix: column 0 holds 0..9 and column 1 holds 10..19.
# Filling a (10, 2) shape in column-major ("F") order lays the values out
# column by column.
X = np.arange(20).reshape((10, 2), order="F")
print(X)
In [39]:
# Target values 0..9, one per row of X.
y = np.arange(0, 10)
print(y)
In [40]:
# Two arrays split in one call: the result unpacks as a train/test pair for
# each input, in the order the inputs were passed.
X_train, X_test, y_train, y_test = train_test_split(X, y)
In [41]:
# Training rows of X from the joint (X, y) split.
print(X_train)
In [42]:
# Test rows of X from the joint (X, y) split.
print(X_test)
In [43]:
# Training entries of y from the joint (X, y) split.
print(y_train)
In [44]:
# Test entries of y from the joint (X, y) split.
print(y_test)
In [45]:
# A third array of the same length: the multiples of ten from 0 to 90.
z = np.arange(0, 100, 10)
print(z)
In [46]:
# Three arrays split in one call; the result unpacks as one train/test pair
# per input, in the order the inputs were passed.
(
    X_train, X_test,
    y_train, y_test,
    z_train, z_test,
) = train_test_split(X, y, z)
In [47]:
# Training rows of X from the joint (X, y, z) split.
print(X_train)
In [48]:
# Test rows of X from the joint (X, y, z) split.
print(X_test)
In [49]:
# Training entries of y from the joint (X, y, z) split.
print(y_train)
In [50]:
# Test entries of y from the joint (X, y, z) split.
print(y_test)
In [51]:
# Training entries of z from the joint (X, y, z) split.
print(z_train)
In [52]:
# Test entries of z from the joint (X, y, z) split.
print(z_test)
In [53]:
# A deliberately short target array: 8 entries, used to show what happens when
# the inputs to a split do not have the same number of samples.
y_mismatch = np.arange(0, 8)
print(y_mismatch)
In [54]:
# Inputs with different numbers of samples (X has 10 rows, y_mismatch has 8
# entries) are rejected with a ValueError. Run the call for real and print the
# message, so the cell shows the error of the installed version without
# stopping a "Restart & Run All". Nothing is assigned, so the existing
# X_train / X_test / y_train / y_test are left unchanged.
try:
    train_test_split(X, y_mismatch)
except ValueError as err:
    print("ValueError:", err)
In [55]:
# Binary labels for the stratification example: five 0s followed by five 1s.
y = np.repeat([0, 1], 5)
print(y)
In [56]:
# Seeded 80/20 split without stratification; compare the label mix printed
# below with the stratified variant of the same call.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
)
In [57]:
# Training labels of the unstratified split (test_size=0.2, random_state=100).
print(y_train)
In [58]:
# Test labels of the unstratified split (test_size=0.2, random_state=100).
print(y_test)
In [59]:
# Same seeded 80/20 split, now with stratify=y so the split is made with
# respect to the class labels in y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
    stratify=y,
)
In [60]:
# Training labels of the stratified split (stratify=y).
print(y_train)
In [61]:
# Test labels of the stratified split (stratify=y).
print(y_test)