In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Toy dataset: the ten consecutive integers 0..9, used as the array to split below.
a = np.arange(0, 10)
print(a)


[0 1 2 3 4 5 6 7 8 9]

In [3]:
# With default arguments the input is shuffled and divided into two arrays
# (7 and 3 elements for this 10-element input in the recorded run).
print(train_test_split(a))


[array([3, 9, 6, 1, 5, 0, 7]), array([2, 8, 4])]

In [4]:
# The return value is a plain Python list that holds the split arrays.
print(type(train_test_split(a)))


<class 'list'>

In [5]:
# One input array yields two entries in the returned list: the training part and the test part.
print(len(train_test_split(a)))


2

In [6]:
# Unpack the two-element result straight into the training and test arrays.
# No random_state is passed, so the shuffle (and therefore the split) changes on every call.
a_train, a_test = train_test_split(a)

In [7]:
# Training portion of the default split: 7 of the 10 elements in the recorded run.
print(a_train)


[3 4 0 5 7 8 2]

In [8]:
# Test portion of the default split: the remaining 3 elements.
print(a_test)


[6 1 9]

In [9]:
# A float test_size is interpreted as the fraction of samples assigned to the test set
# (0.6 of 10 samples -> 6 test samples).
a_train, a_test = train_test_split(a, test_size=0.6)

In [10]:
# Training set: the 4 elements left over after 60% of the data went to the test set.
print(a_train)


[9 1 2 6]

In [11]:
# Test set: 6 elements, i.e. 60% of the input.
print(a_test)


[5 7 4 3 0 8]

In [12]:
# An int test_size is interpreted as an absolute number of test samples.
a_train, a_test = train_test_split(a, test_size=6)

In [13]:
# Training set: the 4 elements not taken by the 6-sample test set.
print(a_train)


[4 2 1 0]

In [14]:
# Test set: exactly the 6 samples requested.
print(a_test)


[7 6 3 9 8 5]

In [15]:
# train_size works like test_size but sizes the training set instead;
# a float is the fraction of samples to keep for training (0.6 of 10 -> 6).
a_train, a_test = train_test_split(a, train_size=0.6)

In [16]:
# Training set: 6 elements, i.e. 60% of the input.
print(a_train)


[2 9 6 0 4 3]

In [17]:
# Test set: the complementary 4 elements.
print(a_test)


[7 8 5 1]

In [18]:
# An int train_size is an absolute number of training samples.
a_train, a_test = train_test_split(a, train_size=6)

In [19]:
# Training set: exactly the 6 samples requested.
print(a_train)


[9 3 0 8 7 1]

In [20]:
# Test set: the 4 samples that were not selected for training.
print(a_test)


[5 6 4 2]

In [21]:
# 0.25 of 10 samples is not a whole number; the recorded run shows the result
# being 2 training samples and 8 test samples.
a_train, a_test = train_test_split(a, train_size=0.25)

In [22]:
# Training set for train_size=0.25: 2 elements in the recorded run.
print(a_train)


[1 2]

In [23]:
# Test set for train_size=0.25: the other 8 elements.
print(a_test)


[0 8 4 7 5 6 3 9]

In [24]:
# Both sizes may be given together. When the fractions add up to less than 1.0
# (0.4 + 0.3 here) the samples not covered by either set are simply left out.
a_train, a_test = train_test_split(a, test_size=0.3, train_size=0.4)

In [25]:
# Training set: 4 elements (train_size=0.4 of 10 samples).
print(a_train)


[3 0 4 9]

In [26]:
# Test set: 3 elements (test_size=0.3); the remaining 3 samples are in neither set.
print(a_test)


[7 2 8]

In [27]:
# The same works with absolute counts: 4 training + 3 test samples out of 10,
# leaving 3 samples unused.
a_train, a_test = train_test_split(a, test_size=3, train_size=4)

In [28]:
# Training set: exactly the 4 samples requested via train_size.
print(a_train)


[9 7 0 4]

In [29]:
# Test set: exactly the 3 samples requested via test_size.
print(a_test)


[3 8 5]

In [30]:
# Fractional train_size and test_size that add up to more than 1.0 cannot both be
# satisfied, so train_test_split rejects them with a ValueError. The failure is the
# point of this cell, so it is caught and displayed rather than left commented out.
# Message recorded when this notebook was written (wording may vary by version):
#   ValueError: The sum of test_size and train_size = 1.500000, should be smaller than 1.0. Reduce test_size and/or train_size.
try:
    train_test_split(a, test_size=0.8, train_size=0.7)
except ValueError as err:
    print(f'ValueError: {err}')

In [31]:
# Absolute train_size and test_size that add up to more than the number of samples
# (7 + 8 > 10) are rejected with a ValueError as well. The error is caught and
# displayed so the cell demonstrates the failure instead of hiding it in a comment.
# Message recorded when this notebook was written (wording may vary by version):
#   ValueError: The sum of train_size and test_size = 15, should be smaller than the number of samples 10. Reduce test_size and/or train_size.
try:
    train_test_split(a, test_size=8, train_size=7)
except ValueError as err:
    print(f'ValueError: {err}')

In [32]:
# shuffle=False disables the random permutation: the input is cut in its original
# order, the leading elements going to training and the trailing ones to test.
a_train, a_test = train_test_split(a, shuffle=False)

In [33]:
# Training set without shuffling: the first 7 elements, still in order.
print(a_train)


[0 1 2 3 4 5 6]

In [34]:
# Test set without shuffling: the last 3 elements, still in order.
print(a_test)


[7 8 9]

In [35]:
# random_state seeds the shuffle, so re-running this cell with the same value
# reproduces the same split.
a_train, a_test = train_test_split(a, random_state=0)

In [36]:
# Training set obtained with random_state=0.
print(a_train)


[9 1 6 7 3 0 5]

In [37]:
# Test set obtained with random_state=0.
print(a_test)


[2 8 4]

In [38]:
# Build a 10x2 feature matrix whose first column is 0..9 and second column is 10..19.
# Filling in column-major ("F") order lays the 20 consecutive integers down one
# column at a time, which gives the same array as reshaping to (2, 10) and transposing.
X = np.arange(20).reshape((10, 2), order='F')
print(X)


[[ 0 10]
 [ 1 11]
 [ 2 12]
 [ 3 13]
 [ 4 14]
 [ 5 15]
 [ 6 16]
 [ 7 17]
 [ 8 18]
 [ 9 19]]

In [39]:
# Target vector with one entry per row of X; entry i equals the first column of row i,
# which makes it easy to see that rows and targets stay paired after splitting.
y = np.arange(0, 10)
print(y)


[0 1 2 3 4 5 6 7 8 9]

In [40]:
# Several arrays can be split in one call. The same shuffle is applied to each of them
# along the first axis, so every row of X stays aligned with its entry in y.
# The result holds a train/test pair for X followed by a train/test pair for y.
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [41]:
# Training rows of X (7 of the 10 rows).
print(X_train)


[[ 7 17]
 [ 3 13]
 [ 0 10]
 [ 8 18]
 [ 6 16]
 [ 4 14]
 [ 2 12]]

In [42]:
# Test rows of X (the remaining 3 rows).
print(X_test)


[[ 5 15]
 [ 1 11]
 [ 9 19]]

In [43]:
# Training targets: they match the first column of X_train, confirming the pairing is kept.
print(y_train)


[7 3 0 8 6 4 2]

In [44]:
# Test targets: they match the first column of X_test.
print(y_test)


[5 1 9]

In [45]:
# A third array of the same length (0, 10, ..., 90) to show that more than two
# inputs can be split together.
z = np.arange(0, 100, 10)
print(z)


[ 0 10 20 30 40 50 60 70 80 90]

In [46]:
# Three inputs produce six outputs: one train/test pair per input, in input order,
# all split with the same shuffle.
X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(X, y, z)

In [47]:
# Training rows of X from the three-array split.
print(X_train)


[[ 6 16]
 [ 9 19]
 [ 1 11]
 [ 2 12]
 [ 7 17]
 [ 0 10]
 [ 3 13]]

In [48]:
# Test rows of X from the three-array split.
print(X_test)


[[ 8 18]
 [ 4 14]
 [ 5 15]]

In [49]:
# Training targets: same ordering as the rows of X_train.
print(y_train)


[6 9 1 2 7 0 3]

In [50]:
# Test targets: same ordering as the rows of X_test.
print(y_test)


[8 4 5]

In [51]:
# Training part of z: each value is 10x the matching y_train entry, so z was shuffled identically.
print(z_train)


[60 90 10 20 70  0 30]

In [52]:
# Test part of z: each value is 10x the matching y_test entry.
print(z_test)


[80 40 50]

In [53]:
# A deliberately short target vector (8 entries against the 10 rows of X),
# used to show what happens when the inputs disagree in length.
y_mismatch = np.arange(0, 8)
print(y_mismatch)


[0 1 2 3 4 5 6 7]

In [54]:
# All arrays passed to train_test_split must contain the same number of samples.
# X has 10 rows and y_mismatch only 8 entries, so the call raises a ValueError; it is
# caught and displayed so the cell demonstrates the failure instead of hiding it in a comment.
# Message recorded when this notebook was written (wording may vary by version):
#   ValueError: Found input variables with inconsistent numbers of samples: [10, 8]
try:
    train_test_split(X, y_mismatch)
except ValueError as err:
    print(f'ValueError: {err}')

In [55]:
# Replace y with balanced binary class labels: five 0s followed by five 1s.
# These are used below to compare a plain split with a stratified one.
y = np.repeat([0, 1], 5)
print(y)


[0 0 0 0 0 1 1 1 1 1]

In [56]:
# Plain (non-stratified) split with a fixed seed. Nothing forces the class ratio to be
# preserved: in the recorded run both test samples come from class 1.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

In [57]:
# Training labels of the non-stratified split: unbalanced (five 0s, three 1s in the recorded run).
print(y_train)


[0 1 0 0 0 0 1 1]

In [58]:
# Test labels of the non-stratified split: only class 1 is represented in the recorded run.
print(y_test)


[1 1]

In [59]:
# Same split size and seed as the previous split, but stratified on the labels:
# passing stratify=y keeps the class proportions of y in both the training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
    stratify=y,
)

In [60]:
# Training labels of the stratified split: balanced (four 0s and four 1s).
print(y_train)


[1 1 0 0 0 1 1 0]

In [61]:
# Test labels of the stratified split: one sample from each class.
print(y_test)


[1 0]