In [1]:
from sklearn import preprocessing
import numpy as np
# Toy training set: three samples (rows) by three features (columns).
X_train = np.array(
    [
        [1.0, -1.0, 2.0],
        [2.0, 0.0, 0.0],
        [0.0, 1.0, -1.0],
    ]
)
# Standardize the data with the function-style API and display the result.
X_scaled = preprocessing.scale(X_train)
X_scaled
Out[1]:
In [3]:
# Column-wise mean and standard deviation of the scaled data, shown as a tuple;
# after standardization these are expected to be ~0 and ~1 respectively.
X_scaled.mean(axis=0), X_scaled.std(axis=0)
Out[3]:
In [12]:
# Build the estimator first, then fit it on the training data so the learned
# statistics can be re-applied to other data later; finally display the object.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
scaler
Out[12]:
In [13]:
# Show the fitted scaler's learned `mean_` and `scale_` attributes together with
# the result of applying it back to the training data.
scaler.mean_, scaler.scale_, scaler.transform(X_train)
Out[13]:
Once a scaler instance has been fitted, it can perform the same transformation on new data:
In [5]:
# A single new sample (one row, three features), given as a nested list.
X_test = [[-1., 1., 0.]]
# Reuse the already-fitted scaler; nothing is re-fitted on X_test here.
scaler.transform(X_test)
Out[5]:
In [6]:
# Re-create the toy training matrix so this cell can be read on its own.
X_train = np.array(
    [
        [1.0, -1.0, 2.0],
        [2.0, 0.0, 0.0],
        [0.0, 1.0, -1.0],
    ]
)
# Fit a min-max scaler on the training data and transform it in one step.
min_max_scaler = preprocessing.MinMaxScaler()
X_train_minmax = min_max_scaler.fit_transform(X_train)
X_train_minmax
Out[6]:
In [7]:
# One new sample whose values (-3 and 4) lie outside the ranges seen in X_train.
X_test = np.array([[ -3., -1., 4.]])
# Apply the scaling learned from the training data; because the scaler is not
# re-fitted, out-of-range inputs are not guaranteed to stay inside the
# training output range.
X_test_minmax = min_max_scaler.transform(X_test)
X_test_minmax
Out[7]:
In [8]:
# Inspect the fitted scaler's learned `scale_` and `min_` attributes.
min_max_scaler.scale_, min_max_scaler.min_
Out[8]:
In [9]:
# Re-create the toy training matrix so this cell can be read on its own.
X_train = np.array(
    [
        [1.0, -1.0, 2.0],
        [2.0, 0.0, 0.0],
        [0.0, 1.0, -1.0],
    ]
)
# Fit a max-abs scaler on the training data and transform it in one step.
max_abs_scaler = preprocessing.MaxAbsScaler()
X_train_maxabs = max_abs_scaler.fit_transform(X_train)
X_train_maxabs
Out[9]:
In [10]:
# One new sample, identical to the one used for the min-max example.
X_test = np.array([[ -3., -1., 4.]])
# Apply the scaling learned from the training data (no re-fitting on X_test).
X_test_maxabs = max_abs_scaler.transform(X_test)
X_test_maxabs
Out[10]:
In [11]:
# Inspect the fitted max-abs scaler's learned `scale_` attribute.
max_abs_scaler.scale_
Out[11]:
In [14]:
# Function-style counterpart of the MinMaxScaler example: scales X_train
# directly without keeping a fitted estimator object around.
preprocessing.minmax_scale(X_train)
Out[14]:
In [15]:
# Function-style counterpart of the MaxAbsScaler example: scales X_train
# directly without keeping a fitted estimator object around.
preprocessing.maxabs_scale(X_train)
Out[15]:
In [18]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the iris dataset and keep the feature matrix and labels under short names.
iris = load_iris()
X = iris.data
y = iris.target
# Split into train and test parts with a fixed random_state for repeatability.
# Note: this rebinds X_train / X_test, which earlier cells used for toy arrays.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Minimum, quartiles and maximum of the first feature column of the training split.
np.percentile(X_train[:, 0], [0, 25, 50, 75, 100])
Out[18]:
In [19]:
# Quantile transformer with default settings apart from a fixed random_state.
# NOTE(review): the default number of quantiles may exceed the number of rows in
# this training split; newer scikit-learn releases reportedly warn and clip in
# that case — confirm, and consider passing n_quantiles explicitly.
quantile_transformer = preprocessing.QuantileTransformer(random_state=0)
# Fit on the training split only, then reuse the fitted mapping for the test split.
X_train_trans = quantile_transformer.fit_transform(X_train)
X_test_trans = quantile_transformer.transform(X_test)
In [20]:
# Minimum, quartiles and maximum of the first feature column after the quantile
# transform, for comparison with the same percentiles of the raw training data.
np.percentile(X_train_trans[:, 0], [0, 25, 50, 75, 100])
Out[20]:
In [21]:
# Same percentile summary for the held-out split, before and after the
# transform, displayed together as a 2-tuple.
(
    np.percentile(X_test[:, 0], [0, 25, 50, 75, 100]),
    np.percentile(X_test_trans[:, 0], [0, 25, 50, 75, 100]),
)
Out[21]:
You can map the data to a normal distribution:
In [22]:
# Rebind `quantile_transformer` to a new instance that targets a normal output
# distribution instead of the default one used above.
quantile_transformer = preprocessing.QuantileTransformer(
    output_distribution='normal',
    random_state=0,
)
# This time the transformer is fitted on the full feature matrix X rather than
# on the training split only.
X_trans = quantile_transformer.fit_transform(X)
# Display the quantiles learned during fitting.
quantile_transformer.quantiles_
Out[22]:
In [ ]: