In [1]:
from sklearn import datasets
import numpy as np
In [3]:
# Build a random 10x10 symmetric positive-definite matrix to use as toy data.
# random_state is pinned so that Restart & Run All reproduces the same values.
mat = datasets.make_spd_matrix(10, random_state=0)
In [4]:
# Boolean mask with the same shape as `mat`; each entry is True with
# probability 0.1. A dedicated, seeded generator (instead of the global
# np.random state) keeps the mask identical on every run.
masking_array = np.random.RandomState(0).binomial(1, .1, mat.shape).astype(bool)
In [5]:
# Punch holes in the data: every position flagged by the mask becomes NaN.
# Note this overwrites `mat` in place.
mat[masking_array] = float("nan")
In [6]:
# Peek at the top-left 4x4 corner to see where NaNs were injected.
mat[0:4, 0:4]
Out[6]:
In [13]:
# Not using pipeline: run each preprocessing step by hand.
from sklearn import preprocessing

# `preprocessing.Imputer` was deprecated in scikit-learn 0.20 and removed in
# 0.22; `sklearn.impute.SimpleImputer` is its replacement. Fall back to the old
# class only on releases that predate the new module.
try:
    from sklearn.impute import SimpleImputer
    impute = SimpleImputer()
except ImportError:
    impute = preprocessing.Imputer()
scaler = preprocessing.StandardScaler()

# Step 1: fill each NaN with the mean of its column (the default strategy).
mat_imputed = impute.fit_transform(mat)
# Step 2: standardize every column to zero mean and unit variance.
mat_imp_and_scaled = scaler.fit_transform(mat_imputed)
mat_imp_and_scaled[:4, :4]
Out[13]:
In [14]:
# With Pipeline: chain the imputer and scaler objects into a single estimator
# so both steps can be fitted and applied with one call.
from sklearn import pipeline
pipe = pipeline.Pipeline(
    steps=[
        ('impute', impute),
        ('scaler', scaler),
    ]
)
In [16]:
# Fit every pipeline step on `mat` and return the fully transformed matrix;
# each step receives the output of the step before it.
new_mat = pipe.fit_transform(mat)
In [19]:
# Peek at the top-left 4x4 corner of the pipeline output.
new_mat[0:4, 0:4]
Out[19]:
In [20]:
# Sanity check: the manual two-step result and the pipeline result should have
# the same shape and identical elements.
np.array_equal(mat_imp_and_scaled, new_mat)
Out[20]:
In [ ]: