In [1]:
from sklearn import datasets
import numpy as np

In [3]:
# Build a random symmetric positive-definite matrix with n_dim=10 to use as
# toy data. A fixed random_state makes Restart & Run All reproduce the same
# matrix; the recorded outputs in this notebook came from an unseeded run, so
# the displayed numbers will change on the next execution.
mat = datasets.make_spd_matrix(10, random_state=0)

In [4]:
# Boolean mask with the same shape as `mat`; each entry is independently True
# with probability 0.1 and marks a value that will be treated as missing.
# Drawing from a dedicated seeded RandomState keeps the mask identical across
# reruns (the original used the unseeded global NumPy RNG).
masking_array = np.random.RandomState(0).binomial(1, .1, mat.shape).astype(bool)

In [5]:
# Overwrite the masked entries of `mat` in place with NaN to simulate missing
# data. Note this mutates `mat`, so the cell that creates it must be re-run
# to get the complete matrix back.
mat[masking_array] = float("nan")

In [6]:
# Peek at the top-left 4x4 corner to confirm NaNs were injected.
mat[0:4, 0:4]


Out[6]:
array([[        nan, -1.28233028,  0.26733597, -1.82867292],
       [        nan,  2.11274337, -0.27099408,  1.68340478],
       [ 0.26733597, -0.27099408,  0.72121447, -0.2380178 ],
       [-1.82867292,  1.68340478, -0.2380178 ,  2.77275305]])

In [13]:
# Not using pipeline: impute the missing values, then standardize, as two
# explicit fit/transform steps.
from sklearn import preprocessing

# `preprocessing.Imputer` was deprecated in scikit-learn 0.20 and removed in
# 0.22; `sklearn.impute.SimpleImputer` is its replacement and also defaults to
# mean imputation. Fall back to the legacy class only on installs that predate
# SimpleImputer, so the cell runs on both old and current versions.
try:
    from sklearn.impute import SimpleImputer
except ImportError:  # scikit-learn < 0.20
    SimpleImputer = preprocessing.Imputer

impute = SimpleImputer()                 # default strategy: replace NaN with the column mean
scaler = preprocessing.StandardScaler()  # rescale each column to zero mean, unit variance
mat_imputed = impute.fit_transform(mat)
mat_imp_and_scaled = scaler.fit_transform(mat_imputed)
# Imputed entries sit at the column mean, so they show up as 0 after scaling.
mat_imp_and_scaled[:4, :4]


Out[13]:
array([[ 0.        , -1.39779077,  0.90066135, -1.55784233],
       [ 0.        ,  1.43805572, -0.87863909,  0.79701056],
       [ 0.91482885, -0.55303907,  2.40083069, -0.4913059 ],
       [-1.44900992,  1.07943661, -0.76964518,  1.52742006]])

In [14]:
# With Pipeline: chain the same two steps behind a single estimator.
# The pipeline reuses the `impute` and `scaler` objects created in the
# non-pipeline cell; each step is a (name, transformer) pair, applied in order.
from sklearn import pipeline
pipe = pipeline.Pipeline(
    steps=[
        ('impute', impute),
        ('scaler', scaler),
    ]
)

In [16]:
# Fit every pipeline step on `mat` and return the fully transformed matrix
# in a single call.
new_mat = pipe.fit_transform(X=mat)

In [19]:
# Top-left 4x4 corner of the pipeline output, for comparison with the
# step-by-step result shown earlier.
new_mat[0:4, 0:4]


Out[19]:
array([[ 0.        , -1.39779077,  0.90066135, -1.55784233],
       [ 0.        ,  1.43805572, -0.87863909,  0.79701056],
       [ 0.91482885, -0.55303907,  2.40083069, -0.4913059 ],
       [-1.44900992,  1.07943661, -0.76964518,  1.52742006]])

In [20]:
# Sanity check: the manual two-step result and the pipeline result should be
# element-for-element identical (same shape, same values).
np.array_equal(mat_imp_and_scaled, new_mat)


Out[20]:
True

In [ ]: