notebook.community

Edit and run



In [1]:

    
from sklearn import datasets
import numpy as np



In [3]:

    
# Build a random 10x10 symmetric positive-definite matrix to use as toy data.
# The seed is pinned so that Restart & Run All regenerates the same matrix.
mat = datasets.make_spd_matrix(10, random_state=0)



In [4]:

    
# Boolean mask with the same shape as `mat`; each entry is True with
# probability 0.1. A dedicated seeded generator is used instead of the global
# `np.random` state so the set of masked cells is reproducible across runs.
mask_rng = np.random.RandomState(0)
masking_array = mask_rng.binomial(1, .1, mat.shape).astype(bool)



In [5]:

    
# Punch holes in the data: every cell selected by the mask becomes NaN.
# Note this overwrites `mat` in place; later cells read the masked version.
mat[masking_array] = float("nan")



In [6]:

    
# Peek at the top-left 4x4 corner to confirm that NaNs were injected.
mat[0:4, 0:4]









    Out[6]:





array([[        nan, -1.28233028,  0.26733597, -1.82867292],
       [        nan,  2.11274337, -0.27099408,  1.68340478],
       [ 0.26733597, -0.27099408,  0.72121447, -0.2380178 ],
       [-1.82867292,  1.68340478, -0.2380178 ,  2.77275305]])



In [13]:

    
# Not using pipeline
from sklearn import preprocessing

# `preprocessing.Imputer` was deprecated in scikit-learn 0.20 and removed in
# 0.22; `sklearn.impute.SimpleImputer` is its replacement. Fall back to the
# legacy class only on old installations that lack `sklearn.impute`.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    from sklearn.preprocessing import Imputer

# Step 1: fill each NaN with the mean of its column (the default strategy).
impute = Imputer()
# Step 2: standardise each column to zero mean and unit variance.
scaler = preprocessing.StandardScaler()

# Apply the two steps by hand, feeding the output of one into the next.
mat_imputed = impute.fit_transform(mat)
mat_imp_and_scaled = scaler.fit_transform(mat_imputed)

# Show the top-left 4x4 corner of the result.
mat_imp_and_scaled[:4, :4]









    Out[13]:





array([[ 0.        , -1.39779077,  0.90066135, -1.55784233],
       [ 0.        ,  1.43805572, -0.87863909,  0.79701056],
       [ 0.91482885, -0.55303907,  2.40083069, -0.4913059 ],
       [-1.44900992,  1.07943661, -0.76964518,  1.52742006]])



In [14]:

    
# With Pipeline
from sklearn import pipeline

# Chain the same two estimator objects used above into a single pipeline:
# imputation runs first and its output is handed to the scaler.
pipe = pipeline.Pipeline(
    steps=[
        ('impute', impute),
        ('scaler', scaler),
    ]
)



In [16]:

    
# Fit every pipeline step on `mat`, then push `mat` through the fitted steps.
new_mat = pipe.fit(mat).transform(mat)



In [19]:

    
# Top-left 4x4 corner of the pipeline output, for comparison with the
# manually imputed-and-scaled matrix.
new_mat[0:4, 0:4]









    Out[19]:





array([[ 0.        , -1.39779077,  0.90066135, -1.55784233],
       [ 0.        ,  1.43805572, -0.87863909,  0.79701056],
       [ 0.91482885, -0.55303907,  2.40083069, -0.4913059 ],
       [-1.44900992,  1.07943661, -0.76964518,  1.52742006]])



In [20]:

    
# The pipeline and the manual two-step approach should agree element for
# element (same shape, same values).
np.array_equal(mat_imp_and_scaled, new_mat)









    Out[20]:





True



In [ ]: