In [20]:
import numpy as np
from sklearn import datasets as d
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
In [2]:
# Load the iris dataset and take a private copy of its feature matrix.
# The copy matters: iris_x is overwritten with NaNs further down, and
# without it that in-place write would also clobber iris.data, leaving
# no clean version of the raw features in the kernel.
iris = d.load_iris()
iris_x = iris.data.copy()
In [3]:
# Boolean mask with the same shape as iris_x: each entry is an independent
# Bernoulli draw that is True with probability MISSING_RATE. True marks a
# value that will be treated as missing.
# The generator is seeded so that Restart & Run All reproduces the same mask
# (and therefore the same imputed output) every time.
MISSING_RATE = .25
rng = np.random.RandomState(0)
masking_array = rng.binomial(1, MISSING_RATE, iris_x.shape).astype(bool)
In [17]:
# Knock out the masked entries in place: every position where
# masking_array is True becomes NaN in iris_x.
np.putmask(iris_x, masking_array, np.nan)
In [18]:
# Peek at the first five rows of the mask (all columns).
masking_array[:5, :]
Out[18]:
In [19]:
# First five rows of the features after masking; NaN marks a removed value.
iris_x[:5, :]
Out[19]:
In [21]:
# Build the imputer that will fill NaN entries with a per-column statistic.
# preprocessing.Imputer was removed in scikit-learn 0.22; SimpleImputer is
# its replacement and keeps the same defaults, spelled out here for clarity.
# could pass strategy='median' or 'most_frequent'
impute = SimpleImputer(missing_values=np.nan, strategy='mean')
In [23]:
# Learn the fill values from iris_x, then apply them to the same matrix;
# the result is a new array and iris_x itself is left as it was.
iris_x_prime = impute.fit(iris_x).transform(iris_x)
In [24]:
# First five rows of the imputed matrix, for comparison with iris_x above.
iris_x_prime[:5, :]
Out[24]:
In [ ]: