In [ ]:
from sklearn.base import BaseEstimator, TransformerMixin

class MyTransformer(BaseEstimator, TransformerMixin):
    """Minimal template for a scikit-learn style transformer.

    Parameters
    ----------
    first_paramter : default=1
        First example hyper-parameter. The misspelled name is kept on
        purpose: it is the public keyword callers pass, so renaming it
        would break them.
    second_parameter : default=2
        Second example hyper-parameter.
    """

    def __init__(self, first_paramter=1, second_parameter=2):
        # all parameters must be specified in the __init__ function and
        # stored unmodified under the same name; assigning constants here
        # would silently discard whatever the caller passed in.
        self.first_paramter = first_paramter
        self.second_parameter = second_parameter

    def fit(self, X, y=None):
        """Placeholder fit step: prints a message and returns ``self``.

        fit should only take X and y as parameters; even if the model is
        unsupervised, a ``y`` argument has to be accepted.
        """
        # Model fitting code goes here
        print("fitting the model right here")
        # fit returns self
        return self

    def transform(self, X):
        """Return ``X + 1``; transform takes only X as parameter."""
        # apply some transformation to X:
        X_transformed = X + 1
        return X_transformed

In [ ]:


In [ ]:
from sklearn.utils.validation import check_X_y

class MyEstimator(object):
    """Hand-written estimator that implements get_params/set_params itself.

    Parameters
    ----------
    my_parameter : default="stuff"
        The single hyper-parameter; stored unmodified on the instance.
    """

    def __init__(self, my_parameter="stuff"):
        self.my_parameter = my_parameter

    def fit(self, X, y):
        """Validate ``X`` and ``y`` with ``check_X_y`` and return ``self``.

        Nothing is stored from the data in this example.
        """
        X, y = check_X_y(X, y)
        return self

    def set_params(self, **kwargs):
        """Update hyper-parameters from keyword arguments and return ``self``.

        Only ``my_parameter`` is accepted, matching the key reported by
        ``get_params``.

        Raises
        ------
        ValueError
            If any other keyword is passed.
        """
        # Iterate over (name, value) pairs; looping over the dict directly
        # would yield only the keys.
        for key, value in kwargs.items():
            if key == "my_parameter":
                self.my_parameter = value
            else:
                raise ValueError("Unknown parameter %s" % key)
        return self

    def get_params(self, deep=None):
        """Return the hyper-parameters as a dict; ``deep`` is accepted but unused."""
        return {'my_parameter': self.my_parameter}

In [ ]:
# Build the hand-written estimator and print it. MyEstimator defines no
# __repr__/__str__, so this shows Python's default object representation.
est = MyEstimator(my_parameter="bla")
print(est)

In [ ]:
from sklearn.utils.estimator_checks import check_estimator
# Run scikit-learn's estimator checks against the hand-written class.
# NOTE(review): the class itself (not an instance) is passed here; newer
# scikit-learn releases may require an instance -- confirm against the
# installed version.
check_estimator(MyEstimator)

In [ ]:
from sklearn.utils.validation import check_X_y, check_array

class MyBrokenEstimator(object):
    """Counter-example estimator whose ``__init__`` alters its parameter.

    As the class name says, this variant is broken on purpose: the value
    stored in ``my_parameter`` is not the value that was passed in, so
    ``get_params`` does not report what the caller supplied. The remaining
    methods are regular, working implementations.

    Parameters
    ----------
    my_parameter : str, default="stuff"
        Must support ``+`` with a string, because " more stuff" is appended.
    """

    def __init__(self, my_parameter="stuff"):
        # Intentional defect of this example: the argument is modified
        # before being stored.
        self.my_parameter = my_parameter + " more stuff"

    def fit(self, X, y):
        """Validate ``X`` and ``y`` with ``check_X_y`` and return ``self``."""
        X, y = check_X_y(X, y)
        return self

    def set_params(self, **kwargs):
        """Update hyper-parameters from keyword arguments and return ``self``.

        Only ``my_parameter`` is accepted, matching the key reported by
        ``get_params``; the value is stored as given.

        Raises
        ------
        ValueError
            If any other keyword is passed.
        """
        # Iterate over (name, value) pairs; looping over the dict directly
        # would yield only the keys.
        for key, value in kwargs.items():
            if key == "my_parameter":
                self.my_parameter = value
            else:
                raise ValueError("Unknown parameter %s" % key)
        return self

    def get_params(self, deep=None):
        """Return the hyper-parameters as a dict; ``deep`` is accepted but unused."""
        return {'my_parameter': self.my_parameter}

In [ ]:
# Run the estimator checks on the broken variant, whose __init__ alters
# my_parameter before storing it.
# NOTE(review): presumably this cell is expected to raise -- confirm that
# the failure is the point of the demonstration.
check_estimator(MyBrokenEstimator)

In [ ]:
from sklearn.base import BaseEstimator

class MyInheritingEstimator(BaseEstimator):
    """Estimator that derives from ``BaseEstimator``.

    Only ``__init__`` and ``fit`` are written here; this class defines no
    ``get_params``/``set_params`` of its own.

    Parameters
    ----------
    my_parameter : default="stuff"
        Stored unmodified on the instance.
    """

    def __init__(self, my_parameter="stuff"):
        self.my_parameter = my_parameter

    def fit(self, X, y):
        """Validate the training data with ``check_X_y`` and return ``self``."""
        # The validated arrays are not kept; this example stores nothing
        # from the data.
        check_X_y(X, y)
        return self

In [ ]:
# Rebind `est` to an instance of the BaseEstimator subclass and print it.
# NOTE(review): presumably the printed text now comes from BaseEstimator
# rather than Python's default object repr -- confirm.
est = MyInheritingEstimator(my_parameter="bla")
print(est)

In [ ]:
# Run scikit-learn's estimator checks on the BaseEstimator subclass.
# NOTE(review): a class (not an instance) is passed; confirm the installed
# scikit-learn version still accepts that.
check_estimator(MyInheritingEstimator)

In [ ]:
from sklearn.base import TransformerMixin
class MyTransformer(BaseEstimator, TransformerMixin):
    """Transformer that subtracts 2 from every entry of ``X``.

    Note: this definition reuses the name ``MyTransformer`` and therefore
    replaces the template class of the same name defined at the top of the
    notebook once this cell has run.

    Parameters
    ----------
    my_parameter : default="stuff"
        Stored unmodified on the instance; not used by fit or transform.
    """

    def __init__(self, my_parameter="stuff"):
        self.my_parameter = my_parameter

    def fit(self, X, y=None):
        """Record the number of columns of ``X`` and return ``self``.

        ``y`` is optional because the transformation never uses it; when it
        is supplied, ``X`` and ``y`` are validated together as before.
        """
        if y is None:
            X = check_array(X)
        else:
            X, y = check_X_y(X, y)
        # Remembered so transform can reject input of a different width.
        self.n_features_ = X.shape[1]
        return self

    def transform(self, X):
        """Return ``X - 2`` after validating ``X``.

        Raises
        ------
        ValueError
            If ``X`` does not have the number of columns seen in ``fit``.
        """
        X = check_array(X)
        if X.shape[1] != self.n_features_:
            raise ValueError(
                "X has %d features, but MyTransformer was fitted with %d features"
                % (X.shape[1], self.n_features_))
        return X - 2

In [ ]:
# Run scikit-learn's estimator checks on the transformer defined in the
# previous cell (the most recent class bound to the name MyTransformer).
# NOTE(review): a class (not an instance) is passed; confirm the installed
# scikit-learn version still accepts that.
check_estimator(MyTransformer)

In [ ]:
import numpy as np
from sklearn.base import ClassifierMixin

class MyBrokenClassifier(BaseEstimator, ClassifierMixin):
    """Counter-example classifier whose predictions ignore the input.

    ``predict`` always returns the same two-element array, regardless of
    how many rows ``X`` has.

    Parameters
    ----------
    my_parameter : default="stuff"
        Stored unmodified on the instance; not used elsewhere.
    """

    def __init__(self, my_parameter="stuff"):
        self.my_parameter = my_parameter

    def fit(self, X, y):
        """Validate ``X`` and ``y`` with ``check_X_y`` and return ``self``."""
        # Validation only; nothing from the data is stored.
        check_X_y(X, y)
        return self

    def predict(self, X):
        """Validate ``X`` with ``check_array``, then return ``np.array([1, 2])``."""
        check_array(X)
        fixed_output = np.array([1, 2])
        return fixed_output

In [ ]:
# Run the estimator checks on the broken classifier, whose predict returns
# a fixed two-element array.
# NOTE(review): presumably this cell is expected to raise -- confirm that
# the failure is the point of the demonstration.
check_estimator(MyBrokenClassifier)

In [ ]:
from sklearn.metrics import euclidean_distances
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_is_fitted

class MyClassifier(BaseEstimator, ClassifierMixin):
    """Classifier that predicts the label of the closest stored training row.

    ``fit`` keeps the validated training data; ``predict`` looks up, for
    each query row, the training row with the smallest Euclidean distance
    and returns that row's label.

    Parameters
    ----------
    my_parameter : default="stuff"
        Stored unmodified on the instance; not used by fit or predict.
    """

    def __init__(self, my_parameter="stuff"):
        self.my_parameter = my_parameter

    def fit(self, X, y):
        """Validate and memorise the training data, then return ``self``."""
        X_checked, y_checked = check_X_y(X, y)
        self.classes_ = unique_labels(y_checked)
        self.X_ = X_checked
        self.y_ = y_checked
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label of its nearest training row."""
        # Refuse to predict before fit has stored the training data.
        check_is_fitted(self, ["X_", "y_"])
        queries = check_array(X)
        # One row per query, one column per stored training sample.
        distances = euclidean_distances(queries, self.X_)
        nearest = np.argmin(distances, axis=1)
        return self.y_[nearest]

In [ ]:
# Run scikit-learn's estimator checks on the nearest-neighbour classifier.
# NOTE(review): a class (not an instance) is passed; confirm the installed
# scikit-learn version still accepts that.
check_estimator(MyClassifier)