A jinja extension for the harness


In [9]:
try:
    from .base import HarnessExtension
except:
    from harness.python.base import HarnessExtension

import pandas, sklearn.model_selection as model_selection
from toolz.curried import first

In [10]:
class SciKitExtension(HarnessExtension):
    alias = 'sklearn'
    def keywords(self, dataframe):
        return {
            'X': lambda: dataframe.values,
            'y': lambda: 
            dataframe.index.get_level_values(dataframe.feature_level) 
            if dataframe.feature_level else None,
        }
    
    def pipe(self, dataframe, attr):
        self.module_ = dataframe.estimator
        return super().pipe(dataframe, attr)
    
    def callback(self, dataframe, value):
        if value is dataframe.estimator:
            return dataframe
        if isinstance(value, pandas.np.ndarray):
            return dataframe.__class__(
                value,
                index=dataframe.index,
                feature_level=dataframe.feature_level,
            )
        if isinstance(value, pandas.CategoricalIndex):
            # new dataframe
            value = dataframe.set_index(value, append=True)
        
            value.index = value.index.reorder_levels([-1, *range(
                        len(dataframe.index.levels) if hasattr(dataframe.index, 'levels') else 1
                    )])

        return value