Generate additional meta-features derived from pairwise differences and ratios of the most important features (helpful for tree-based models).
This utility package imports `numpy`, `pandas`, `matplotlib` and a helper `kg` module into the root namespace.
In [ ]:
from pygoose import *
In [ ]:
import itertools
Automatically discover the paths to various data folders and compose the project structure.
In [ ]:
# `project` is the handle used below to load the source feature lists and to
# save the newly generated features.
project = kg.Project.discover()
Identifier for storing these features on disk and referring to them later.
In [ ]:
# Passed to project.save_features() as the identifier of the generated features.
feature_list_id = 'meta_pairwise_interactions'
Feature lists containing the most important features.
In [ ]:
# Feature lists handed to project.load_feature_lists(); the loaded frames must
# contain every column named in most_important_features.
feature_lists = [
    # Place feature lists here.
]
Names of the features to use for pairwise interactions.
In [ ]:
# Column names to combine: every unordered pair of entries yields one
# difference feature and one ratio feature.
most_important_features = [
    # Place features here.
]
In [ ]:
# Load the train and test frames (presumably pandas DataFrames) for the chosen
# feature lists; the third return value is not used here.
df_train, df_test, _ = project.load_feature_lists(feature_lists)
In [ ]:
# Collects the names of the generated columns in the order they are created.
feature_names = []
In [ ]:
# For every unordered pair of important features, add a difference column and
# a ratio column to both the train and the test frame, and record their names.
for first, second in itertools.combinations(most_important_features, 2):
    diff_name = f'diff_{first}_{second}'
    ratio_name = f'ratio_{first}_{second}'

    for frame in (df_train, df_test):
        frame[diff_name] = frame[first] - frame[second]
        # NOTE(review): a zero in `second` yields inf/NaN under pandas
        # division; the values are stored as-is, without any clean-up.
        frame[ratio_name] = frame[first] / frame[second]

    # Same ordering as the columns: difference first, then ratio.
    feature_names.extend((diff_name, ratio_name))
In [ ]:
# Keep every generated feature. This binds the same list object as
# feature_names (no copy is made).
features_to_keep = feature_names
In [ ]:
# Pull the generated columns out of each frame as plain arrays, with columns
# ordered as in features_to_keep.
X_train, X_test = (
    frame[features_to_keep].values for frame in (df_train, df_test)
)
In [ ]:
# Store the train/test matrices together with their column names under
# feature_list_id so they can be referred to later.
project.save_features(X_train, X_test, feature_names, feature_list_id)