In [1]:
import pandas as pd
The purpose of this notebook is to give a brief overview of how to use the evolutionary-sampling-powered ensemble models developed as part of the EvoML research project.
The notebook will be made more verbose if time permits. The priority is to showcase the flexible API of the new estimators, which encourages research and tinkering.
In [2]:
from evoml.subsampling import BasicSegmenter_FEMPO, BasicSegmenter_FEGT, BasicSegmenter_FEMPT
In [3]:
# Load the ozone CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('datasets/ozone.csv')
In [4]:
# Preview the first two rows as a quick sanity check of the load.
df.head(2)
Out[4]:
In [5]:
# Split features from the target. The target is addressed by name, so the
# features are built by dropping that same name instead of slicing off the
# last column by position. This keeps 'output' out of X (and keeps every real
# feature in X) even if 'output' is not the final column of the CSV.
X, y = df.drop('output', axis=1), df['output']
In [6]:
# Print the class docstring of BasicSegmenter_FEGT.
print(BasicSegmenter_FEGT.__doc__)
In [7]:
from sklearn.tree import DecisionTreeRegressor
# Depth-limited (max_depth=3) regression tree, handed to the segmenter as its base estimator.
clf_dt = DecisionTreeRegressor(max_depth=3)
# statistics=True is passed straight through; consult BasicSegmenter_FEGT.__doc__
# for what it enables (presumably per-generation statistics — TODO confirm).
clf = BasicSegmenter_FEGT(base_estimator=clf_dt, statistics=True)
In [ ]:
# Fit the segmenter on the full X, y (this notebook makes no train/test split).
clf.fit(X, y)
In [9]:
# Scored on the same X, y that were used for fitting, so this is an in-sample figure.
clf.score(X, y)
Out[9]:
In [10]:
# Keep a handle on the fitted estimator's `segments_` attribute for the inspection cells below.
EGs = clf.segments_
In [11]:
# Number of entries in `segments_`.
len(EGs)
Out[11]:
In [12]:
# Collect the data held by each entry of `EGs` via its get_data() method.
sampled_datasets = [
    segment.get_data()
    for segment in EGs
]
In [13]:
# Show the `.shape` of every sampled dataset (one entry per segment).
[dataset.shape for dataset in sampled_datasets]
Out[13]:
In [ ]:
In [14]:
from evoml.subspacing import FeatureStackerFEGT, FeatureStackerFEMPO
In [15]:
# Print the class docstring of FeatureStackerFEGT.
print(FeatureStackerFEGT.__doc__)
In [16]:
# NOTE: this rebinds `clf`, replacing the BasicSegmenter_FEGT instance created earlier.
# ngen=30 — presumably the number of generations to evolve; confirm against the docstring.
clf = FeatureStackerFEGT(ngen=30)
In [17]:
# Fit the feature stacker on the full X, y (this notebook makes no train/test split).
clf.fit(X, y)
Out[17]:
In [18]:
# Scored on the same X, y that were used for fitting, so this is an in-sample figure.
clf.score(X, y)
Out[18]:
In [19]:
## Get the Hall of Fame individual: the first entry of the fitted stacker's `segment` attribute.
## NOTE(review): this reads `segment`, whereas the BasicSegmenter_FEGT cell above reads `segments_` —
## confirm that this is the attribute name FeatureStackerFEGT actually exposes.
hof = clf.segment[0]
In [20]:
# Collect the data held by each member of the Hall of Fame individual.
# This rebinds `sampled_datasets` from the subsampling section above.
sampled_datasets = [
    member.get_data()
    for member in hof
]
In [21]:
# List the column names carried by each sampled dataset.
[frame.columns.tolist() for frame in sampled_datasets]
Out[21]:
In [22]:
## Original X columns, shown for comparison with the per-dataset column lists in the previous cell
X.columns
Out[22]:
In [ ]:
In [ ]: