In [1]:
# Notebook-wide setup: inline plotting, core libraries, ggplot figure style.
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
# Apply ggplot styling to every figure produced below.
plt.style.use('ggplot')
# In [19]: load the dataset.
# NOTE: sklearn.datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2 (ethical concerns about the engineered 'B' feature).
# fetch_california_housing is the recommended regression replacement and
# returns the same Bunch interface: 'data', 'target', 'feature_names'.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

# In [21]: inspect the keys available on the Bunch.
housing.keys()

# In [22]: features as a labelled DataFrame.
feature_df = pd.DataFrame(data=housing['data'], columns=housing['feature_names'])

# In [23]: regression target as a named Series.
target = pd.Series(data=housing['target'], name='target')
# In [24]: quick look at the target distribution (assignment suppresses
# the Axes repr in the cell output).
ax = target.hist()

# In [25]: first few rows of the feature matrix for a sanity check.
feature_df.head()
# In [33]: AutoML search with TPOT, restricted to bare-regressor pipelines.
from tpot import TPOTRegressor

regressor = TPOTRegressor(
    generations=5,         # genetic-programming iterations
    population_size=50,    # candidate pipelines per generation
    verbosity=2,           # show per-generation progress
    n_jobs=4,              # parallel CV evaluations
    random_state=42,       # reproducible search
    template='Regressor',  # single-step pipelines only, no preprocessing
)
# NOTE(review): the search is fit on the full dataset — there is no holdout
# split anywhere in this notebook, so later predictions are on training data.
regressor.fit(feature_df.values, target.values)
In [36]:
# Estimator imports for the pipeline exported by TPOT; only
# GradientBoostingRegressor is used below.
# NOTE(review): RandomForestRegressor, AdaBoostRegressor, RidgeCV and
# train_test_split are imported but never called in the visible cells;
# imports should also live in the top setup cell, not mid-notebook.
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
In [35]:
# Export the best pipeline found by TPOT as Python source and display it.
# NOTE(review): execution counts are out of order here (In[36] before
# In[35]) — the notebook may not reproduce under Restart & Run All.
m = regressor.export()
print(m)
# In [37]: refit the TPOT-exported pipeline directly.
# FIX: the 'lad' loss alias was renamed to 'absolute_error' in
# scikit-learn 1.0 and removed in 1.2; 'lad' raises on current versions.
exported_pipeline = GradientBoostingRegressor(
    alpha=0.9,
    learning_rate=0.1,
    loss="absolute_error",  # least absolute deviation (formerly "lad")
    max_depth=7,
    max_features=0.3,
    min_samples_leaf=10,
    min_samples_split=3,
    n_estimators=100,
    subsample=0.7500000000000001,  # value as emitted by TPOT's export
    random_state=42,  # added for reproducibility, matching the TPOT search
)
exported_pipeline.fit(feature_df.values, target.values)
# NOTE(review): predictions are made on the training set, so any accuracy
# read off the next plot is optimistic — evaluate on a holdout split.
results = exported_pipeline.predict(feature_df.values)
# In [41]: scatter of predicted vs. actual target values (training data).
comparison = pd.DataFrame({'predicted': results, 'actual': target.values})
comparison.plot(kind='scatter', x='actual', y='predicted')