In [31]:
%matplotlib notebook
import json
import numpy as np
import pandas as pd
import seaborn as sns
import yellowbrick as yb
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import cross_val_score
In [9]:
# Load the raw energy-efficiency dataset from the shared data directory.
energy_path = "../data/energy/energy.csv"
data = pd.read_csv(energy_path)
# Peek at the first rows to confirm the file parsed as expected.
data.head()
Out[9]:
In [15]:
# Attach human-readable column names taken from the dataset's metadata file.
with open("../data/energy/meta.json", "r") as f:
    meta = json.load(f)

# Feature names first, then the target names (Y1, Y2) in dict order.
data.columns = meta['feature_names'] + list(meta['target_names'].values())
data.head()
Out[15]:
In [41]:
# Dataset dimensions as (n_rows, n_columns).
data.shape
Out[41]:
In [16]:
# Separate the two regression targets from the design matrix, using the
# metadata mapping so the notebook never hard-codes column names.
heating = data.loc[:, meta['target_names']['Y1']]
cooling = data.loc[:, meta['target_names']['Y2']]
features = data.loc[:, meta['feature_names']]
In [18]:
# Quick look at the feature matrix after the rename.
features.head()
Out[18]:
In [29]:
# Side-by-side violin plots of the two targets on a shared energy-load axis.
_, (ax_heat, ax_cool) = plt.subplots(1, 2, figsize=(9, 6), sharey=True)

sns.violinplot(data=heating, ax=ax_heat)
ax_heat.set_ylabel("energy load")
ax_heat.set_xlabel("heating")

sns.violinplot(data=cooling, ax=ax_cool)
ax_cool.set_xlabel("cooling")
Out[29]:
In [30]:
# Box plots of all features on one axis — a quick scan for scale
# differences and outliers (motivates the StandardScaler used below).
sns.boxplot(data=features)
Out[30]:
In [59]:
def build_poly(model, degree=1):
    """Wrap ``model`` in a standardize(+polynomial-expand) pipeline.

    With ``degree == 1`` the PolynomialFeatures step is omitted entirely,
    so the baseline pipeline is just scaling followed by the regressor.
    """
    steps = [("std", StandardScaler())]
    if degree != 1:
        steps.append(("poly", PolynomialFeatures(degree)))
    steps.append(("reg", model))
    return Pipeline(steps)

# Log-spaced regularization grid shared by the CV-tuned linear models.
alphas = np.logspace(-6, 0, 200)
In [38]:
# Visual sanity check of the alpha grid: values grow exponentially
# from 1e-6 to 1 because the grid is log-spaced.
plt.plot(alphas)
Out[38]:
In [43]:
# Ridge
# RidgeCV selects the best alpha from the grid internally; cross_val_score
# reports the estimator's default score (R^2 for regressors) over 3 folds.
ridge_reg = build_poly(RidgeCV(alphas=alphas))
cross_val_score(ridge_reg, X=features, y=heating, cv=3)
Out[43]:
In [56]:
# Fit on the full data, then inspect the chosen regularization strength and
# coefficients through the pipeline's public `named_steps` mapping instead of
# the private `_final_estimator` attribute (private API, may break on upgrade).
ridge_reg.fit(features, heating)
print(ridge_reg.named_steps["reg"].alpha_)
print(ridge_reg.named_steps["reg"].coef_)
In [53]:
# Lasso
# LassoCV tunes alpha over the same grid; scores are 3-fold R^2.
lasso_reg = build_poly(LassoCV(alphas=alphas))
cross_val_score(lasso_reg, X=features, y=heating, cv=3)
Out[53]:
In [57]:
# Fit on the full data and inspect the selected alpha and coefficients via
# the public `named_steps` API rather than the private `_final_estimator`
# attribute (consistent with the ridge cell above).
lasso_reg.fit(features, heating)
print(lasso_reg.named_steps["reg"].alpha_)
print(lasso_reg.named_steps["reg"].coef_)
In [61]:
# Sweep the polynomial degree from 1 through 9 and record the mean 3-fold
# R^2 of the RidgeCV pipeline at each degree.
degrees = range(1, 10)
scores = pd.Series(
    [
        cross_val_score(
            build_poly(RidgeCV(alphas=alphas), degree=d),
            X=features, y=heating, cv=3,
        ).mean()
        for d in degrees
    ],
    index=degrees,
    name="$r^2$ of RidgeCV by degree",
)
scores.plot()
Out[61]:
In [ ]:
In [64]:
# Gradient boosting with default hyperparameters inside the same
# scale-then-regress pipeline; 3-fold R^2 scores.
gb = build_poly(GradientBoostingRegressor())
cross_val_score(gb, X=features, y=heating, cv=3)
Out[64]:
In [67]:
# Fit the boosting pipeline on the full dataset so it can be inspected below.
gb.fit(features, heating)
Out[67]:
In [72]:
# Per-feature importances from the fitted boosting model, accessed through
# the pipeline's public `named_steps` mapping rather than the private
# `_final_estimator` attribute.
gb.named_steps["reg"].feature_importances_
Out[72]:
In [75]:
# Hold out 40% of the data and visualize prediction error for a small
# gradient-boosting pipeline with yellowbrick's regressor visualizer.
from sklearn.model_selection import train_test_split as tts
from yellowbrick.regressor import PredictionError

# Seed the split so the figure is reproducible on Restart & Run All.
X_train, X_test, y_train, y_test = tts(
    features, heating, train_size=0.6, random_state=42
)

oz = PredictionError(build_poly(GradientBoostingRegressor(n_estimators=10)))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
# NOTE(review): poof() was renamed show() in yellowbrick 1.0 — switch if on
# a newer version.
oz.poof()