In [1]:
%matplotlib notebook
from linear_model import RelevanceVectorMachine, distribution_wrapper, GaussianFeatures, \
FourierFeatures, repeated_regression, plot_summary
from sklearn import preprocessing
import numpy as np
from scipy import stats
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rc('text', usetex=True)
matplotlib.rcParams['text.latex.preamble'] = r"\usepackage{amsmath}"
from sklearn import utils
import sklearn
First things first, let's set up the dataset we want to regress.
In [2]:
x = np.linspace(-5,5,500)
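# prediction grid extends 50% beyond the training range to show extrapolation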
x_pred = np.linspace(1.5*x[0],1.5*x[-1],x.shape[0])
epsilon = stats.norm(loc=0,scale=0.01)
noise = epsilon.rvs(size=x.shape[0])
# t_0 = np.exp(-x**2) + noise
# t_1 = -2*x*np.exp(-x**2) + noise
t_0 = np.cos(x) + noise
t_1 = -np.sin(x) + noise
t = np.hstack((t_0,t_1))
N = len(t_0)
N_pred = len(x_pred)
fig = plt.figure(figsize=(5,5))
plt.plot(x,t_0,'ro',markerfacecolor="None",label="data")
plt.plot(x,t_1,'bo',markerfacecolor="None",label="data (1st derivative)")
plt.xlabel("input")
plt.ylabel("output")
plt.legend(loc=0)
plt.show()
In [3]:
# choosing the feature space
k = 5
trafo = preprocessing.PolynomialFeatures(k)
X_0 = trafo.fit_transform(x.reshape((-1,1)))
trafo_der = preprocessing.PolynomialFeatures(k-1)
X_1 = trafo_der.fit_transform(x.reshape((-1,1)))
# d/dx x^n = n*x^(n-1): scale each monomial by its power and prepend a zero
# column for the constant term, so both blocks share one weight vector
X_1 = np.hstack((np.zeros((X_1.shape[0],1)),X_1*np.arange(1,k+1)))
X = np.vstack((X_0,X_1))
t = np.hstack((t_0,t_1))
print(X.shape,t.shape)
M = X.shape[1]
# initializing hyperparameters
init_beta = 1./ np.var(t) # (that's the default start)
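# sparse start: all basis functions pruned (alpha = inf) except the constant column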
init_alphas = np.ones(X.shape[1])
init_alphas[1:] = np.inf
# setting up the model regression class
model = RelevanceVectorMachine(n_iter=250,verbose=False,compute_score=True,init_beta=init_beta,
init_alphas=init_alphas)
# regress
model.fit(X,t)
# predict
X_pred_0 = trafo.fit_transform(x_pred.reshape((-1,1)))
X_pred_1 = trafo_der.fit_transform(x_pred.reshape((-1,1)))
# apply the same derivative scaling and zero column as for the training features
X_pred_1 = np.hstack((np.zeros((X_pred_1.shape[0],1)),X_pred_1*np.arange(1,k+1)))
X_pred = np.vstack((X_pred_0,X_pred_1))
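# return_std=True also returns the predictive standard deviation; the 2*sigma bands below are plotted as ~95% intervals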
y, yerr = model.predict(X_pred,return_std=True)
y_0, y_1 = y[:N_pred], y[N_pred:]
yerr_0, yerr_1 = yerr[:N_pred], yerr[N_pred:]
fig = plt.figure()
ax = fig.add_subplot(121)
ax.plot(x,t_0,'ro',label="data",markerfacecolor="None")
ax.plot(x,t_1,'bo',label="data (1st derivative)",markerfacecolor="None")
ax.fill_between(x_pred,y_0-2*yerr_0,y_0+2*yerr_0,alpha=.5,label=r"95\%")
ax.fill_between(x_pred,y_1-2*yerr_1,y_1+2*yerr_1,alpha=.5,label=r"95\% (1st derivative)")
ax.plot(x_pred,y_0,'-',label="estimate")
ax.plot(x_pred,y_1,'-',label="estimate (1st derivative)")
plt.legend(loc=0)
ax.set_xlabel("input")
ax.set_ylabel("output")
ax1 = fig.add_subplot(122)
ax1.plot(model.mse_,'-')
ax1.set_xlabel("iteration")
ax1.set_ylabel("MSE")
plt.tight_layout()
plt.show()
In [13]:
# choosing the feature space
# trafo_0 = GaussianFeatures(k=30, mu0=-3, dmu=.2, k_der=0)
# trafo_1 = GaussianFeatures(k=30, mu0=-3, dmu=.2, k_der=1)
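# k_der switches between the plain basis (0) and its first derivative (1), matching the stacked targets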
trafo_0 = FourierFeatures(k=10, k_der=0)
trafo_1 = FourierFeatures(k=10, k_der=1)
X_0 = trafo_0.fit_transform(x.reshape((-1,1)))
X_1 = trafo_1.fit_transform(x.reshape((-1,1)))
X = np.vstack((X_0,X_1))
# initializing hyperparameters
init_beta = 1./ np.var(t) # (that's the default start)
init_alphas = np.ones(X.shape[1])
init_alphas[1:] = np.inf
# setting up the model regression class
model = RelevanceVectorMachine(n_iter=250,verbose=False,compute_score=True,init_beta=init_beta,
init_alphas=init_alphas,do_logbook=True,update_pct=1)
# regress
model.fit(X,t)
# predict
X_pred_0 = trafo_0.fit_transform(x_pred.reshape((-1,1)))
X_pred_1 = trafo_1.fit_transform(x_pred.reshape((-1,1)))
X_pred = np.vstack((X_pred_0,X_pred_1))
y, yerr = model.predict(X_pred, return_std=True)
y_0, y_1 = y[:N_pred], y[N_pred:]
yerr_0, yerr_1 = yerr[:N_pred], yerr[N_pred:]
fig = plt.figure()
ax = fig.add_subplot(121)
ax.plot(x,t_0,'ro',label="data",markerfacecolor="None")
ax.plot(x,t_1,'bo',label="data (1st derivative)",markerfacecolor="None")
ax.fill_between(x_pred,y_0-2*yerr_0,y_0+2*yerr_0,alpha=.5,label=r"95\%")
ax.fill_between(x_pred,y_1-2*yerr_1,y_1+2*yerr_1,alpha=.5,label=r"95\% (1st derivative)")
ax.plot(x_pred,y_0,'-',label="estimate")
ax.plot(x_pred,y_1,'-',label="estimate (1st derivative)")
plt.legend(loc=0)
ax.set_xlabel("input")
ax.set_ylabel("output")
ax1 = fig.add_subplot(122)
ax1.plot(model.mse_,'-')
ax1.set_xlabel("iteration")
ax1.set_ylabel("MSE")
plt.tight_layout()
plt.show()
In [7]:
# choosing the feature space
trafo_0 = GaussianFeatures(k=30, mu0=-3, dmu=.2, k_der=0)
trafo_1 = GaussianFeatures(k=30, mu0=-3, dmu=.2, k_der=1)
base_trafo_0 = trafo_0.fit_transform
base_trafo_1 = trafo_1.fit_transform
# initializing hyperparameters using callable distributions giving new hyperparameters
# with every call (useful for repeated regression)
init_beta = distribution_wrapper(stats.halfnorm(scale=1),size=1,single=True)
init_alphas = distribution_wrapper(stats.halfnorm(scale=1),single=False)
model_type = RelevanceVectorMachine
model_kwargs = dict(n_iter=250,verbose=False,compute_score=True,init_beta=init_beta,
init_alphas=init_alphas,fit_intercept=False, update_pct=.9)
Nruns = 100
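# fit the model Nruns times with freshly drawn initial hyperparameters and collect runtimes, coefficients and fitted models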
runtimes, coefs, models = repeated_regression(x, base_trafo_0, model_type, t=t,
model_kwargs=model_kwargs, Nruns=Nruns,
return_coefs=True, return_models=True,
base_trafo_1=base_trafo_1)
X_0 = base_trafo_0(x.reshape((-1,1)))
X_1 = base_trafo_1(x.reshape((-1,1)))
plot_summary(models,noise,x,t,X_0,coefs,base_trafo_0,X_1=X_1)
Adding the derivatives to the regression appears to make finding the global optimum harder: the confidence intervals of the predicted functions no longer overlap the data everywhere, the weights are considerably more uncertain, and the estimated noise level deviates from the true one by about an order of magnitude. So there is either a bug or a more fundamental issue. Interestingly, the regression improves when the number of basis functions allowed to be updated per iteration is limited (the update_pct keyword).
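A quick way to probe that last point is to sweep update_pct and compare the final training error of each fit. The cell below is only a rough sketch: it assumes the Fourier design matrix X and the targets t from the cell above are still in scope, and that mse_ holds one value per iteration.
In [ ]:
# Rough sketch: refit with different update_pct values and compare the final MSE
# (assumes X and t from the Fourier-feature cell above are still defined).
for pct in (1.0, 0.9, 0.5, 0.1):
    alphas = np.ones(X.shape[1])
    alphas[1:] = np.inf  # same sparse start as above
    m = RelevanceVectorMachine(n_iter=250, verbose=False, compute_score=True,
                               init_beta=1./np.var(t), init_alphas=alphas,
                               update_pct=pct)
    m.fit(X, t)
    print("update_pct = {:.1f} -> final MSE = {:.3e}".format(pct, m.mse_[-1]))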