In [31]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from matplotlib.pylab import rcParams
# Notebook-wide default figure size as a (width, height) pair
rcParams['figure.figsize'] = (12, 10)
Simulated data: a sine curve sampled between 60° and 300°, with Gaussian random noise added:
In [32]:
# Angles sampled every 4° starting at 60° (the last sample is 296°), in radians
x = np.arange(60, 300, 4) * np.pi / 180
# Seed NumPy's global generator so the noise below is reproducible
np.random.seed(0)
# Sine signal plus Gaussian noise with mean 0 and standard deviation 0.15
noise = np.random.normal(0, 0.15, len(x))
y = np.sin(x) + noise
# Two-column frame holding the inputs ('x') and the noisy targets ('y')
data = pd.DataFrame({'x': x, 'y': y})
In [33]:
# Scatter the noisy samples on an explicit Axes, with a title and axis labels so
# the figure can be read on its own; the trailing ';' hides the return value.
fig, ax = plt.subplots()
ax.plot(data['x'], data['y'], '.')
ax.set(title='Noisy sine samples', xlabel='x (radians)', ylabel='y');
Add a column for each power of x from 2 up to 15:
In [34]:
# Add polynomial features x_2 ... x_15; the first power is the existing 'x' column
base = data['x']
for power in range(2, 16):
    data[f'x_{power}'] = base ** power
In [35]:
# Preview the first five rows; leaving it as the cell's last expression lets the
# notebook render the frame as a table instead of plain printed text.
data.head(n=5)
Out[35]:
A generic function for ridge regression, similar to the one defined for simple linear regression:
In [36]:
# from: https://www.analyticsvidhya.com/blog/2016/01/complete-tutorial-ridge-lasso-regression-python/#three
def ridge_regression(data, predictors, alpha, models_to_plot=None):
    """Fit a ridge model on normalized predictors and summarise the fit.

    Every predictor column is centred and divided by the L2 norm of the
    centred column before fitting. The scaling is done here by hand because
    the ``normalize`` argument of ``Ridge`` was deprecated in scikit-learn 1.0
    and removed in 1.2. The intercept and coefficients that are returned are
    converted back to the original (unscaled) units of the predictors.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns named in ``predictors`` and the target
        column ``'y'``; the column ``'x'`` is also read when a plot is drawn.
    predictors : list of str
        Names of the columns used as model inputs.
    alpha : float
        Regularisation strength passed to ``Ridge``.
    models_to_plot : dict, optional
        Maps an alpha value to the subplot position handed to
        ``plt.subplot``. A fit is drawn only when ``alpha`` is a key of this
        mapping. Defaults to no plotting.

    Returns
    -------
    list
        ``[rss, intercept, coef_1, ..., coef_k]`` with one coefficient per
        entry of ``predictors``, where ``rss`` is the residual sum of squares
        on ``data``.
    """
    # None instead of a mutable ``{}`` default, which is shared between calls
    if models_to_plot is None:
        models_to_plot = {}

    X = data[predictors].to_numpy(dtype=float)
    y = data['y'].to_numpy(dtype=float)

    # Centre each column and scale it to unit L2 norm
    x_mean = X.mean(axis=0)
    x_centered = X - x_mean
    x_norm = np.linalg.norm(x_centered, axis=0)
    x_norm[x_norm == 0] = 1.0  # constant columns: avoid division by zero
    X_scaled = x_centered / x_norm

    # Fit the model
    ridgereg = Ridge(alpha=alpha)
    ridgereg.fit(X_scaled, y)
    y_pred = ridgereg.predict(X_scaled)

    # Express the fitted parameters in the original units of the predictors
    coef = ridgereg.coef_ / x_norm
    intercept = ridgereg.intercept_ - x_mean @ coef

    # Check if a plot is to be made for the entered alpha
    if alpha in models_to_plot:
        plt.subplot(models_to_plot[alpha])
        plt.tight_layout()
        plt.plot(data['x'], y_pred)
        plt.plot(data['x'], data['y'], '.')
        plt.title(f'Plot for alpha: {alpha:.3g}')

    # Return result in pre-defined format
    rss = float(np.sum((y_pred - y) ** 2))
    return [rss, intercept, *coef]
Analyze the results of ridge (L2-regularized) regression for 10 values of α:
In [37]:
# Model inputs: the linear term followed by the engineered powers x_2 ... x_15
predictors = ['x'] + [f'x_{power}' for power in range(2, 16)]
# Regularisation strengths to sweep over
alpha_ridge = [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]
# Empty results table: one row per alpha; columns hold RSS, intercept and the
# fifteen coefficients
col = ['rss', 'intercept']
col += [f'coef_x_{power}' for power in range(1, 16)]
ind = [f'alpha_{alpha:.2g}' for alpha in alpha_ridge]
coef_matrix_ridge = pd.DataFrame(index=ind, columns=col)
# alpha -> subplot position (2 rows x 3 columns) for the fits that get drawn
models_to_plot = {1e-15: 231, 1e-10: 232, 1e-4: 233, 1e-3: 234, 1e-2: 235, 5: 236}
In [38]:
# Fit one ridge model per alpha and store [rss, intercept, coefficients] in the
# matching row of the results table
for row_idx, alpha in enumerate(alpha_ridge):
    fit_summary = ridge_regression(data, predictors, alpha, models_to_plot)
    coef_matrix_ridge.iloc[row_idx,] = fit_summary
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: