In [1]:
# tuning ridge regression parameter to find relationships between
# models.
In [10]:
from sklearn.datasets import make_regression
import numpy as np
In [11]:
# Synthetic regression problem: 100 samples, 2 features, Gaussian noise
# (standard deviation 10) added to the target. A low effective_rank yields
# strongly correlated features.
# random_state pins the random draw so that Restart & Run All reproduces
# every number and plot that follows.
reg_data, reg_target = make_regression(n_samples=100,
                                       n_features=2, effective_rank=1,
                                       noise=10, random_state=0)
In [12]:
# Preview the first five rows of the feature matrix (one row per sample).
reg_data[:5]
Out[12]:
In [13]:
# Preview the first five target values.
reg_target[:5]
Out[13]:
In [14]:
# Ridge Cross Validation is similar to Leave-One-Out Cross
# Validation (LOOCV)
In [15]:
from sklearn.linear_model import RidgeCV
# First pass: cross-validated ridge over a coarse grid of four candidate
# alphas.
rcv = RidgeCV(
    alphas=np.array([0.1, 0.2, 0.3, 0.4]),
)
In [23]:
# Fit on the synthetic data; the selected penalty is read back from
# `rcv.alpha_` in the next cell.
rcv.fit(reg_data, reg_target)
Out[23]:
In [26]:
# Report the alpha chosen from the coarse grid.
# print() with a single argument behaves the same on Python 2 and 3; the
# bare print statement is a SyntaxError on Python 3.
print('best alpha is:')
rcv.alpha_
Out[26]:
In [18]:
# Second pass: a finer grid of five candidate alphas around 0.1.
rcv2 = RidgeCV(
    alphas=np.array([0.08, 0.09, 0.1, 0.11, 0.12]),
)
In [19]:
# Fit the finer-grid search on the same data; the selected penalty is read
# back from `rcv2.alpha_` in the next cell.
rcv2.fit(reg_data, reg_target)
Out[19]:
In [27]:
# Report the alpha chosen from the finer grid.
# print() with a single argument behaves the same on Python 2 and 3; the
# bare print statement is a SyntaxError on Python 3.
print('best alpha is:')
rcv2.alpha_
Out[27]:
In [28]:
# What does "best" mean?
# At each step in the CV process, the model scores an error against
# the test sample. By default, it's a squared error.
In [29]:
# Candidate penalties: 50 evenly spaced values covering [0.01, 1].
# (50 is np.linspace's default count; it is written out here for clarity.)
alphas_to_test = np.linspace(start=0.01, stop=1, num=50)
In [31]:
# Preview the first five candidate alphas.
alphas_to_test[:5]
Out[31]:
In [32]:
# Search the full alpha grid and keep the cross-validation values so they
# can be inspected after fitting (read back below as `cv_values_`).
# NOTE(review): newer scikit-learn releases rename `store_cv_values` to
# `store_cv_results` — confirm against the installed version.
rcv3 = RidgeCV(alphas=alphas_to_test, store_cv_values=True)
In [33]:
# Fit the grid search; `cv_values_` and `alpha_` are read in the cells below.
rcv3.fit(reg_data, reg_target)
Out[33]:
In [34]:
# Expected to be (n_samples, n_alphas) = (100, 50) per the RidgeCV docs —
# confirm against the displayed output.
rcv3.cv_values_.shape
Out[34]:
In [35]:
# The alpha RidgeCV selected from `alphas_to_test`.
rcv3.alpha_
Out[35]:
In [39]:
# Recompute the winner by hand: average the stored CV values along axis 0
# and take the grid position with the lowest mean, then look up its alpha.
smallest_idx = np.argmin(np.mean(rcv3.cv_values_, axis=0))
alphas_to_test[smallest_idx]
Out[39]:
In [41]:
# Sanity check: the alpha RidgeCV selected should be the grid point whose
# mean CV error (computed by hand above) is smallest.
# NOTE(review): exact == assumes `alpha_` is taken verbatim from the
# supplied grid rather than recomputed — confirm.
# The branch bodies are indented (they were flush-left, a syntax error) and
# print()/str.format is used so the cell runs on both Python 2 and 3.
if rcv3.alpha_ == alphas_to_test[smallest_idx]:
    print('Got it! Ridge CV found the correct smallest alpha: {}'.format(rcv3.alpha_))
else:
    print('Something went wrong. Ridge Regression didnt find the smallest value')
In [49]:
%matplotlib inline
import matplotlib.pyplot as plt
In [53]:
# Mean squared CV error for each candidate alpha.
f, ax = plt.subplots(figsize=(7, 5))
# plot our results and look for the bottom of the trough
ax.plot(alphas_to_test, rcv3.cv_values_.mean(axis=0), color='r')
# Label the figure so it can be read without the surrounding cells; the
# trailing semicolon suppresses the return-value repr in the cell output.
ax.set(xlabel='alpha',
       ylabel='mean squared CV error',
       title='RidgeCV: mean CV error per candidate alpha');
Out[53]:
In [54]:
# Creating our own scoring function
from msemad import MAD
In [55]:
from sklearn.metrics import make_scorer
In [56]:
# Wrap the imported MAD function as a scikit-learn scorer;
# greater_is_better=False marks it as a loss (lower is better).
# The scorer gets its own name: rebinding `MAD` made this cell
# non-idempotent, since a second run would wrap the scorer in another scorer.
mad_scorer = make_scorer(MAD, greater_is_better=False)
# Same alpha grid as rcv3, but candidates are ranked with the MAD scorer.
rcv4 = RidgeCV(alphas=alphas_to_test, store_cv_values=True,
               scoring=mad_scorer)
In [57]:
# Fit the MAD-scored grid search on the same data.
rcv4.fit(reg_data, reg_target)
Out[57]:
In [60]:
# Same by-hand lookup as for rcv3; here it does not agree with rcv4.alpha_.
# NOTE(review): with a custom `scoring`, cv_values_ apparently no longer
# holds squared errors (the RidgeCV docs describe per-sample predictions in
# that case). If so, the column mean is not a loss and argmin over it is
# meaningless — confirm against the installed scikit-learn version.
smallest_idx = rcv4.cv_values_.mean(axis=0).argmin()
alphas_to_test[smallest_idx] # this is wrong...
Out[60]:
In [61]:
# The alpha RidgeCV itself selected under the MAD scorer.
rcv4.alpha_ # this is correct
Out[61]:
In [62]:
# Column means of rcv4.cv_values_ for each candidate alpha.
# NOTE(review): with a custom scorer these values may not be errors, so the
# trough of this curve need not coincide with rcv4.alpha_ — confirm what
# cv_values_ contains for the installed scikit-learn version.
f, ax = plt.subplots(figsize=(7, 5))
# plot our results and look for the bottom of the trough
ax.plot(alphas_to_test, rcv4.cv_values_.mean(axis=0), color='r')
# Label the figure so it can be read without the surrounding cells; the
# trailing semicolon suppresses the return-value repr in the cell output.
ax.set(xlabel='alpha',
       ylabel='mean of cv_values_',
       title='RidgeCV with MAD scorer: mean cv_values_ per candidate alpha');
Out[62]:
In [ ]: