In [5]:
# Bayesian ridge regression places priors on the coefficients,
# which pulls the mass of the posterior density towards the
# prior mean (usually 0)
from sklearn.datasets import make_regression

In [6]:
# 100 samples, 10 features, only 2 of which are informative
X, y = make_regression(100, 10, n_informative=2, noise=20)

In [7]:
from sklearn.linear_model import BayesianRidge

In [8]:
br = BayesianRidge()

In [9]:
# The two sets of hyperparameters of interest are alpha_1/alpha_2
# and lambda_1/lambda_2.
# The alphas are the shape and rate parameters of the gamma prior
# over alpha (the precision of the noise), and the lambdas are the
# shape and rate parameters of the gamma prior over lambda (the
# precision of the weights)
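
In [ ]:
# An illustrative aside (the scipy usage here is an assumption,
# not part of the original session): alpha_1 is the shape and
# alpha_2 the rate of that gamma prior; scipy's gamma takes a
# shape and a scale (= 1/rate), so the default prior
# Gamma(1e-06, 1e-06) has mean 1 but enormous variance -- it is
# nearly uninformative
from scipy import stats

default_prior = stats.gamma(a=1e-06, scale=1 / 1e-06)
default_prior.mean(), default_prior.var()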

In [10]:
# First, fit the model with the default hyperparameters
br.fit(X, y)


Out[10]:
BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [11]:
br.coef_


Out[11]:
array([ 73.50971115,   0.20412803,   5.20487135,  -1.4370047 ,
        -0.19369022,  33.19896028,   1.02237584,   3.13697212,
        -2.07951837,   0.80291728])
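
In [ ]:
# After fitting, the point estimates of the two precisions are
# available as the alpha_ (noise) and lambda_ (weights) attributes
br.alpha_, br.lambda_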

In [12]:
# Now modify the hyperparameters to see how the coefficients change
br_alphas = BayesianRidge(alpha_1=10, lambda_1=10)

In [13]:
br_alphas.fit(X, y)


Out[13]:
BayesianRidge(alpha_1=10, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=10, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [15]:
br_alphas.coef_


Out[15]:
array([  1.32087522e-07,  -4.84362047e-09,   2.38577101e-08,
        -1.41196553e-09,   1.46258496e-08,   4.67869309e-08,
         2.54396651e-08,  -3.72689544e-08,   7.80047553e-09,
         1.20829071e-08])
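
In [ ]:
# A quick check (an aside, not in the original session): comparing
# total coefficient magnitude shows how strongly the larger priors
# shrink the fit towards zero
import numpy as np

np.abs(br.coef_).sum(), np.abs(br_alphas.coef_).sum()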

In [ ]:
# For Bayesian ridge regression we assume priors over alpha (the
# precision of the noise) and lambda (the precision of the
# weights). Both priors are gamma distributions, and the gamma
# is a very flexible distribution.
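
In [ ]:
# A quick sketch of that flexibility (an illustrative aside, not
# part of the original session): varying the shape parameter moves
# the gamma from a sharp decay to a nearly symmetric bell
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

x = np.linspace(0.01, 10, 200)
for shape in (0.5, 1, 2, 9):
    plt.plot(x, stats.gamma(a=shape).pdf(x), label="shape=%s" % shape)
plt.legend()
plt.show()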