In [1]:
# Least Absolute Shrinkage and Selection Operator (LASSO)
# LASSO is similar to Ridge Regression in that both penalize the size of
# the regression coefficients; LASSO uses an L1 penalty instead of Ridge's
# L2 penalty, which drives many coefficients to exactly zero.
In [2]:
from sklearn.datasets import make_regression
In [3]:
# 200 samples, 500 features, but only 5 features carry signal --
# a setting where a sparse model should recover the informative features
reg_data, reg_target = make_regression(n_samples=200, n_features=500,
                                       n_informative=5, noise=5)
In [10]:
from sklearn.linear_model import Lasso
import numpy as np
In [11]:
lasso = Lasso()
In [12]:
lasso
Out[12]:
In [13]:
# the alpha parameter controls the strength of the L1 penalty
# (default alpha=1.0); larger alpha forces more coefficients to zero
In [14]:
lasso.fit(reg_data, reg_target)
Out[14]:
In [15]:
np.sum(lasso.coef_ != 0)
Out[15]:
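In [ ]:
# A quick sketch (not in the original notebook) backing up the comparison
# with Ridge above: Ridge's L2 penalty shrinks coefficients toward zero
# but rarely makes them exactly zero, so nearly all 500 stay nonzero
from sklearn.linear_model import Ridge
np.sum(Ridge().fit(reg_data, reg_target).coef_ != 0)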
In [16]:
# with alpha=0 the penalty vanishes and Lasso reduces to ordinary least
# squares (scikit-learn advises against alpha=0 for numerical reasons;
# use LinearRegression instead)
lasso_0 = Lasso(alpha=0.0)
In [17]:
lasso_0.fit(reg_data, reg_target)
Out[17]:
In [18]:
np.sum(lasso_0.coef_ != 0)
Out[18]:
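In [ ]:
# A rough sketch (not in the original notebook): sweeping a few arbitrary
# alpha values to watch sparsity increase with the penalty strength
for alpha in [0.1, 0.5, 1.0, 5.0, 10.0]:
    n_nonzero = np.sum(Lasso(alpha=alpha).fit(reg_data, reg_target).coef_ != 0)
    print(alpha, n_nonzero)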
In [19]:
# how it works:
# Linear Regression minimizes the squared error.
# Lasso still minimizes the squared error but adds an L1 penalty,
# alpha * sum(|coef|), which induces sparsity in the coefficients.
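In [ ]:
# Minimal sketch of the objective Lasso minimizes, written out here for
# illustration in scikit-learn's parameterization:
#   (1 / (2 * n_samples)) * ||y - Xw - b||^2_2 + alpha * ||w||_1
def lasso_objective(w, b, X, y, alpha):
    residual = y - X @ w - b
    return (residual @ residual) / (2 * len(y)) + alpha * np.abs(w).sum()

# evaluate the objective at the fitted coefficients and intercept
lasso_objective(lasso.coef_, lasso.intercept_, reg_data, reg_target, alpha=1.0)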
In [20]:
# Lasso Cross Validation
In [21]:
from sklearn.linear_model import LassoCV
In [22]:
# LassoCV selects alpha by cross-validation over a grid of candidate values
lassocv = LassoCV()
lassocv.fit(reg_data, reg_target)
Out[22]:
In [23]:
lassocv.alpha_  # the penalty strength selected by cross-validation
Out[23]:
In [24]:
lassocv.coef_[:5]
Out[24]:
In [25]:
np.sum(lassocv.coef_ != 0)
Out[25]:
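In [ ]:
# Sketch of how the choice is made (attributes per scikit-learn's LassoCV):
# alphas_ is the grid that was searched and mse_path_ holds the per-fold
# MSE for each alpha; the alpha with the lowest mean CV error is alpha_
best = lassocv.mse_path_.mean(axis=1).argmin()
lassocv.alphas_[best], lassocv.alpha_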
In [26]:
# the nonzero coefficients act as a feature-selection mask: drop the
# columns Lasso zeroed out
mask = lassocv.coef_ != 0
new_reg_data = reg_data[:, mask]
new_reg_data.shape
Out[26]:
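In [ ]:
# Sketch of the same idea with scikit-learn's built-in selector
# (assumption: SelectFromModel's default threshold for Lasso-type
# estimators keeps roughly the nonzero coefficients)
from sklearn.feature_selection import SelectFromModel
SelectFromModel(lassocv, prefit=True).transform(reg_data).shape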
In [ ]: