In [1]:
# Least Absolute Shrinkage and Selection Operator (LASSO)
# Like Ridge Regression, LASSO penalizes the size of the regression
# coefficients; the difference is that LASSO's L1 penalty can drive
# many coefficients to exactly zero.
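
For reference, the two penalized objectives differ only in the norm applied to the
coefficient vector (standard textbook forms; scikit-learn additionally scales the
squared-error term of Lasso by 1/(2 * n_samples)):

    \min_{\beta} \; \|y - X\beta\|_2^2 + \alpha \|\beta\|_2^2 \quad \text{(Ridge)}
    \qquad
    \min_{\beta} \; \|y - X\beta\|_2^2 + \alpha \|\beta\|_1 \quad \text{(Lasso)}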

In [2]:
from sklearn.datasets import make_regression

In [3]:
reg_data, reg_target = make_regression(n_samples=200, n_features=500,
                                      n_informative=5, noise=5)

In [10]:
from sklearn.linear_model import Lasso
import numpy as np

In [11]:
lasso = Lasso()

In [12]:
lasso


Out[12]:
Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [13]:
# the alpha parameter sets the strength of the L1 penalty;
# larger values zero out more coefficients
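
A quick sketch (not an executed cell; it reuses reg_data, reg_target, np and
Lasso from the cells above) of how a heavier alpha typically prunes more
coefficients:

for a in (0.1, 0.5, 1.0, 5.0):
    n_nonzero = np.sum(Lasso(alpha=a).fit(reg_data, reg_target).coef_ != 0)
    print(a, n_nonzero)   # the count of surviving coefficients shrinks as alpha grows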

In [14]:
lasso.fit(reg_data, reg_target)


Out[14]:
Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [15]:
np.sum(lasso.coef_ != 0)


Out[15]:
7
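
For contrast (a sketch, not an executed cell): Ridge's L2 penalty only shrinks
coefficients, so the same data would typically keep all 500 of them non-zero.

from sklearn.linear_model import Ridge
ridge = Ridge(alpha=1.0).fit(reg_data, reg_target)
print(np.sum(ridge.coef_ != 0))   # expected: 500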

In [16]:
lasso_0 = Lasso(0)

In [17]:
lasso_0.fit(reg_data, reg_target)


/usr/local/lib/python2.7/site-packages/IPython/kernel/__main__.py:1: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator
  if __name__ == '__main__':
/usr/local/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py:432: UserWarning: Coordinate descent with alpha=0 may lead to unexpected results and is discouraged.
  positive)
Out[17]:
Lasso(alpha=0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [18]:
np.sum(lasso_0.coef_ != 0)


Out[18]:
500
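
As the warning above suggests, alpha=0 removes the penalty entirely, so the
problem is plain least squares; a sketch of the recommended alternative:

from sklearn.linear_model import LinearRegression
ols = LinearRegression().fit(reg_data, reg_target)
print(np.sum(ols.coef_ != 0))   # all 500 coefficients stay non-zero, as with lasso_0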

In [19]:
# how it works:
# Linear Regression minimizes the squared error.
# Lasso minimizes the same squared error plus an L1 penalty on the
# coefficient magnitudes, weighted by alpha, which induces sparsity.
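
The exact zeros come from the coordinate-descent solver: each coefficient
update passes through a soft-thresholding step, sketched below in simplified
form, which maps any value with magnitude at most a to exactly zero.

def soft_threshold(z, a):
    # shrink toward zero; anything with magnitude <= a becomes exactly 0
    return np.sign(z) * np.maximum(np.abs(z) - a, 0.0)

print(soft_threshold(np.array([-3.0, -0.5, 0.2, 2.0]), 1.0))   # [-2., -0., 0., 1.]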

In [20]:
# Lasso Cross Validation

In [21]:
from sklearn.linear_model import LassoCV

In [22]:
lassocv = LassoCV()
lassocv.fit(reg_data, reg_target)


Out[22]:
LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True,
    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
    verbose=False)

In [23]:
lassocv.alpha_


Out[23]:
0.82422898878738993
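
How that value was chosen (a sketch, not an executed cell): LassoCV fits along
a grid of candidate alphas and keeps the one with the lowest mean
cross-validated error, which can be recovered from its fitted attributes.

mean_mse = lassocv.mse_path_.mean(axis=1)      # average MSE over the CV folds
print(lassocv.alphas_[np.argmin(mean_mse)])    # should match lassocv.alpha_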

In [24]:
lassocv.coef_[:5]


Out[24]:
array([-0., -0., -0., -0., -0.])

In [25]:
np.sum(lassocv.coef_ != 0)


Out[25]:
12

In [26]:
# the non-zero coefficients identify the useful features;
# use them as a mask to drop the rest
mask = lassocv.coef_ != 0
new_reg_data = reg_data[:, mask]
new_reg_data.shape


Out[26]:
(200, 12)
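
One possible follow-up (a sketch, not an executed cell): refit on the reduced
matrix, or hand it to any other estimator, now with 12 columns instead of 500.

lasso_small = Lasso(alpha=lassocv.alpha_).fit(new_reg_data, reg_target)
print(lasso_small.coef_.shape)   # (12,)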

In [ ]: