In [1]:
# Least Absolute Shrinkage and Selection Operator (LASSO)
# Like Ridge Regression, LASSO penalizes the size of the regression
# coefficients; the difference is that LASSO's L1 penalty can drive
# many coefficients to exactly zero.
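
For reference, the two penalized objectives differ only in the norm applied to the
coefficient vector (standard textbook forms; scikit-learn additionally scales the
squared-error term of Lasso by 1/(2 * n_samples)):

    \min_{\beta} \; \|y - X\beta\|_2^2 + \alpha \|\beta\|_2^2 \quad \text{(Ridge)}
    \qquad
    \min_{\beta} \; \|y - X\beta\|_2^2 + \alpha \|\beta\|_1 \quad \text{(Lasso)}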

In [2]:
from sklearn.datasets import make_regression

In [3]:
reg_data, reg_target = make_regression(n_samples=200, n_features=500,
                                      n_informative=5, noise=5)

In [10]:
from sklearn.linear_model import Lasso
import numpy as np

In [11]:
lasso = Lasso()

In [12]:
lasso


Out[12]:
Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [13]:
# the alpha parameter sets the strength of the L1 penalty;
# larger values zero out more coefficients
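
A quick sketch (not an executed cell; it reuses reg_data, reg_target, np and
Lasso from the cells above) of how a heavier alpha typically prunes more
coefficients:

for a in (0.1, 0.5, 1.0, 5.0):
    n_nonzero = np.sum(Lasso(alpha=a).fit(reg_data, reg_target).coef_ != 0)
    print(a, n_nonzero)   # the count of surviving coefficients shrinks as alpha grows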

In [14]:
lasso.fit(reg_data, reg_target)


Out[14]:
Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [15]:
np.sum(lasso.coef_ != 0)


Out[15]:
7
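
For contrast (a sketch, not an executed cell): Ridge's L2 penalty only shrinks
coefficients, so the same data would typically keep all 500 of them non-zero.

from sklearn.linear_model import Ridge
ridge = Ridge(alpha=1.0).fit(reg_data, reg_target)
print(np.sum(ridge.coef_ != 0))   # expected: 500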

In [16]:
lasso_0 = Lasso(0)

In [17]:
lasso_0.fit(reg_data, reg_target)


/usr/local/lib/python2.7/site-packages/IPython/kernel/__main__.py:1: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator
  if __name__ == '__main__':
/usr/local/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py:432: UserWarning: Coordinate descent with alpha=0 may lead to unexpected results and is discouraged.
  positive)
Out[17]:
Lasso(alpha=0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [18]:
np.sum(lasso_0.coef_ != 0)


Out[18]:
500
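
As the warning above suggests, alpha=0 removes the penalty entirely, so the
problem is plain least squares; a sketch of the recommended alternative:

from sklearn.linear_model import LinearRegression
ols = LinearRegression().fit(reg_data, reg_target)
print(np.sum(ols.coef_ != 0))   # all 500 coefficients stay non-zero, as with lasso_0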

In [19]:
# how it works:
# Linear Regression minimizes the squared error.
# Lasso minimizes the same squared error plus an L1 penalty on the
# coefficient magnitudes, weighted by alpha, which induces sparsity.
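
The exact zeros come from the coordinate-descent solver: each coefficient
update passes through a soft-thresholding step, sketched below in simplified
form, which maps any value with magnitude at most a to exactly zero.

def soft_threshold(z, a):
    # shrink toward zero; anything with magnitude <= a becomes exactly 0
    return np.sign(z) * np.maximum(np.abs(z) - a, 0.0)

print(soft_threshold(np.array([-3.0, -0.5, 0.2, 2.0]), 1.0))   # [-2., -0., 0., 1.]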

In [20]:
# Lasso Cross Validation

In [21]:
from sklearn.linear_model import LassoCV

In [22]:
lassocv = LassoCV()
lassocv.fit(reg_data, reg_target)


Out[22]:
LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True,
    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
    verbose=False)

In [23]:
lassocv.alpha_


Out[23]:
0.82422898878738993
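
How that value was chosen (a sketch, not an executed cell): LassoCV fits along
a grid of candidate alphas and keeps the one with the lowest mean
cross-validated error, which can be recovered from its fitted attributes.

mean_mse = lassocv.mse_path_.mean(axis=1)      # average MSE over the CV folds
print(lassocv.alphas_[np.argmin(mean_mse)])    # should match lassocv.alpha_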

In [24]:
lassocv.coef_[:5]


Out[24]:
array([-0., -0., -0., -0., -0.])

In [25]:
np.sum(lassocv.coef_ != 0)


Out[25]:
12

In [26]:
# the non-zero coefficients identify the useful features;
# use them as a mask to drop the rest
mask = lassocv.coef_ != 0
new_reg_data = reg_data[:, mask]
new_reg_data.shape


Out[26]:
(200, 12)
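
One possible follow-up (a sketch, not an executed cell): refit on the reduced
matrix, or hand it to any other estimator, now with 12 columns instead of 500.

lasso_small = Lasso(alpha=lassocv.alpha_).fit(new_reg_data, reg_target)
print(lasso_small.coef_.shape)   # (12,)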

In [ ]: