Modified from the GitHub repo https://github.com/JWarmenhoven/ISLR-python, which is based on the book An Introduction to Statistical Learning by James et al.
In [5]:
# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error
%matplotlib inline
plt.style.use('ggplot')
datafolder = "../data/"
In [6]:
# In R, I exported the dataset from package 'ISLR' to a csv file.
df = pd.read_csv(datafolder+'Hitters.csv', index_col=0).dropna()
df.index.name = 'Player'
df.info()
In [7]:
df.head()
Out[7]:
In [8]:
dummies = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
dummies.info()
print(dummies.head())
In [9]:
y = df.Salary
# Drop the column with the dependent variable (Salary), and the columns for which we created dummy variables
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
# Define the feature set X.
X = pd.concat([X_, dummies[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X.info()
In [10]:
X.head(5)
Out[10]:
In [12]:
alphas = 10**np.linspace(10,-2,100)*0.5
ridge = Ridge()
coefs = []
for a in alphas:
    ridge.set_params(alpha=a)
    ridge.fit(scale(X), y)
    coefs.append(ridge.coef_)
ax = plt.gca()
ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis
plt.axis('tight')
plt.xlabel('lambda')
plt.ylabel('weights')
plt.title('Ridge coefficients as a function of the regularization strength');
The plot above shows that the ridge coefficients shrink toward zero as lambda increases; equivalently, they grow in magnitude as lambda decreases and the penalty weakens, approaching the ordinary least-squares solution.
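A quick numeric check of this claim, reusing the `coefs` and `alphas` from the cell above (the grid runs from the largest to the smallest lambda, so `coefs[0]` belongs to the strongest penalty):
In [ ]:
# coefs[0] corresponds to the largest alpha, coefs[-1] to the smallest
print('||coef|| at largest lambda :', np.linalg.norm(coefs[0]))
print('||coef|| at smallest lambda:', np.linalg.norm(coefs[-1]))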
Exercise 1: Plot the leave-one-out (LOO) risk and the empirical (training) risk as a function of lambda. One possible approach is sketched below.
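A minimal sketch for Exercise 1, assuming the `X`, `y`, and `alphas` defined above. LOO refits the model n times per lambda, so the sketch uses a coarser sub-grid to keep the runtime reasonable:
In [ ]:
from sklearn.model_selection import LeaveOneOut, cross_val_score

Xs = scale(X)
alpha_grid = alphas[::10]  # coarser grid: LOO refits the model n times per alpha
loo_risk, emp_risk = [], []
for a in alpha_grid:
    model = Ridge(alpha=a)
    # LOO risk: average squared error over the n leave-one-out splits
    scores = cross_val_score(model, Xs, y, cv=LeaveOneOut(),
                             scoring='neg_mean_squared_error')
    loo_risk.append(-scores.mean())
    # empirical risk: training MSE on the full sample
    model.fit(Xs, y)
    emp_risk.append(mean_squared_error(y, model.predict(Xs)))

plt.plot(alpha_grid, loo_risk, label='LOO risk')
plt.plot(alpha_grid, emp_risk, label='empirical risk')
plt.xscale('log')
plt.xlabel('lambda')
plt.ylabel('MSE')
plt.legend();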
Exercise 2: Implement and test forward stagewise regression (recall that stagewise and stepwise are different procedures). A possible sketch follows below.
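For Exercise 2, a minimal sketch of incremental forward stagewise regression; the helper name `forward_stagewise`, the step size `eps`, and the step count are illustrative choices, not from the original notebook:
In [ ]:
def forward_stagewise(X, y, eps=0.01, n_steps=2000):
    """Incremental forward stagewise regression (sketch).

    Assumes the columns of X are standardized; y is centered internally.
    At each step, nudge the coefficient of the predictor most correlated
    with the current residual by eps in the direction of that correlation
    (unlike stepwise regression, which fully refits after adding a variable).
    """
    X = np.asarray(X, dtype=float)
    r = np.asarray(y, dtype=float) - np.mean(y)  # centered residual
    beta = np.zeros(X.shape[1])
    path = []
    for _ in range(n_steps):
        corr = X.T @ r                  # correlation of each predictor with the residual
        j = np.argmax(np.abs(corr))     # most correlated predictor
        delta = eps * np.sign(corr[j])
        beta[j] += delta                # small step on that coefficient only
        r -= delta * X[:, j]
        path.append(beta.copy())
    return beta, np.array(path)

# quick test: the coefficient paths should move toward the least-squares fit
beta_fs, path = forward_stagewise(scale(X), y, eps=0.01, n_steps=2000)
plt.plot(path)
plt.xlabel('step')
plt.ylabel('coefficients');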