``````

In [24]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

``````

# Numpy

``````

In [5]:

## numpy array
a = np.array([1, 4, 6])
print a.shape
print
print np.ones((3, 4))
print
print np.zeros((2, 5))
print
print np.arange(6).reshape(2, 3)
print
print a.T
print
print np.hstack([a, a])
print
print np.vstack([a, a])

``````
``````

(3L,)

[[ 1.  1.  1.  1.]
[ 1.  1.  1.  1.]
[ 1.  1.  1.  1.]]

[[ 0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  0.]]

[[0 1 2]
[3 4 5]]

[1 4 6]

[1 4 6 1 4 6]

[[1 4 6]
[1 4 6]]

``````
``````

In [6]:

## element wise or matrix multiplication
print np.dot(a, a)  # or a.dot(a)
print
print a*a

``````
``````

53

[ 1 16 36]

``````
``````

In [7]:

# you can convert a 1-d array to a 2-d array with np.newaxis
print 'a:'
print a
print 'a.shape:', a.shape
print
print 'a[np.newaxis] is a 2-d row vector:'
print a[np.newaxis]
print 'a[np.newaxis].shape:', a[np.newaxis].shape
print

print 'a[np.newaxis].T: is a 2-d column vector:'
print a[np.newaxis].T
print 'a[np.newaxis].T.shape:', a[np.newaxis].T.shape
print

``````
``````

a:
[1 4 6]
a.shape: (3L,)

a[np.newaxis] is a 2-d row vector:
[[1 4 6]]
a[np.newaxis].shape: (1L, 3L)

a[np.newaxis].T: is a 2-d column vector:
[[1]
[4]
[6]]
a[np.newaxis].T.shape: (3L, 1L)

``````
``````

In [8]:

# numpy provides a ton of other functions for working with matrices
m = np.array([[1, 2],[3, 4]])
m_inverse = np.linalg.inv(m)
print 'inverse of [[1, 2],[3, 4]]:'
print m_inverse
print

print 'm.dot(m_inverse):'
print m.dot(m_inverse)

``````
``````

inverse of [[1, 2],[3, 4]]:
[[-2.   1. ]
[ 1.5 -0.5]]

m.dot(m_inverse):
[[  1.00000000e+00   1.11022302e-16]
[  0.00000000e+00   1.00000000e+00]]

``````
``````

In [9]:

# and for doing all kinds of sciency type stuff.  like generating random numbers:
np.random.seed(5678)
n = np.random.randn(3, 4)
print 'a matrix with random entries drawn from a Normal(0, 1) distribution:'
print n

``````
``````

a matrix with random entries drawn from a Normal(0, 1) distribution:
[[-0.70978938 -0.01719118  0.31941137 -2.26533107]
[-1.37745366  1.94998073 -0.56381007 -0.84373759]
[ 0.22453858 -0.39137772  0.60550347 -0.68615034]]

``````
``````

In [10]:

np.random.seed(3333)
n_data = 10 # number of data points. i.e. N
n_dim = 5   # number of dimensions of each datapoint.  i.e. D

betas = np.random.randn(n_dim + 1)

X_no_constant = np.random.randn(n_data, n_dim)
print 'X_no_constant:'
print X_no_constant
print

X = np.hstack([np.ones(n_data)[np.newaxis].T, X_no_constant])
y = np.dot(X, betas)

# Tests:
y_expected = np.array([-0.41518357, -9.34696153, 5.08980544,
-0.26983873, -1.47667864, 1.96580794,
6.87009791, -2.07784135, -0.7726816,
-2.74954984])
np.testing.assert_allclose(y, y_expected)
print '****** Tests passed! ******'

``````
``````

X_no_constant:
[[-0.92232935  0.27352359 -0.86339625  1.43766044 -1.71379871]
[ 0.179322   -0.89138595  2.13005603  0.51898975 -0.41875106]
[ 0.34010119 -1.07736609 -1.02314142 -1.02518535  0.40972072]
[ 1.18883814  1.01044759  0.3108216  -1.17868611 -0.49526331]
[-1.50248369 -0.196458    0.34752922 -0.79200465 -0.31534705]
[ 1.73245191 -1.42793626 -0.94376587  0.86823495 -0.95946769]
[-1.07074604 -0.06555247 -2.17689578  1.58538804  1.81492637]
[-0.73706088  0.77546031  0.42653908 -0.51853723 -0.53045538]
[ 1.09620536 -0.69557321  0.03080082  0.25219596 -0.35304303]
[-0.93971165  0.04448078  0.04273069  0.4961477  -1.7673568 ]]

****** Tests passed! ******

``````

# Pandas

``````

In [11]:

b = np.array([[6, 7], [3, 1], [4, 0]])
df = pd.DataFrame(data=b,  columns=['Weight', 'Height'])
print 'b:'
print b
print
print 'DataFame version of b:'
print df
print

``````
``````

b:
[[6 7]
[3 1]
[4 0]]

DataFame version of b:
Weight  Height
0       6       7
1       3       1
2       4       0

``````
``````

In [12]:

``````
``````

In [16]:

# baseball.describe()
# baseball.keys()
# baseball.info()

``````
``````

In [18]:

millionaire_indices = baseball['Salary'] > 1000
# you can use the query indices to look at a subset of your original dataframe
print 'baseball.shape:', baseball.shape
print "baseball[millionaire_indices].shape:", baseball[millionaire_indices].shape

``````
``````

baseball.shape: (337, 18)
baseball[millionaire_indices].shape: (139, 18)

Out[18]:

Salary
AVG
Runs
Name

0
3300
0.272
69
Andre Dawson

1
2600
0.269
58
Steve Buchele

2
2500
0.249
54
Kal Daniels

3
2475
0.260
59
Shawon Dunston

4
2313
0.273
87
Mark Grace

``````
``````

In [19]:

shoe_size_df.shape

``````
``````

Out[19]:

(3, 2)

``````
``````

In [20]:

merged = pd.merge(baseball, shoe_size_df, on=['Name'])
merged

``````
``````

Out[20]:

Salary
AVG
OBP
Runs
Hits
Doubles
Triples
HR
RBI
Walks
SO
SB
Errs
free agency eligibility
free agent in 1991/2
arbitration eligibility
arbitration in 1991/2
Name
Shoe Size

0
3300
0.272
0.302
69
153
21
4
31
104
22
80
4
3
1
0
0
0
Andre Dawson
11

1
2313
0.273
0.346
87
169
28
5
8
58
70
53
3
8
0
0
1
0
Mark Grace
13

2
200
0.203
0.240
39
64
10
1
10
33
14
96
13
6
0
0
0
0
Sammy Sosa
12

``````
``````

In [23]:

merged_outer = pd.merge(baseball, shoe_size_df, on=['Name'], how='outer')

``````
``````

Out[23]:

Salary
AVG
OBP
Runs
Hits
Doubles
Triples
HR
RBI
Walks
SO
SB
Errs
free agency eligibility
free agent in 1991/2
arbitration eligibility
arbitration in 1991/2
Name
Shoe Size

0
3300
0.272
0.302
69
153
21
4
31
104
22
80
4
3
1
0
0
0
Andre Dawson
11

1
2600
0.269
0.335
58
111
17
2
18
66
39
69
0
3
1
1
0
0
Steve Buchele
NaN

2
2500
0.249
0.337
54
115
15
1
17
73
63
116
6
5
1
0
0
0
Kal Daniels
NaN

3
2475
0.260
0.292
59
128
22
7
12
50
23
64
21
21
0
0
1
0
Shawon Dunston
NaN

4
2313
0.273
0.346
87
169
28
5
8
58
70
53
3
8
0
0
1
0
Mark Grace
13

``````

# Plotting with Matplotlib

``````

In [25]:

``````
``````

In [26]:

f = plt.figure()
plt.hist(baseball['Hits'], bins=15)   # plot or scatter
plt.xlabel('Number of Hits')
plt.ylabel('Frequency')
plt.title('Histogram of Number of Hits')
f.set_size_inches(10, 5)
plt.show()

``````
``````

``````

# Sci-Kit Learn Linear Regression

``````

In [31]:

from sklearn import linear_model

``````
``````

In [32]:

## linear regression models
model_lr = linear_model.LinearRegression()
model_ridge = linear_model.Ridge(alpha=1)
model_lasso = linear_model.Lasso(alpha=1)
model_en = linear_model.ElasticNet(alpha=0.5, l1_ratio=0.1)

``````
``````

In [27]:

def mean_squared_error(y_true, y_pred):
"""
calculate the mean_squared_error given a vector of true ys and a vector of predicted ys
"""
diff = y_true - y_pred
return np.dot(diff, diff) / len(diff)

def predict_test_values(model, X_train, y_train, X_test):
model.fit(X_train, y_train)
return model.predict(X_test)

def calc_train_and_test_error(model, X_train, y_train, X_test, y_test):
model.fit(X_train, y_train)
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
return mean_squared_error(y_train, y_pred_train), mean_squared_error(y_test, y_pred_test)

``````
``````

In [33]:

x_train = data['x_train']
y_train = data['y_train']
x_test = data['x_test']
y_test = data['y_test']

``````
``````

In [34]:

## Model performance

print "Linear Regression Training and Test Errors:"
print calc_train_and_test_error(model_lr, x_train, y_train, x_test, y_test)
print

print "Ridge Regression Training and Test Errors:"
print calc_train_and_test_error(model_ridge, x_train, y_train, x_test, y_test)
print

print "Lasso Regression Training and Test Errors:"
print calc_train_and_test_error(model_lasso, x_train, y_train, x_test, y_test)
print

print 'ElasticNet Training and Test Errors:'
print calc_train_and_test_error(model_en, x_train, y_train, x_test, y_test)
print

``````
``````

Linear Regression Training and Test Errors:
(2.4835421623899702e-05, 283.52728792173116)

Ridge Regression Training and Test Errors:
(0.018634112597992421, 9.5641560683730305)

Lasso Regression Training and Test Errors:
(4.1142351854727677, 4.6028697944107098)

ElasticNet Errors:
(1.9616145613107794, 3.8189893038857918)

``````
``````

In [35]:

n_disp_coefs = 10

print 'Linear Regression Coefficients:'
print model_lr.coef_[:n_disp_coefs]
print

print 'Ridge Regression Coefficients:'
print model_ridge.coef_[:n_disp_coefs]
print

print 'LASSO Coefficients:'
print model_lasso.coef_[:n_disp_coefs]
print

print 'ElasticNet Coefficients:'
print model_en.coef_[:n_disp_coefs]
print

``````
``````

Linear Regression Coefficients:
[  5.22757470e-01   2.78289824e+00   4.04383818e+00   1.17544241e+00
3.13230537e-01  -1.28127160e-01   5.11682173e-01   3.83754833e-03
-1.19481096e+00   9.56448172e-01]

Ridge Regression Coefficients:
[ 1.01611626  1.77246927  3.06534773 -0.0333898   0.04378713  0.10472107
-0.13445823  0.12656315  0.05779722  0.10204281]

LASSO Coefficients:
[ 0.03375129  0.92694409  1.92659636  0.          0.          0.         -0.
0.          0.          0.        ]

ElasticNet Coefficients:
[ 0.61034977  1.16675401  1.79600624  0.          0.          0.00686607
0.          0.02027936  0.00469244  0.00644604]

``````
``````

In [36]:

print "Sum of Linear Regression Coefficients:"
print np.sum(np.abs(model_lr.coef_))
print

print "Sum of Ridge Regression Coefficients:"
print np.sum(np.abs(model_ridge.coef_))
print

print "Sum of Lasso Regression Coefficients:"
print np.sum(np.abs(model_lasso.coef_))
print

print 'Sum of ElasticNet Coefficients'
print np.sum(np.abs(model_en.coef_))
print

``````
``````

Sum of Linear Regression Coefficients:
338.387469048

Sum of Ridge Regression Coefficients:
62.4912904062

Sum of Lasso Regression Coefficients:
2.88729174216

Sum of ElasticNet Coefficients
9.82525057342

``````

# Model Selection

## Types of Cross Validation

### Validation Set Cross Validation

``````

In [37]:

# a helper function for performing validation set cross validation
from sklearn.cross_validation import train_test_split
validation_portion = 0.1
seed = 1234
x_train_small, x_valid, y_train_small, y_valid = \
train_test_split(x_train, y_train, test_size=validation_portion, random_state=seed)

print 'Original Training Set Size:'
print x_train.shape, y_train.shape
print

print 'Reducted Training Set Size:'
print x_train_small.shape, y_train_small.shape
print

print 'Validation Set Size:'
print x_valid.shape, y_valid.shape
print

``````
``````

Original Training Set Size:
(600L, 598L) (600L,)

Reducted Training Set Size:
(540L, 598L) (540L,)

Validation Set Size:
(60L, 598L) (60L,)

``````
``````

In [38]:

def validation_set_error(model, x_train, y_train, validation_portion=0.1, seed=1234):
# FILL IN YOUR CODE HERE

x_train_small, x_valid, y_train_small, y_valid = \
train_test_split(x_train, y_train, test_size=validation_portion, random_state=seed)
model.fit(x_train_small, y_train_small)
y_pred_valid = model.predict(x_valid)
return mean_squared_error(y_valid, y_pred_valid)

# set up models
model_lr_valid = linear_model.LinearRegression()
model_ridge_valid = linear_model.Ridge(alpha=10)

# calculate errors
valid_portion = .1
n_seeds = 5
print "Linear Regression Training and Test Errors:"
# FILL IN YOUR CODE HERE
print calc_train_and_test_error(model_lr_valid, x_train_small, y_train_small, x_test, y_test)

print
print "Linear Regression Validation Errors:"
# FILL IN YOUR CODE HERE
print validation_set_error(model_lr_valid, x_train, y_train, validation_portion=0.1, seed=1234)
print

for seed in range(n_seeds):
print validation_set_error(model_lr_valid, x_train, y_train, validation_portion=valid_portion, seed=seed)
print

print "Ridge Regression Training and Test Errors:"
# FILL IN YOUR CODE HERE
print calc_train_and_test_error(model_ridge_valid, x_train_small, y_train_small, x_test, y_test)

print
print "Ridge Regression Validation Errors:"
# FILL IN YOUR CODE HERE
print validation_set_error(model_ridge_valid, x_train, y_train, validation_portion=0.1, seed=1234)
print

for seed in range(n_seeds):
print validation_set_error(model_ridge_valid, x_train, y_train, validation_portion=valid_portion, seed=seed)
print

``````
``````

Linear Regression Training and Test Errors:
(6.5894013208313341e-28, 9.6373710755996189)

Linear Regression Validation Errors:
9.36759564041

10.4039988935

11.6352333478

8.8241606146

9.20945551949

7.60088829288

Ridge Regression Training and Test Errors:
(0.037116269305341815, 4.8163269566646871)

Ridge Regression Validation Errors:
4.44120540399

3.61817500364

7.12476980873

5.32580668571

5.74292650031

4.6239411424

``````

### K-Fold Cross Validation

``````

In [42]:

# scikit learn provides a useful object to help you perform kfold cross validation
from sklearn.cross_validation import KFold

n_data = len(y_train)
fold_count = 0
for train_reduced_row_ids, valid_row_ids in KFold(n_data, n_folds=4):
print
print
print "FOLD %d:" % fold_count
print "-------"
print("train_ids:\n%s\n\nvalid_ids\n%s" % (train_reduced_row_ids, valid_row_ids))
x_train_reduced = x_train[train_reduced_row_ids]
y_train_reduced = y_train[train_reduced_row_ids]
x_valid = x_train[valid_row_ids]
y_valid = y_train[valid_row_ids]
fold_count += 1

``````
``````

In [43]:

# NOTE: KFolds isn't random at all.  It's important to shuffle your data first before using it.
from sklearn.utils import shuffle
x_train_shuffled, y_train_shuffled = shuffle(x_train, y_train)

``````
``````

In [44]:

def kfold_error(model, x_train, y_train, k=4, seed=1234):
# FILL IN YOUR CODE HERE

# shuffle training data
x_train_shuffled, y_train_shuffled = shuffle(x_train, y_train, random_state=seed)

n_data = len(y_train)
error_sum = 0
for train_reduced_row_ids, valid_row_ids in KFold(n_data, n_folds=k):
x_train_reduced = x_train_shuffled[train_reduced_row_ids]
y_train_reduced = y_train_shuffled[train_reduced_row_ids]
x_valid = x_train_shuffled[valid_row_ids]
y_valid = y_train_shuffled[valid_row_ids]
model.fit(x_train_reduced, y_train_reduced)
y_valid_pred = model.predict(x_valid)
error_sum += mean_squared_error(y_valid, y_valid_pred)
return error_sum*1.0 / k

# set up models
model_lr_valid = linear_model.LinearRegression()
model_ridge_valid = linear_model.Ridge(alpha=10)

# calculate errors
n_seeds = 3
k = 5

print "Linear Regression Training and Test Errors:"
# FILL IN YOUR CODE HERE
print calc_train_and_test_error(model_lr_valid, x_train, y_train, x_test, y_test)

print
print "Linear Regression K-Fold Errors:"
# FILL IN YOUR CODE HERE
print
for seed in range(n_seeds):
print kfold_error(model_lr_valid, x_train, y_train, k=k, seed=seed)
print

print
print "Ridge Regression Training and Test Errors:"
# FILL IN YOUR CODE HERE
print calc_train_and_test_error(model_ridge_valid, x_train, y_train, x_test, y_test)

print
print "Ridge Regression K-Fold Errors:"
# FILL IN YOUR CODE HERE
print
for seed in range(n_seeds):
print kfold_error(model_ridge_valid, x_train, y_train, k=k, seed=seed)
print

``````
``````

Linear Regression Training and Test Errors:
(2.4835421623899702e-05, 283.52728792173116)

Linear Regression K-Fold Errors:

7.21045028087

7.3510411941

6.69216918868

Ridge Regression Training and Test Errors:
(0.064063243432624289, 4.9205415455726982)

Ridge Regression K-Fold Errors:

5.77769677178

5.78170553945

5.6587338965

``````

### Model and Hyperparameter Selection with Cross Validation

``````

In [45]:

def model_name(model):
s = model.__str__().lower()
if "linearregression" in s:
return 'LinearRegression'
elif "lasso" in s:
return 'Lasso(a=%g)' % model.alpha
elif "ridge" in s:
return 'Ridge(a=%g)' % model.alpha
elif "elastic" in s:
return 'ElasticNet(a=%g, r=%g)' % (model.alpha, model.l1_ratio)
else:
raise ValueError("Unknown Model Type")

def create_models(alphas=(.01, .03, .1, .3, 1, 3), l1_ratios=(.7, .5, .3)):
models = [linear_model.LinearRegression()]
models.extend([linear_model.Ridge(a) for a in alphas])
models.extend([linear_model.Lasso(a) for a in alphas])
models.extend([linear_model.ElasticNet(a, l1_ratio=l) for a in alphas for l in l1_ratios])
return models

def results_df(models, betas_true, x_train, y_train, x_test, y_test, k=4):
n_data, n_dim = x_train.shape

n_zeros = n_dim - len(betas_true)

betas_true = np.concatenate([betas_true, np.zeros(n_zeros)])

# fit models to training data
[m.fit(x_train, y_train) for m in models]

betas = np.vstack([betas_true] + [m.coef_ for m in models])
beta_names = ['Beta ' + str(i) for i in range(n_dim)]

# set up model names
model_names =  ["True Coefs"] + [model_name(m) for m in models]
df = pd.DataFrame(data=betas, columns=beta_names, index=model_names)

# calculate training errors
y_preds = [m.predict(x_train) for m in models]
errors = [np.nan] + [mean_squared_error(y_train, y_pred) for y_pred in y_preds]
df['Train Error'] = errors

# calculate validation errors
errors = [np.nan] + [kfold_error(m, x_train, y_train, k=k) for m in models]
df['Cross Validation Error'] = errors

# calculate test errors
y_preds = [m.predict(x_test) for m in models]
errors = [np.nan] + [mean_squared_error(y_test, y_pred) for y_pred in y_preds]
df['Test Error'] = errors

return df

# these are some of the magic parameters that I used to actually
# generate the overfitting dataset
n_dim = 598
n_dim_meaningful = 3
n_dim_disp_extra = 2

# the actual betas used to generate the y values.  the rest were 0.
betas_true = np.arange(n_dim_meaningful) + 1

# create a whole bunch of untrained models
models = create_models(alphas=(.01, .03, .1, .3, 1), l1_ratios=(.9, .7, .5))

#
all_results = results_df(models, betas_true, x_train, y_train, x_test, y_test, k=4)

# decide which columns we want to display
disp_cols = ["Beta " + str(i) for i in range(n_dim_meaningful + n_dim_disp_extra)]
disp_cols += ['Train Error', 'Cross Validation Error', 'Test Error']

# display the results
all_results[disp_cols]

``````
``````

Out[45]:

Beta 0
Beta 1
Beta 2
Beta 3
Beta 4
Train Error
Cross Validation Error
Test Error

True Coefs
1.000000
2.000000
3.000000
0.000000
0.000000
NaN
NaN
NaN

LinearRegression
0.522757
2.782898
4.043838
1.175442
0.313231
0.000025
6.550726
8.569427

Ridge(a=0.01)
0.867059
2.290546
3.729941
0.570987
0.380292
0.001033
6.590630
8.592261

Ridge(a=0.03)
1.028546
2.023949
3.548358
0.237767
0.370161
0.002749
6.588766
8.590441

Ridge(a=0.1)
1.088696
1.847044
3.386562
0.012841
0.272410
0.005693
6.582288
8.584100

Ridge(a=0.3)
1.065763
1.788885
3.247394
-0.050784
0.142292
0.010186
6.564170
8.566235

Ridge(a=1)
1.016116
1.772469
3.065348
-0.033390
0.043787
0.018634
6.504991
8.506474

Lasso(a=0.01)
1.076240
1.956283
2.955116
0.000000
0.025530
0.213388
1.701747
1.849997

Lasso(a=0.03)
1.042435
1.941418
2.952131
0.000000
0.002460
0.526780
1.223279
1.196381

Lasso(a=0.1)
0.972258
1.869852
2.892761
-0.000000
0.000000
0.968836
1.024546
0.895046

Lasso(a=0.3)
0.764523
1.659750
2.677197
0.000000
0.000000
1.235803
1.264843
1.120443

Lasso(a=1)
0.033751
0.926944
1.926596
0.000000
0.000000
4.114235
4.166927
4.301203

ElasticNet(a=0.01, r=0.9)
1.073393
1.951365
2.947436
0.000000
0.027348
0.195976
1.760720
1.935114

ElasticNet(a=0.01, r=0.7)
1.066012
1.936668
2.931448
0.000000
0.031150
0.163034
1.919308
2.167257

ElasticNet(a=0.01, r=0.5)
1.046440
1.907626
2.908069
0.000000
0.029275
0.127897
2.191965
2.583608

ElasticNet(a=0.03, r=0.9)
1.042974
1.931661
2.936235
0.000000
0.008043
0.484044
1.261913
1.240859

ElasticNet(a=0.03, r=0.7)
1.038972
1.908386
2.901951
0.000000
0.014723
0.401288
1.361827
1.360261

ElasticNet(a=0.03, r=0.5)
1.026654
1.885132
2.862618
0.000000
0.020993
0.316085
1.547012
1.592804

ElasticNet(a=0.1, r=0.9)
0.971808
1.860572
2.872529
-0.000000
0.000000
0.960364
1.041134
0.906070

ElasticNet(a=0.1, r=0.7)
0.968971
1.841385
2.828162
-0.000000
0.000000
0.913444
1.092529
0.967566

ElasticNet(a=0.1, r=0.5)
0.961790
1.813670
2.765941
0.000000
0.000000
0.807426
1.215067
1.092389

ElasticNet(a=0.3, r=0.9)
0.771897
1.639918
2.624446
0.000000
0.000000
1.277930
1.307354
1.168137

ElasticNet(a=0.3, r=0.7)
0.785336
1.603633
2.528307
0.000000
0.000000
1.370249
1.400441
1.273228

ElasticNet(a=0.3, r=0.5)
0.797276
1.571245
2.442910
0.000000
0.000000
1.469114
1.512212
1.390285

ElasticNet(a=1, r=0.9)
0.125708
0.934637
1.835475
0.000000
0.000000
4.112881
4.189787
4.313581

ElasticNet(a=1, r=0.7)
0.265489
0.946058
1.698143
0.000000
0.000000
4.168660
4.233218
4.410287

ElasticNet(a=1, r=0.5)
0.366721
0.954115
1.599575
0.000000
0.000000
4.252089
4.309107
4.534828

``````
``````

In [46]:

# scikit learn includes some functions for making cross validation easier
# and computationally faster for a some models
from sklearn import linear_model
model_ridge_cv = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
model_lasso_cv = linear_model.LassoCV(alphas=[0.1, 1.0, 10.0])
model_en_cv = linear_model.ElasticNetCV(l1_ratio=[.9], n_alphas=100)

``````
``````

In [47]:

``````
``````

In [ ]:

``````