In [1]:
import random
import csv
import numpy as np
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

In [2]:
def load_data(file_path):
    target = 0
    explaining = [1, 2, 3, 4, 5, 6]

    X = [] # Design matrix
    y = []

    with open(file_path) as f:
        reader = csv.reader(f)
        next(reader) # Skip header line
        for row in reader:
            try:
                X.append([float(row[i]) for i in explaining]) # Explaining variables
                y.append(1 / float(row[target])) # Gallons per mile
            except:
                # Skip rows that are not parseable
                pass

    return np.array(X), np.array(y)

In [3]:
X, y = load_data("cars.csv")
X, y


Out[3]:
(array([[   8. ,  307. ,  130. , 3504. ,   12. ,   70. ],
        [   8. ,  350. ,  165. , 3693. ,   11.5,   70. ],
        [   8. ,  318. ,  150. , 3436. ,   11. ,   70. ],
        ...,
        [   4. ,  135. ,   84. , 2295. ,   11.6,   82. ],
        [   4. ,  120. ,   79. , 2625. ,   18.6,   82. ],
        [   4. ,  119. ,   82. , 2720. ,   19.4,   82. ]]),
 array([0.05555556, 0.06666667, 0.05555556, 0.0625    , 0.05882353,
        0.06666667, 0.07142857, 0.07142857, 0.07142857, 0.06666667,
        0.06666667, 0.07142857, 0.06666667, 0.07142857, 0.04166667,
        0.04545455, 0.05555556, 0.04761905, 0.03703704, 0.03846154,
        0.04      , 0.04166667, 0.04      , 0.03846154, 0.04761905,
        0.1       , 0.1       , 0.09090909, 0.11111111, 0.03703704,
        0.03571429, 0.04      , 0.05263158, 0.0625    , 0.05882353,
        0.05263158, 0.05555556, 0.07142857, 0.07142857, 0.07142857,
        0.07142857, 0.08333333, 0.07692308, 0.07692308, 0.05555556,
        0.04545455, 0.05263158, 0.05555556, 0.04347826, 0.03571429,
        0.03333333, 0.03333333, 0.03225806, 0.02857143, 0.03703704,
        0.03846154, 0.04166667, 0.04      , 0.04347826, 0.05      ,
        0.04761905, 0.07692308, 0.07142857, 0.06666667, 0.07142857,
        0.05882353, 0.09090909, 0.07692308, 0.08333333, 0.07692308,
        0.05263158, 0.06666667, 0.07692308, 0.07692308, 0.07142857,
        0.05555556, 0.04545455, 0.04761905, 0.03846154, 0.04545455,
        0.03571429, 0.04347826, 0.03571429, 0.03703704, 0.07692308,
        0.07142857, 0.07692308, 0.07142857, 0.06666667, 0.08333333,
        0.07692308, 0.07692308, 0.07142857, 0.07692308, 0.08333333,
        0.07692308, 0.05555556, 0.0625    , 0.05555556, 0.05555556,
        0.04347826, 0.03846154, 0.09090909, 0.08333333, 0.07692308,
        0.08333333, 0.05555556, 0.05      , 0.04761905, 0.04545455,
        0.05555556, 0.05263158, 0.04761905, 0.03846154, 0.06666667,
        0.0625    , 0.03448276, 0.04166667, 0.05      , 0.05263158,
        0.06666667, 0.04166667, 0.05      , 0.09090909, 0.05      ,
        0.05263158, 0.06666667, 0.03225806, 0.03846154, 0.03125   ,
        0.04      , 0.0625    , 0.0625    , 0.05555556, 0.0625    ,
        0.07692308, 0.07142857, 0.07142857, 0.07142857, 0.03448276,
        0.03846154, 0.03846154, 0.03225806, 0.03125   , 0.03571429,
        0.04166667, 0.03846154, 0.04166667, 0.03846154, 0.03225806,
        0.05263158, 0.05555556, 0.06666667, 0.06666667, 0.0625    ,
        0.06666667, 0.0625    , 0.07142857, 0.05882353, 0.0625    ,
        0.06666667, 0.05555556, 0.04761905, 0.05      , 0.07692308,
        0.03448276, 0.04347826, 0.05      , 0.04347826, 0.04166667,
        0.04      , 0.04166667, 0.05555556, 0.03448276, 0.05263158,
        0.04347826, 0.04347826, 0.04545455, 0.04      , 0.03030303,
        0.03571429, 0.04      , 0.04      , 0.03846154, 0.03703704,
        0.05714286, 0.0625    , 0.06451613, 0.06896552, 0.04545455,
        0.04545455, 0.04166667, 0.04444444, 0.03448276, 0.04081633,
        0.03448276, 0.03030303, 0.05      , 0.05555556, 0.05405405,
        0.05714286, 0.03389831, 0.03125   , 0.03571429, 0.03773585,
        0.05      , 0.07692308, 0.05263158, 0.05263158, 0.06060606,
        0.06060606, 0.07692308, 0.07692308, 0.07692308, 0.03174603,
        0.03333333, 0.02777778, 0.03921569, 0.02985075, 0.05714286,
        0.05882353, 0.06451613, 0.06666667, 0.05714286, 0.04878049,
        0.05263158, 0.05405405, 0.0625    , 0.06451613, 0.06451613,
        0.0625    , 0.03448276, 0.04081633, 0.03846154, 0.03921569,
        0.03278689, 0.02985075, 0.03333333, 0.03278689, 0.04545455,
        0.04651163, 0.04651163, 0.02320186, 0.02770083, 0.0304878 ,
        0.02538071, 0.02770083, 0.05025126, 0.05154639, 0.04950495,
        0.05208333, 0.04878049, 0.04950495, 0.03984064, 0.04878049,
        0.05154639, 0.04854369, 0.04807692, 0.05376344, 0.05524862,
        0.05208333, 0.05649718, 0.05524862, 0.05714286, 0.03333333,
        0.03636364, 0.03676471, 0.03236246, 0.04739336, 0.04310345,
        0.04201681, 0.041841  , 0.04926108, 0.05882353, 0.0462963 ,
        0.0617284 , 0.03174603, 0.03389831, 0.04651163, 0.05050505,
        0.04484305, 0.04950495, 0.04854369, 0.05882353, 0.05681818,
        0.06060606, 0.05494505, 0.0591716 , 0.06451613, 0.05208333,
        0.05405405, 0.03134796, 0.02932551, 0.0280112 , 0.03649635,
        0.03937008, 0.04347826, 0.03676471, 0.041841  , 0.02923977,
        0.02898551, 0.03144654, 0.02680965, 0.03521127, 0.03472222,
        0.03731343, 0.02985075, 0.02409639, 0.02624672, 0.03115265,
        0.02688172, 0.03571429, 0.03787879, 0.04115226, 0.05235602,
        0.02915452, 0.03355705, 0.03194888, 0.02702703, 0.0310559 ,
        0.02145923, 0.03584229, 0.0245098 , 0.02257336, 0.02304147,
        0.02747253, 0.03333333, 0.02242152, 0.0295858 , 0.03355705,
        0.03058104, 0.04219409, 0.02857143, 0.0308642 , 0.03676471,
        0.03759398, 0.03875969, 0.04255319, 0.03333333, 0.02557545,
        0.02564103, 0.02849003, 0.03095975, 0.02702703, 0.0265252 ,
        0.02932551, 0.02881844, 0.02906977, 0.03344482, 0.03030303,
        0.02967359, 0.0308642 , 0.03039514, 0.03164557, 0.03558719,
        0.03257329, 0.03937008, 0.04132231, 0.04464286, 0.03759398,
        0.04950495, 0.05681818, 0.03571429, 0.03703704, 0.02941176,
        0.03225806, 0.03448276, 0.03703704, 0.04166667, 0.02777778,
        0.02702703, 0.03225806, 0.02631579, 0.02777778, 0.02777778,
        0.02777778, 0.02941176, 0.02631579, 0.03125   , 0.02631579,
        0.04      , 0.02631579, 0.03846154, 0.04545455, 0.03125   ,
        0.02777778, 0.03703704, 0.03703704, 0.02272727, 0.03125   ,
        0.03571429, 0.03225806]))

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

features = PolynomialFeatures(4, include_bias=True)
X_train = features.fit_transform(X_train)
X_test = features.fit_transform(X_test)

In [8]:
model = lm.RidgeCV(``alphas=[0.01, 0.05, 0.1, 0.25, 0.5, 1]`, normalize=True)
model.fit(X_train, y_train)


Out[8]:
RidgeCV(alphas=array([0.01, 0.05, 0.1 , 0.25, 0.5 , 1.  ]), cv=None,
    fit_intercept=True, gcv_mode=None, normalize=True, scoring=None,
    store_cv_values=False)

In [9]:
model.score(X_train, y_train)


Out[9]:
0.9130283927581451

In [10]:
model.score(X_test, y_test)


Out[10]:
0.9074452466596641

In [ ]: