In [1]:
from __future__ import division
import os
from tqdm import tqdm
from natsort import natsorted
import nibabel as nb
import statistics
# Opens data file and constructs features
def get_data(subdir):
    data = pd.DataFrame(columns=['mean', 'median', 'stdev'])
    for filename in tqdm(natsorted(os.listdir(subdir))):
        path = subdir + filename
        img = nb.load(path).get_data()[..., 0]
        hist = img[img > 0].flatten()
        data = data.append({'mean': hist.mean(),
                            'median': statistics.median(hist),
                            'stdev': statistics.stdev(hist),
                           }, ignore_index=True)
    return data

In [2]:
# Fetch data or open files with data
import pandas as pd
train_features = pd.DataFrame.from_csv('../data/train_data.csv')[["mean", "median", "gm", "age"]]
test_features = pd.DataFrame.from_csv('../data/test_data.csv')[["mean", "median", "gm"]]

In [3]:
# Show pairwise relations between features
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
sb.pairplot(train_features, hue='age', palette='Blues')


Out[3]:
<seaborn.axisgrid.PairGrid at 0x7f0e19868d90>

In [5]:
# Illustrate polynomial regression features vs. age
order = 2
sb.lmplot(y='mean', x='age', data=train_features, order=order)
sb.lmplot(y='median', x='age', data=train_features, order=order)
sb.lmplot(y='gm', x='age', data=train_features, order=order)


Out[5]:
<seaborn.axisgrid.FacetGrid at 0x7f0e09218410>

In [6]:
#from sklearn.cross_validation import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
alpha = 40
order = 2
best_estimator_name, best_estimator, best_order, min_error = '', None, 0, 1000
# Train and test model
X = train_features.drop("age", axis=1).as_matrix()
y = train_features["age"].as_matrix()
kf = KFold(n_splits=5)
train_test_split = list(kf.split(X))
normalize = False
for order in range(1, 6):
    for alpha in range(50, 2001, 50):
        estimators = [
            ('LinearRegression', LinearRegression(normalize=normalize)),
            ('Ridge(alpha=%d)' % (alpha), Ridge(alpha=alpha, normalize=normalize)),
            ('Lasso(alpha=%d)' % (alpha), Lasso(alpha=alpha, normalize=normalize))
        ]
        for estimator_name, estimator in estimators:
            model = make_pipeline(PolynomialFeatures(order), estimator)
            errors = []
            for train, test in train_test_split:
                X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
                model.fit(X_train, y_train)                    
                predictions = model.predict(X_test)
                errors.append(mean_squared_error(y_test, predictions))
            errors_mean = sum(errors) / len(errors)
            print '%s [%d]: %f' % (estimator_name, order, errors_mean)
            if min_error > errors_mean:
                best_estimator_name = estimator_name
                best_estimator = estimator
                min_error = errors_mean
                best_order = order
            
print "\nBest result:"
print best_estimator_name, best_order, min_error


LinearRegression [1]: 84.856687
Ridge(alpha=50) [1]: 84.856802
Lasso(alpha=50) [1]: 105.530373
LinearRegression [1]: 84.856687
Ridge(alpha=100) [1]: 84.859349
Lasso(alpha=100) [1]: 105.530354
LinearRegression [1]: 84.856687
Ridge(alpha=150) [1]: 84.864217
Lasso(alpha=150) [1]: 105.530337
LinearRegression [1]: 84.856687
Ridge(alpha=200) [1]: 84.871298
Lasso(alpha=200) [1]: 105.530319
LinearRegression [1]: 84.856687
Ridge(alpha=250) [1]: 84.880493
Lasso(alpha=250) [1]: 105.530303
LinearRegression [1]: 84.856687
Ridge(alpha=300) [1]: 84.891705
Lasso(alpha=300) [1]: 105.530286
LinearRegression [1]: 84.856687
Ridge(alpha=350) [1]: 84.904841
Lasso(alpha=350) [1]: 105.530271
LinearRegression [1]: 84.856687
Ridge(alpha=400) [1]: 84.919814
Lasso(alpha=400) [1]: 105.530256
LinearRegression [1]: 84.856687
Ridge(alpha=450) [1]: 84.936538
Lasso(alpha=450) [1]: 105.530241
LinearRegression [1]: 84.856687
Ridge(alpha=500) [1]: 84.954934
Lasso(alpha=500) [1]: 105.530228
LinearRegression [1]: 84.856687
Ridge(alpha=550) [1]: 84.974925
Lasso(alpha=550) [1]: 105.530214
LinearRegression [1]: 84.856687
Ridge(alpha=600) [1]: 84.996437
Lasso(alpha=600) [1]: 105.530202
LinearRegression [1]: 84.856687
Ridge(alpha=650) [1]: 85.019399
Lasso(alpha=650) [1]: 105.530190
LinearRegression [1]: 84.856687
Ridge(alpha=700) [1]: 85.043744
Lasso(alpha=700) [1]: 105.530178
LinearRegression [1]: 84.856687
Ridge(alpha=750) [1]: 85.069408
Lasso(alpha=750) [1]: 105.530167
LinearRegression [1]: 84.856687
Ridge(alpha=800) [1]: 85.096328
Lasso(alpha=800) [1]: 105.530157
LinearRegression [1]: 84.856687
Ridge(alpha=850) [1]: 85.124446
Lasso(alpha=850) [1]: 105.530147
LinearRegression [1]: 84.856687
Ridge(alpha=900) [1]: 85.153706
Lasso(alpha=900) [1]: 105.530138
LinearRegression [1]: 84.856687
Ridge(alpha=950) [1]: 85.184052
Lasso(alpha=950) [1]: 105.530130
LinearRegression [1]: 84.856687
Ridge(alpha=1000) [1]: 85.215434
Lasso(alpha=1000) [1]: 105.530122
LinearRegression [1]: 84.856687
Ridge(alpha=1050) [1]: 85.247801
Lasso(alpha=1050) [1]: 105.530114
LinearRegression [1]: 84.856687
Ridge(alpha=1100) [1]: 85.281106
Lasso(alpha=1100) [1]: 105.530107
LinearRegression [1]: 84.856687
Ridge(alpha=1150) [1]: 85.315303
Lasso(alpha=1150) [1]: 105.530101
LinearRegression [1]: 84.856687
Ridge(alpha=1200) [1]: 85.350349
Lasso(alpha=1200) [1]: 105.530095
LinearRegression [1]: 84.856687
Ridge(alpha=1250) [1]: 85.386201
Lasso(alpha=1250) [1]: 105.530090
LinearRegression [1]: 84.856687
Ridge(alpha=1300) [1]: 85.422820
Lasso(alpha=1300) [1]: 105.530086
LinearRegression [1]: 84.856687
Ridge(alpha=1350) [1]: 85.460167
Lasso(alpha=1350) [1]: 105.530082
LinearRegression [1]: 84.856687
Ridge(alpha=1400) [1]: 85.498205
Lasso(alpha=1400) [1]: 105.530079
LinearRegression [1]: 84.856687
Ridge(alpha=1450) [1]: 85.536898
Lasso(alpha=1450) [1]: 105.530076
LinearRegression [1]: 84.856687
Ridge(alpha=1500) [1]: 85.576214
Lasso(alpha=1500) [1]: 105.530074
LinearRegression [1]: 84.856687
Ridge(alpha=1550) [1]: 85.616119
Lasso(alpha=1550) [1]: 105.530072
LinearRegression [1]: 84.856687
Ridge(alpha=1600) [1]: 85.656582
Lasso(alpha=1600) [1]: 105.530071
LinearRegression [1]: 84.856687
Ridge(alpha=1650) [1]: 85.697573
Lasso(alpha=1650) [1]: 105.530071
LinearRegression [1]: 84.856687
Ridge(alpha=1700) [1]: 85.739064
Lasso(alpha=1700) [1]: 105.530071
LinearRegression [1]: 84.856687
Ridge(alpha=1750) [1]: 85.781027
Lasso(alpha=1750) [1]: 105.530072
LinearRegression [1]: 84.856687
Ridge(alpha=1800) [1]: 85.823435
Lasso(alpha=1800) [1]: 105.530073
LinearRegression [1]: 84.856687
Ridge(alpha=1850) [1]: 85.866263
Lasso(alpha=1850) [1]: 105.530075
LinearRegression [1]: 84.856687
Ridge(alpha=1900) [1]: 85.909487
Lasso(alpha=1900) [1]: 105.530077
LinearRegression [1]: 84.856687
Ridge(alpha=1950) [1]: 85.953084
Lasso(alpha=1950) [1]: 105.530080
LinearRegression [1]: 84.856687
Ridge(alpha=2000) [1]: 85.997030
Lasso(alpha=2000) [1]: 105.530084
LinearRegression [2]: 72.190534
Ridge(alpha=50) [2]: 70.741325
/home/abis_m/.pyenv/versions/2.7.9/lib/python2.7/site-packages/scikit_learn-0.18-py2.7-linux-x86_64.egg/sklearn/linear_model/coordinate_descent.py:479: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.
  ConvergenceWarning)
Lasso(alpha=50) [2]: 73.938098
LinearRegression [2]: 72.190534
Ridge(alpha=100) [2]: 70.736527
Lasso(alpha=100) [2]: 73.941909
LinearRegression [2]: 72.190534
Ridge(alpha=150) [2]: 70.734848
Lasso(alpha=150) [2]: 73.944375
LinearRegression [2]: 72.190534
Ridge(alpha=200) [2]: 70.733948
Lasso(alpha=200) [2]: 73.945163
LinearRegression [2]: 72.190534
Ridge(alpha=250) [2]: 70.733361
Lasso(alpha=250) [2]: 73.942471
LinearRegression [2]: 72.190534
Ridge(alpha=300) [2]: 70.732929
Lasso(alpha=300) [2]: 73.931327
LinearRegression [2]: 72.190534
Ridge(alpha=350) [2]: 70.732587
Lasso(alpha=350) [2]: 73.919597
LinearRegression [2]: 72.190534
Ridge(alpha=400) [2]: 70.732300
Lasso(alpha=400) [2]: 73.909836
LinearRegression [2]: 72.190534
Ridge(alpha=450) [2]: 70.732051
Lasso(alpha=450) [2]: 73.890680
LinearRegression [2]: 72.190534
Ridge(alpha=500) [2]: 70.731827
Lasso(alpha=500) [2]: 73.907233
LinearRegression [2]: 72.190534
Ridge(alpha=550) [2]: 70.731622
Lasso(alpha=550) [2]: 73.906213
LinearRegression [2]: 72.190534
Ridge(alpha=600) [2]: 70.731432
Lasso(alpha=600) [2]: 73.905378
LinearRegression [2]: 72.190534
Ridge(alpha=650) [2]: 70.731252
Lasso(alpha=650) [2]: 73.905391
LinearRegression [2]: 72.190534
Ridge(alpha=700) [2]: 70.731082
Lasso(alpha=700) [2]: 73.903145
LinearRegression [2]: 72.190534
Ridge(alpha=750) [2]: 70.730918
Lasso(alpha=750) [2]: 73.894972
LinearRegression [2]: 72.190534
Ridge(alpha=800) [2]: 70.730759
Lasso(alpha=800) [2]: 73.888212
LinearRegression [2]: 72.190534
Ridge(alpha=850) [2]: 70.730606
Lasso(alpha=850) [2]: 73.882924
LinearRegression [2]: 72.190534
Ridge(alpha=900) [2]: 70.730456
Lasso(alpha=900) [2]: 73.874170
LinearRegression [2]: 72.190534
Ridge(alpha=950) [2]: 70.730309
Lasso(alpha=950) [2]: 73.865244
LinearRegression [2]: 72.190534
Ridge(alpha=1000) [2]: 70.730165
Lasso(alpha=1000) [2]: 73.857602
LinearRegression [2]: 72.190534
Ridge(alpha=1050) [2]: 70.730024
Lasso(alpha=1050) [2]: 73.850099
LinearRegression [2]: 72.190534
Ridge(alpha=1100) [2]: 70.729884
Lasso(alpha=1100) [2]: 73.842729
LinearRegression [2]: 72.190534
Ridge(alpha=1150) [2]: 70.729747
Lasso(alpha=1150) [2]: 73.835490
LinearRegression [2]: 72.190534
Ridge(alpha=1200) [2]: 70.729611
Lasso(alpha=1200) [2]: 73.828381
LinearRegression [2]: 72.190534
Ridge(alpha=1250) [2]: 70.729476
Lasso(alpha=1250) [2]: 73.821403
LinearRegression [2]: 72.190534
Ridge(alpha=1300) [2]: 70.729343
Lasso(alpha=1300) [2]: 73.814554
LinearRegression [2]: 72.190534
Ridge(alpha=1350) [2]: 70.729211
Lasso(alpha=1350) [2]: 73.807835
LinearRegression [2]: 72.190534
Ridge(alpha=1400) [2]: 70.729079
Lasso(alpha=1400) [2]: 73.802319
LinearRegression [2]: 72.190534
Ridge(alpha=1450) [2]: 70.728949
Lasso(alpha=1450) [2]: 73.797295
LinearRegression [2]: 72.190534
Ridge(alpha=1500) [2]: 70.728820
Lasso(alpha=1500) [2]: 73.792347
LinearRegression [2]: 72.190534
Ridge(alpha=1550) [2]: 70.728691
Lasso(alpha=1550) [2]: 73.787513
LinearRegression [2]: 72.190534
Ridge(alpha=1600) [2]: 70.728563
Lasso(alpha=1600) [2]: 73.784280
LinearRegression [2]: 72.190534
Ridge(alpha=1650) [2]: 70.728435
Lasso(alpha=1650) [2]: 73.782397
LinearRegression [2]: 72.190534
Ridge(alpha=1700) [2]: 70.728308
Lasso(alpha=1700) [2]: 73.780624
LinearRegression [2]: 72.190534
Ridge(alpha=1750) [2]: 70.728182
Lasso(alpha=1750) [2]: 73.778970
LinearRegression [2]: 72.190534
Ridge(alpha=1800) [2]: 70.728056
Lasso(alpha=1800) [2]: 73.777437
LinearRegression [2]: 72.190534
Ridge(alpha=1850) [2]: 70.727930
Lasso(alpha=1850) [2]: 73.776024
LinearRegression [2]: 72.190534
Ridge(alpha=1900) [2]: 70.727805
Lasso(alpha=1900) [2]: 73.774735
LinearRegression [2]: 72.190534
Ridge(alpha=1950) [2]: 70.727681
Lasso(alpha=1950) [2]: 73.773566
LinearRegression [2]: 72.190534
Ridge(alpha=2000) [2]: 70.727556
Lasso(alpha=2000) [2]: 73.772519
LinearRegression [3]: 69.249970
Ridge(alpha=50) [3]: 69.770441
Lasso(alpha=50) [3]: 71.256348
LinearRegression [3]: 69.249970
Ridge(alpha=100) [3]: 69.704635
Lasso(alpha=100) [3]: 71.241471
LinearRegression [3]: 69.249970
Ridge(alpha=150) [3]: 69.677038
Lasso(alpha=150) [3]: 71.226244
LinearRegression [3]: 69.249970
Ridge(alpha=200) [3]: 69.660344
Lasso(alpha=200) [3]: 71.211701
LinearRegression [3]: 69.249970
Ridge(alpha=250) [3]: 69.648268
Lasso(alpha=250) [3]: 71.190876
LinearRegression [3]: 69.249970
Ridge(alpha=300) [3]: 69.638614
Lasso(alpha=300) [3]: 71.166389
LinearRegression [3]: 69.249970
Ridge(alpha=350) [3]: 69.630390
Lasso(alpha=350) [3]: 71.142674
LinearRegression [3]: 69.249970
Ridge(alpha=400) [3]: 69.623099
Lasso(alpha=400) [3]: 71.131789
LinearRegression [3]: 69.249970
Ridge(alpha=450) [3]: 69.616453
Lasso(alpha=450) [3]: 71.130271
LinearRegression [3]: 69.249970
Ridge(alpha=500) [3]: 69.610283
Lasso(alpha=500) [3]: 71.127962
LinearRegression [3]: 69.249970
Ridge(alpha=550) [3]: 69.604470
Lasso(alpha=550) [3]: 71.125757
LinearRegression [3]: 69.249970
Ridge(alpha=600) [3]: 69.598944
Lasso(alpha=600) [3]: 71.119841
LinearRegression [3]: 69.249970
Ridge(alpha=650) [3]: 69.593651
Lasso(alpha=650) [3]: 71.111217
LinearRegression [3]: 69.249970
Ridge(alpha=700) [3]: 69.588561
Lasso(alpha=700) [3]: 71.103309
LinearRegression [3]: 69.249970
Ridge(alpha=750) [3]: 69.583620
Lasso(alpha=750) [3]: 71.095925
LinearRegression [3]: 69.249970
Ridge(alpha=800) [3]: 69.578842
Lasso(alpha=800) [3]: 71.076438
LinearRegression [3]: 69.249970
Ridge(alpha=850) [3]: 69.574187
Lasso(alpha=850) [3]: 71.063724
LinearRegression [3]: 69.249970
Ridge(alpha=900) [3]: 69.569645
Lasso(alpha=900) [3]: 71.051124
LinearRegression [3]: 69.249970
Ridge(alpha=950) [3]: 69.565212
Lasso(alpha=950) [3]: 71.038617
LinearRegression [3]: 69.249970
Ridge(alpha=1000) [3]: 69.560862
Lasso(alpha=1000) [3]: 71.026190
LinearRegression [3]: 69.249970
Ridge(alpha=1050) [3]: 69.556617
Lasso(alpha=1050) [3]: 71.013834
LinearRegression [3]: 69.249970
Ridge(alpha=1100) [3]: 69.552444
Lasso(alpha=1100) [3]: 71.001540
LinearRegression [3]: 69.249970
Ridge(alpha=1150) [3]: 69.548356
Lasso(alpha=1150) [3]: 70.989303
LinearRegression [3]: 69.249970
Ridge(alpha=1200) [3]: 69.544340
Lasso(alpha=1200) [3]: 70.977120
LinearRegression [3]: 69.249970
Ridge(alpha=1250) [3]: 69.540397
Lasso(alpha=1250) [3]: 70.964987
LinearRegression [3]: 69.249970
Ridge(alpha=1300) [3]: 69.536517
Lasso(alpha=1300) [3]: 70.952902
LinearRegression [3]: 69.249970
Ridge(alpha=1350) [3]: 69.532699
Lasso(alpha=1350) [3]: 70.940863
LinearRegression [3]: 69.249970
Ridge(alpha=1400) [3]: 69.528935
Lasso(alpha=1400) [3]: 70.928869
LinearRegression [3]: 69.249970
Ridge(alpha=1450) [3]: 69.525237
Lasso(alpha=1450) [3]: 70.916918
LinearRegression [3]: 69.249970
Ridge(alpha=1500) [3]: 69.521604
Lasso(alpha=1500) [3]: 70.905009
LinearRegression [3]: 69.249970
Ridge(alpha=1550) [3]: 69.518005
Lasso(alpha=1550) [3]: 70.893141
LinearRegression [3]: 69.249970
Ridge(alpha=1600) [3]: 69.514472
Lasso(alpha=1600) [3]: 70.881315
LinearRegression [3]: 69.249970
Ridge(alpha=1650) [3]: 69.510987
Lasso(alpha=1650) [3]: 70.869529
LinearRegression [3]: 69.249970
Ridge(alpha=1700) [3]: 69.507551
Lasso(alpha=1700) [3]: 70.857782
LinearRegression [3]: 69.249970
Ridge(alpha=1750) [3]: 69.504156
Lasso(alpha=1750) [3]: 70.846075
LinearRegression [3]: 69.249970
Ridge(alpha=1800) [3]: 69.500821
Lasso(alpha=1800) [3]: 70.834408
LinearRegression [3]: 69.249970
Ridge(alpha=1850) [3]: 69.497523
Lasso(alpha=1850) [3]: 70.822778
LinearRegression [3]: 69.249970
Ridge(alpha=1900) [3]: 69.494277
Lasso(alpha=1900) [3]: 70.811188
LinearRegression [3]: 69.249970
Ridge(alpha=1950) [3]: 69.491062
Lasso(alpha=1950) [3]: 70.799636
LinearRegression [3]: 69.249970
Ridge(alpha=2000) [3]: 69.487905
Lasso(alpha=2000) [3]: 70.788122
LinearRegression [4]: 67.533813
Ridge(alpha=50) [4]: 77.198193
Lasso(alpha=50) [4]: 67.409057
LinearRegression [4]: 67.533813
Ridge(alpha=100) [4]: 77.516811
Lasso(alpha=100) [4]: 67.398954
LinearRegression [4]: 67.533813
Ridge(alpha=150) [4]: 76.486395
Lasso(alpha=150) [4]: 67.389243
LinearRegression [4]: 67.533813
Ridge(alpha=200) [4]: 77.577928
Lasso(alpha=200) [4]: 67.377519
LinearRegression [4]: 67.533813
Ridge(alpha=250) [4]: 76.722157
Lasso(alpha=250) [4]: 67.363138
LinearRegression [4]: 67.533813
Ridge(alpha=300) [4]: 78.999317
Lasso(alpha=300) [4]: 67.342853
LinearRegression [4]: 67.533813
Ridge(alpha=350) [4]: 77.924397
Lasso(alpha=350) [4]: 67.330672
LinearRegression [4]: 67.533813
Ridge(alpha=400) [4]: 77.885119
Lasso(alpha=400) [4]: 67.322193
LinearRegression [4]: 67.533813
Ridge(alpha=450) [4]: 79.630949
Lasso(alpha=450) [4]: 67.312406
LinearRegression [4]: 67.533813
Ridge(alpha=500) [4]: 78.279183
Lasso(alpha=500) [4]: 67.301386
LinearRegression [4]: 67.533813
Ridge(alpha=550) [4]: 79.630832
Lasso(alpha=550) [4]: 67.280899
LinearRegression [4]: 67.533813
Ridge(alpha=600) [4]: 79.746950
Lasso(alpha=600) [4]: 67.271861
LinearRegression [4]: 67.533813
Ridge(alpha=650) [4]: 79.314344
Lasso(alpha=650) [4]: 67.262558
LinearRegression [4]: 67.533813
Ridge(alpha=700) [4]: 78.487299
Lasso(alpha=700) [4]: 67.253222
LinearRegression [4]: 67.533813
Ridge(alpha=750) [4]: 78.825355
Lasso(alpha=750) [4]: 67.243888
LinearRegression [4]: 67.533813
Ridge(alpha=800) [4]: 79.069314
Lasso(alpha=800) [4]: 67.234572
LinearRegression [4]: 67.533813
Ridge(alpha=850) [4]: 79.944008
Lasso(alpha=850) [4]: 67.225282
LinearRegression [4]: 67.533813
Ridge(alpha=900) [4]: 79.536546
Lasso(alpha=900) [4]: 67.216023
LinearRegression [4]: 67.533813
Ridge(alpha=950) [4]: 79.660151
Lasso(alpha=950) [4]: 67.206795
LinearRegression [4]: 67.533813
Ridge(alpha=1000) [4]: 81.172883
Lasso(alpha=1000) [4]: 67.197600
LinearRegression [4]: 67.533813
Ridge(alpha=1050) [4]: 80.303134
Lasso(alpha=1050) [4]: 67.188438
LinearRegression [4]: 67.533813
Ridge(alpha=1100) [4]: 80.043220
Lasso(alpha=1100) [4]: 67.179309
LinearRegression [4]: 67.533813
Ridge(alpha=1150) [4]: 79.885794
Lasso(alpha=1150) [4]: 67.170214
LinearRegression [4]: 67.533813
Ridge(alpha=1200) [4]: 80.637951
Lasso(alpha=1200) [4]: 67.161151
LinearRegression [4]: 67.533813
Ridge(alpha=1250) [4]: 80.054765
Lasso(alpha=1250) [4]: 67.152121
LinearRegression [4]: 67.533813
Ridge(alpha=1300) [4]: 81.244288
Lasso(alpha=1300) [4]: 67.143124
LinearRegression [4]: 67.533813
Ridge(alpha=1350) [4]: 81.545137
Lasso(alpha=1350) [4]: 67.134159
LinearRegression [4]: 67.533813
Ridge(alpha=1400) [4]: 80.923199
Lasso(alpha=1400) [4]: 67.125226
LinearRegression [4]: 67.533813
Ridge(alpha=1450) [4]: 82.272327
Lasso(alpha=1450) [4]: 67.116325
LinearRegression [4]: 67.533813
Ridge(alpha=1500) [4]: 80.426348
Lasso(alpha=1500) [4]: 67.107457
LinearRegression [4]: 67.533813
Ridge(alpha=1550) [4]: 82.253117
Lasso(alpha=1550) [4]: 67.098620
LinearRegression [4]: 67.533813
Ridge(alpha=1600) [4]: 83.242110
Lasso(alpha=1600) [4]: 67.089814
LinearRegression [4]: 67.533813
Ridge(alpha=1650) [4]: 82.533024
Lasso(alpha=1650) [4]: 67.081041
LinearRegression [4]: 67.533813
Ridge(alpha=1700) [4]: 80.036054
Lasso(alpha=1700) [4]: 67.072298
LinearRegression [4]: 67.533813
Ridge(alpha=1750) [4]: 82.095260
Lasso(alpha=1750) [4]: 67.063588
LinearRegression [4]: 67.533813
Ridge(alpha=1800) [4]: 80.862675
Lasso(alpha=1800) [4]: 67.054909
LinearRegression [4]: 67.533813
Ridge(alpha=1850) [4]: 81.831519
Lasso(alpha=1850) [4]: 67.046261
LinearRegression [4]: 67.533813
Ridge(alpha=1900) [4]: 85.441974
Lasso(alpha=1900) [4]: 67.037644
LinearRegression [4]: 67.533813
Ridge(alpha=1950) [4]: 82.158284
Lasso(alpha=1950) [4]: 67.029059
LinearRegression [4]: 67.533813
Ridge(alpha=2000) [4]: 81.022518
Lasso(alpha=2000) [4]: 67.020505
LinearRegression [5]: 72.870745
Ridge(alpha=50) [5]: 100.652375
Lasso(alpha=50) [5]: 66.185284
LinearRegression [5]: 72.870745
Ridge(alpha=100) [5]: 100.652375
Lasso(alpha=100) [5]: 66.177427
LinearRegression [5]: 72.870745
Ridge(alpha=150) [5]: 100.652375
Lasso(alpha=150) [5]: 66.164842
LinearRegression [5]: 72.870745
Ridge(alpha=200) [5]: 100.652375
Lasso(alpha=200) [5]: 66.151234
LinearRegression [5]: 72.870745
Ridge(alpha=250) [5]: 100.652375
Lasso(alpha=250) [5]: 66.140223
LinearRegression [5]: 72.870745
Ridge(alpha=300) [5]: 100.652375
Lasso(alpha=300) [5]: 66.129704
LinearRegression [5]: 72.870745
Ridge(alpha=350) [5]: 100.652375
Lasso(alpha=350) [5]: 66.112908
LinearRegression [5]: 72.870745
Ridge(alpha=400) [5]: 100.652375
Lasso(alpha=400) [5]: 66.099862
LinearRegression [5]: 72.870745
Ridge(alpha=450) [5]: 100.652375
Lasso(alpha=450) [5]: 66.094082
LinearRegression [5]: 72.870745
Ridge(alpha=500) [5]: 100.652375
Lasso(alpha=500) [5]: 66.088133
LinearRegression [5]: 72.870745
Ridge(alpha=550) [5]: 100.652375
Lasso(alpha=550) [5]: 66.082165
LinearRegression [5]: 72.870745
Ridge(alpha=600) [5]: 100.652375
Lasso(alpha=600) [5]: 66.076211
LinearRegression [5]: 72.870745
Ridge(alpha=650) [5]: 100.652375
Lasso(alpha=650) [5]: 66.070281
LinearRegression [5]: 72.870745
Ridge(alpha=700) [5]: 100.652375
Lasso(alpha=700) [5]: 66.064379
LinearRegression [5]: 72.870745
Ridge(alpha=750) [5]: 100.652375
Lasso(alpha=750) [5]: 66.058504
LinearRegression [5]: 72.870745
Ridge(alpha=800) [5]: 100.652375
Lasso(alpha=800) [5]: 66.052658
LinearRegression [5]: 72.870745
Ridge(alpha=850) [5]: 100.652375
Lasso(alpha=850) [5]: 66.046838
LinearRegression [5]: 72.870745
Ridge(alpha=900) [5]: 100.652375
Lasso(alpha=900) [5]: 66.041046
LinearRegression [5]: 72.870745
Ridge(alpha=950) [5]: 100.652375
Lasso(alpha=950) [5]: 66.035279
LinearRegression [5]: 72.870745
Ridge(alpha=1000) [5]: 100.652375
Lasso(alpha=1000) [5]: 66.029539
LinearRegression [5]: 72.870745
Ridge(alpha=1050) [5]: 100.652375
Lasso(alpha=1050) [5]: 66.023825
LinearRegression [5]: 72.870745
Ridge(alpha=1100) [5]: 100.652375
Lasso(alpha=1100) [5]: 66.018136
LinearRegression [5]: 72.870745
Ridge(alpha=1150) [5]: 100.652375
Lasso(alpha=1150) [5]: 66.012473
LinearRegression [5]: 72.870745
Ridge(alpha=1200) [5]: 100.652375
Lasso(alpha=1200) [5]: 66.006834
LinearRegression [5]: 72.870745
Ridge(alpha=1250) [5]: 100.652375
Lasso(alpha=1250) [5]: 66.001221
LinearRegression [5]: 72.870745
Ridge(alpha=1300) [5]: 100.652375
Lasso(alpha=1300) [5]: 65.995633
LinearRegression [5]: 72.870745
Ridge(alpha=1350) [5]: 100.652375
Lasso(alpha=1350) [5]: 65.990069
LinearRegression [5]: 72.870745
Ridge(alpha=1400) [5]: 100.652375
Lasso(alpha=1400) [5]: 65.984530
LinearRegression [5]: 72.870745
Ridge(alpha=1450) [5]: 100.652375
Lasso(alpha=1450) [5]: 65.979016
LinearRegression [5]: 72.870745
Ridge(alpha=1500) [5]: 100.652375
Lasso(alpha=1500) [5]: 65.973527
LinearRegression [5]: 72.870745
Ridge(alpha=1550) [5]: 100.652375
Lasso(alpha=1550) [5]: 65.968058
LinearRegression [5]: 72.870745
Ridge(alpha=1600) [5]: 100.652375
Lasso(alpha=1600) [5]: 65.962614
LinearRegression [5]: 72.870745
Ridge(alpha=1650) [5]: 100.652375
Lasso(alpha=1650) [5]: 65.957194
LinearRegression [5]: 72.870745
Ridge(alpha=1700) [5]: 100.652375
Lasso(alpha=1700) [5]: 65.951624
LinearRegression [5]: 72.870745
Ridge(alpha=1750) [5]: 100.652375
Lasso(alpha=1750) [5]: 65.946069
LinearRegression [5]: 72.870745
Ridge(alpha=1800) [5]: 100.652375
Lasso(alpha=1800) [5]: 65.940584
LinearRegression [5]: 72.870745
Ridge(alpha=1850) [5]: 100.652375
Lasso(alpha=1850) [5]: 65.935080
LinearRegression [5]: 72.870745
Ridge(alpha=1900) [5]: 100.652375
Lasso(alpha=1900) [5]: 65.929638
LinearRegression [5]: 72.870745
Ridge(alpha=1950) [5]: 100.652375
Lasso(alpha=1950) [5]: 65.924354
LinearRegression [5]: 72.870745
Ridge(alpha=2000) [5]: 100.652375
Lasso(alpha=2000) [5]: 65.919102

Best result:
Lasso(alpha=2000) 5 65.9191023811

In [7]:
best_order = 3
best_estimator = Ridge(alpha=1000)
best_estimator_name = "Ridge"
best_model = make_pipeline(PolynomialFeatures(best_order), best_estimator)
best_model.fit(X, y)


Out[7]:
Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)), ('ridge', Ridge(alpha=1000, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001))])

In [8]:
predictions = best_model.predict(test_features)
predictions = [min(max(int(round(i)), 18), 96) for i in predictions]
result = pd.DataFrame({'ID': range(1, len(predictions)+1), 'Prediction': predictions})
print ''
result.to_csv('../data/result_%s-order-%d.csv' % (best_estimator_name, best_order), index=False)




In [10]:
print predictions


[57, 53, 46, 46, 76, 24, 36, 83, 28, 26, 56, 64, 30, 33, 18, 71, 86, 31, 69, 76, 41, 27, 21, 32, 23, 35, 38, 68, 32, 32, 48, 30, 58, 83, 18, 41, 19, 24, 18, 18, 84, 82, 63, 39, 31, 67, 25, 68, 84, 60, 83, 87, 30, 68, 88, 71, 46, 74, 18, 27, 77, 33, 70, 43, 18, 25, 24, 68, 83, 83, 64, 54, 87, 41, 70, 83, 29, 63, 56, 42, 30, 25, 75, 67, 23, 81, 19, 50, 30, 82, 75, 73, 19, 71, 59, 60, 77, 45, 56, 18, 31, 24, 29, 80, 76, 76, 83, 77, 18, 18, 78, 72, 49, 58, 56, 51, 32, 68, 80, 83, 33, 33, 83, 18, 47, 30, 57, 77, 25, 66, 45, 25, 67, 83, 65, 78, 63, 61]

In [ ]: