In [1]:
import numpy as np
import matplotlib as mpl
%matplotlib inline
import time

import pandas as pd
import seaborn as sns

from mnist import MNIST  # public package for making arrays out of MNIST data.

In [2]:
import sys
sys.path.append('../code/')

In [3]:
from ridge_regression import RidgeMulti
from hyperparameter_explorer import HyperparameterExplorer

In [4]:
from mnist_helpers import mnist_training, mnist_testing

In [5]:
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 4, 3

In [6]:
train_X, train_y = mnist_training()
test_X, test_y = mnist_testing()


[    0     1     2 ..., 59997 59998 59999]
[   0    1    2 ..., 9997 9998 9999]
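
For reference, here is a guess at what those helpers do under the hood, assuming they wrap the python-mnist package imported above (mnist_helpers is the author's own module, so treat this purely as a sketch):

# Hypothetical sketch of an MNIST loader built on the python-mnist package;
# the real mnist_helpers module is not shown in this notebook.
import numpy as np
from mnist import MNIST

def load_mnist_training(path='./data'):   # hypothetical data directory
    mndata = MNIST(path)
    images, labels = mndata.load_training()
    return np.array(images), np.array(labels)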

Make the big random matrix
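
The cells below draw a random Gaussian matrix R with one row per pixel (784 for MNIST) and 10,000 columns, so XR = train_X.dot(R) projects every image onto 10,000 random features. A minimal sketch of the idea with toy shapes (not the notebook's data):

# Toy illustration of the random-feature projection (hypothetical small shapes).
import numpy as np

rng = np.random.RandomState(0)
X_small = rng.rand(5, 784)               # 5 fake "images", 784 pixels each
R_small = rng.normal(size=(784, 1000))   # random projection, d pixels -> k features
H = X_small.dot(R_small)                 # (5, 1000) random features
H_relu = H.clip(min=0)                   # optional ReLU-style clipping, as in In [8]
print(H.shape, H_relu.shape)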


In [7]:
R = np.random.normal(size=(train_X.shape[1], 10000))

In [8]:
XR = train_X.dot(R).clip(min=0)  # random features passed through a ReLU-style clip

In [9]:
XR = train_X.dot(R)  # note: this overwrites the clipped version above, so the plain linear features are used below

Do the crazy inversion
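
The "crazy inversion" is the kernel-trick form of ridge regression: with n split-off training points and d = 10,000 random features, the vanilla closed form inverts a d x d matrix, while the kernelized (dual) form inverts an n x n matrix and yields the same weights. A sketch of the two equivalent closed forms with toy data (this is the standard identity, not the RidgeMulti internals):

# The two equivalent ridge closed forms (toy example; uses the standard
# push-through identity, not the actual RidgeMulti implementation).
import numpy as np

rng = np.random.RandomState(0)
n, d, lam = 50, 200, 100.0
X = rng.rand(n, d)
Y = rng.rand(n, 3)   # multi-output targets, e.g. one-hot digit labels

# Primal form: invert a d x d matrix.
W_primal = np.linalg.solve(X.T.dot(X) + lam * np.eye(d), X.T.dot(Y))

# Kernel (dual) form: invert an n x n matrix instead.
W_kernel = X.T.dot(np.linalg.solve(X.dot(X.T) + lam * np.eye(n), Y))

print(np.allclose(W_primal, W_kernel))   # True: same weights, smaller inversion when n << d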


In [10]:
hyper_explorer = HyperparameterExplorer(X=XR, y=train_y,
                                        model=RidgeMulti,
                                        validation_split=58./60, score_name='training RMSE',
                                        use_prev_best_weights=False,
                                        test_X=test_X.dot(R), test_y=test_y)


58001 of 60000 points from training are reserved for validation
variances of all training data: 8.34774452888889
variances of split-off training & validation data: 8.550363726135203, 8.34000359608884

In [11]:
hyper_explorer.train_model(lam=100, kernelized=True, verbose=True)


prediction time.
invert matrix:
time: Mon Oct 31 08:10:39 2016
done inverting via kernel trick at time: Mon Oct 31 08:10:52 2016
dot with H^T at time: Mon Oct 31 08:10:52 2016
done dotting with H^T at time: Mon Oct 31 08:12:48 2016
Apply weights to H(X): Mon Oct 31 08:12:48 2016
Done applying weights to H(X): Mon Oct 31 08:12:49 2016
prediction time.
Apply weights to H(X): Mon Oct 31 08:12:49 2016
Done applying weights to H(X): Mon Oct 31 08:12:49 2016
Apply weights to H(X): Mon Oct 31 08:12:49 2016
Done applying weights to H(X): Mon Oct 31 08:12:50 2016
Apply weights to H(X): Mon Oct 31 08:12:50 2016
Done applying weights to H(X): Mon Oct 31 08:12:50 2016
prediction time.
Apply weights to H(X): Mon Oct 31 08:12:50 2016
Done applying weights to H(X): Mon Oct 31 08:12:51 2016
Apply weights to H(X): Mon Oct 31 08:12:51 2016
Done applying weights to H(X): Mon Oct 31 08:12:51 2016
Apply weights to H(X): Mon Oct 31 08:12:51 2016
Done applying weights to H(X): Mon Oct 31 08:12:52 2016
training RMSE:0.5049648491280186
prediction time.
Apply weights to H(X): Mon Oct 31 08:14:18 2016
Done applying weights to H(X): Mon Oct 31 08:14:35 2016
Apply weights to H(X): Mon Oct 31 08:14:35 2016
Done applying weights to H(X): Mon Oct 31 08:14:52 2016
Apply weights to H(X): Mon Oct 31 08:14:52 2016
Done applying weights to H(X): Mon Oct 31 08:15:09 2016

In [12]:
# Boo.  We can't run vanilla ridge regression on this data set w/o the kernel trick.
# With 10,000 random features, the inversion would be 10,000 by 10,000, causing a memory error.
#hyper_explorer.train_model(lam=100, kernelized=False, verbose=True)
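
A rough back-of-the-envelope check of that memory claim (my own arithmetic, not output from the notebook): a dense 10,000 x 10,000 float64 matrix takes about 0.8 GB before any of the working copies the inversion needs, while the kernelized solve only ever inverts an n x n matrix for the roughly 2,000 split-off training points.

# Rough memory arithmetic for the two inversions (illustrative only).
d = 10000                  # random-feature dimension
n = 60000 - 58001          # split-off training points
bytes_per_float = 8
print(d * d * bytes_per_float / 1e9)   # ~0.8 GB just to hold the d x d matrix
print(n * n * bytes_per_float / 1e9)   # ~0.03 GB for the n x n kernel matrix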

In [13]:
hyper_explorer.final_model = hyper_explorer.models[1].copy()

In [14]:
hyper_explorer.evaluate_test_data()


prediction time.
Apply weights to H(X): Mon Oct 31 08:15:19 2016
Done applying weights to H(X): Mon Oct 31 08:15:22 2016
Apply weights to H(X): Mon Oct 31 08:15:22 2016
Done applying weights to H(X): Mon Oct 31 08:15:24 2016
Apply weights to H(X): Mon Oct 31 08:15:24 2016
Done applying weights to H(X): Mon Oct 31 08:15:27 2016
                                                                   0
# nonzero weights                                                  7
kernelized solvin                                               True
lambda                                                           100
test (0/1 loss)/N                                             0.2431
test 0/1 loss                                                   2431
test RMSE                                                    13.5732
test SSE                                                 1.84232e+06
weights            [[3.92729253598e-05, -2.49610644421e-05, 3.389...
