In [1]:
# Record the Python interpreter version string this notebook was run with.
import sys
print(sys.version)


3.5.2 |Continuum Analytics, Inc.| (default, Jul  2 2016, 17:52:12) 
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]

In [2]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import time

import pandas as pd
import seaborn as sns

In [3]:
import sys

# Make the project modules under ../code/ importable from this notebook.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '../code/' not in sys.path:
    sys.path.append('../code/')

from mnist_helpers import mnist_training, mnist_testing
from hyperparameter_explorer import HyperparameterExplorer
from least_squares_sgd import LeastSquaresSGD
from kernel import Fourier

In [4]:
# Training split: untransformed inputs and labels come from the project helper
# (shuffling disabled); the feature matrix actually used for fitting is read
# from a pre-computed .npy file.
# NOTE(review): the file name suggests a 50-component projection of the raw
# inputs, with rows presumably in the same unshuffled order as y_train — confirm.
X_train_untransformed, y_train = mnist_training(shuffled=False) 
X_train = np.load('../notebooks/data/X_transformed_by_50_components.npy')

# Test split, loaded the same way as the training split.
X_test_untransformed, y_test = mnist_testing(shuffled=False)
X_test = np.load('../notebooks/data/X_test_transformed_by_50_components.npy')

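This experiment can't be run through `HyperparameterExplorer`, which isn't amenable to the number of training points changing, so `LeastSquaresSGD` is called directly below.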


In [5]:
# Build the least-squares SGD model on the training data, calling
# LeastSquaresSGD directly with verbose progress output enabled.
# NOTE(review): the recorded output for this cell prints
# "Max epochs (5) reached" during the eta0 search even though max_epochs=2
# is passed here — confirm which limit applies to which phase, or whether
# the saved output is stale.
ls60k = LeastSquaresSGD(
    X_train,
    y_train,
    max_epochs=2,
    eta0_search_start=100,
    # One point per training row; presumably caps how many points the eta0
    # search may use — confirm against LeastSquaresSGD.
    eta0_max_pts=X_train.shape[0],
    verbose=True,
    # No test data is passed in this call, so test assessment is switched off.
    assess_test_data_during_fitting=False)


No test data was provided.
determine kernel bandwidth using 6000 points.
median distance for 6000 samples from N: 2365.3743022656654
eta0 search begins with eta0 = 100/0.0016666666666666668 = 60000
Determining eta0 using 60000 points
testing eta0 = 0.001666666666666667.  (Try # 1)
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 0.1.  (step = 0)
Begin epoch 1
............................................................ (epoch complete)
Epoch iteration time: 4.0:31.
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 0.09281455757552602.  (step = 12000)
fit observation done: 3.0:17.
0.220811049473
Begin epoch 2
............................................................ (epoch complete)
Epoch iteration time: 4.0:16.
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 0.08352685635424476.  (step = 24000)
fit observation done: 2.0:33.
0.182121471268
Begin epoch 3
............................................................ (epoch complete)
Epoch iteration time: 4.0:14.
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 0.07559210732930001.  (step = 36000)
fit observation done: 2.0:26.
0.15641874051
Begin epoch 4
............................................................ (epoch complete)
Epoch iteration time: 4.0:6.
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 0.0714905081902154.  (step = 48000)
fit observation done: 2.0:30.
0.144091278283

!!! Max epochs (5) reached. !!!
final normalized training (square loss): 0.14409127828349433
testing eta0 = 0.008333333333333335.  (Try # 2)
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 0.1.  (step = 0)
Begin epoch 1
............................................................ (epoch complete)
Epoch iteration time: 3.0:53.
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, (done calculating hat{Y})
average error: 1.2461792335712669e+157.  (step = 12000)
The sum of errors is concerningly big: 1.2461792335712669e+157
fit observation done: 2.0:37.
square loss/N/N grew to inf
Model training raised an exception.
Exploration for good eta0 started at 0.0016666666666666668; stopped passing when eta0  grew to 0.008333333333333335
===== eta0 search landed on 0.001666666666666667, using 60000 points ====
../code/least_squares_sgd.py:306: RuntimeWarning: overflow encountered in multiply
  "Compute Yhat before calling predict, but don't compute too often!"