In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import time

import pandas as pd
import seaborn as sns

In [2]:
import sys
sys.path.append('../code/')

In [3]:
!pwd


/Users/janet/Machine_Learning_CSE_546/HW2/notebooks

In [4]:
from logistic_regression import LogisticRegression
from logistic_regression_batch import LogisticRegressionBatch
from hyperparameter_explorer import HyperparameterExplorer

In [5]:
from mnist_helpers import mnist_training, mnist_testing

In [6]:
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 4, 3
from sklearn.datasets import make_classification

# Toy multiclass problem for quick sanity checks before loading MNIST.
X, y = make_classification(n_samples=20, n_features=5, n_informative=5,
                           n_redundant=0, n_repeated=0, n_classes=3,
                           n_clusters_per_class=1, weights=None, flip_y=0.001,
                           class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
                           shuffle=True, random_state=None)

# Split the first sp points off for training, the rest for testing.
sp = 10
train_X = X[:sp]
train_y = y[:sp]
test_X = X[sp:]
test_y = y[sp:]

In [7]:
train_X, train_y = mnist_training(shuffled=True)
test_X, test_y = mnist_testing(shuffled=True)


[    0     1     2 ..., 59997 59998 59999]
[   0    1    2 ..., 9997 9998 9999]

In [8]:
hyper_explorer_sgd = HyperparameterExplorer(X=train_X, y=train_y, 
                                        model=LogisticRegression, 
                                        validation_split=0.1, 
                                        score_name = '-(log loss)/N, training', 
                                        use_prev_best_weights=True,
                                        test_X=test_X, test_y=test_y)  # need test_X, test_y for loss w/ fit plot


6000 of 60000 points from training are reserved for validation
variances of all training data: 8.347744528888889
variances of split-off training & validation data: 8.339031241426612, 8.425954555555558
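
The 90/10 split reported above can be sketched with plain numpy. This is only an illustration of the idea, not HyperparameterExplorer's actual implementation; the names val_frac, fit_idx, etc. are made up here, and the printed variances are just one plausible way to compute the numbers shown above.

# Sketch: hold out 10% of the shuffled training points for validation
val_frac = 0.1
n = train_X.shape[0]
n_val = int(n * val_frac)                       # 6000 of 60000 for MNIST
idx = np.random.permutation(n)
val_idx, fit_idx = idx[:n_val], idx[n_val:]
fit_X, fit_y = train_X[fit_idx], train_y[fit_idx]
val_X, val_y = train_X[val_idx], train_y[val_idx]
print(train_X.var(), fit_X.var(), val_X.var())  # overall pixel variances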

In [9]:
eta0=0.1
batchs = 1000
hyper_explorer_sgd.train_model(lam=0, eta0=eta0, max_iter=10**3, #10**2, 
                               batch_size=batchs, progress_monitoring_freq=batchs*10,
                               delta_percent=.001, verbose=True)


.........0.483632408768
..........0.458715956351
..........0.452085494371
..........0.449190734165
..........0.447780723644
..........0.447119600004
..........0.44688759702
..Loss optimized.  Old/N: 0.4468809456443384, new/N:0.44687678814466. Eta: 0.012309149097933273
final normalized training -(log loss): 0.44687678814466
-(log loss)/N, training:0.44687678814466
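
The '-(log loss)/N, training' score reported above is the average negative log-likelihood of the true labels under the model's predicted class probabilities. A minimal sketch of that calculation, assuming a probs array of shape (N, n_classes) with rows summing to 1 and integer labels y (these names are illustrative, not LogisticRegression's internals):

def normalized_neg_log_loss(probs, y):
    # average negative log-probability assigned to the true class
    N = len(y)
    return -np.sum(np.log(probs[np.arange(N), y])) / N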

In [10]:
hyper_explorer_sgd.train_on_whole_training_set()


getting best model.
{'iteration': [3834], 'log loss': [-24131.346559811638], 'batch size': [1000], 'lambda': [0], 'training (0/1 loss)/N': [0.13342592592592592], 'training 0/1 loss': [7205], '-(log loss), training': [24131.346559811638], '-(log loss)/N, training': [0.44687678814465998], '# of passes through N pts': [71], 'weights': [array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])], 'eta': [0.012309149097933273], 'eta0': [0.1], 'lambda normalized': [0.0], '# nonzero weights': [261]}
.........0.396629050801
..........0.39301178977
.......Loss optimized.  Old/N: 0.39269961077780474, new/N:0.392699352272627. Eta: 0.02
final normalized training -(log loss): 0.392699352272627
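
The 'training (0/1 loss)/N' entry in the best-model summary above (7205 / 54000 ≈ 0.1334) is the misclassification rate. A sketch under the same assumptions as before (an illustrative probs array of class probabilities, argmax prediction):

def zero_one_loss_rate(probs, y):
    # fraction of points whose most-probable class differs from the true label
    return np.mean(np.argmax(probs, axis=1) != y)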

In [ ]:
# evaluate the model retrained on the full training set against the test set
hyper_explorer_sgd.evaluate_test_data()

In [ ]:
eta0=0.1
batchs = 1000
hyper_explorer_sgd.train_model(lam=1, eta0=eta0, max_iter=8*10**3, #10**2, 
                               batch_size=batchs, progress_monitoring_freq=batchs*10,
                               delta_percent=.001, verbose=False)

In [ ]:
eta0=0.1
batchs = 1000
hyper_explorer_sgd.train_model(lam=2, eta0=eta0, max_iter=5*10**3, #10**2, 
                               batch_size=batchs, progress_monitoring_freq=batchs*10,
                               delta_percent=.001, verbose=False)

In [ ]:
eta0=0.1
batchs = 1000
hyper_explorer_sgd.train_model(lam=3, eta0=eta0, max_iter=5*10**3, #10**2, 
                               batch_size=batchs, progress_monitoring_freq=batchs*10,
                               delta_percent=.001, verbose=False)

In [ ]:
eta0=0.1
batchs = 1000
hyper_explorer_sgd.train_model(lam=5, eta0=eta0, max_iter=5*10**3, #10**2, 
                               batch_size=batchs, progress_monitoring_freq=batchs*10,
                               delta_percent=.001, verbose=False)

In [ ]:
eta0=0.1
batchs = 1000
hyper_explorer_sgd.train_model(lam=10, eta0=eta0, max_iter=5*10**3, #10**2, 
                               batch_size=batchs, progress_monitoring_freq=batchs*10,
                               delta_percent=.001, verbose=False)
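
The five cells above repeat the same call for lam in {1, 2, 3, 5, 10}. An equivalent way to express the sweep is a single loop; the lam=1 cell above uses a larger max_iter (8*10**3), which is collapsed to one value here for brevity.

# Equivalent sweep over the regularization strengths tried above
for lam in [1, 2, 3, 5, 10]:
    hyper_explorer_sgd.train_model(lam=lam, eta0=0.1, max_iter=5*10**3,
                                   batch_size=1000,
                                   progress_monitoring_freq=1000*10,
                                   delta_percent=.001, verbose=False)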

In [ ]:
hyper_explorer_sgd.plot_best_fits(logx=False)

In [ ]:
hyper_explorer_sgd.models[1].plot_test_and_train_log_loss_during_fitting()

In [ ]:
10**5

In [ ]:
hyper_explorer_sgd.summary.tail(3)

In [ ]:
hyper_explorer_sgd.best('model').results.tail(3)

In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(4, 3))
plot_data = hyper_explorer_sgd.best('model').results
print(plot_data.columns)
xval = '# of passes through N pts'
colors=['b', 'g']
plt.plot(plot_data[xval], plot_data['-(log loss)/N, training'], linestyle='--', marker='o', color=colors[0], label='training')
plt.plot(plot_data[xval], plot_data['-(log loss)/N, testing'], linestyle='--', marker='o', color=colors[1], label='testing')
plt.legend(loc='best')
plt.xlabel(xval)
plt.ylabel('-(log loss)/N')
ax.axhline(y=0, color='k')
plt.tight_layout()
#fig.savefig("161031_Q-2-1_norm_log_loss_during_fit--no_starting_weights.pdf")

In [ ]:
hyper_explorer_sgd.train_on_whole_training_set(max_iter=10**3)

In [ ]:
hyper_explorer_sgd.evaluate_test_data()

In [ ]: