In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import time

import pandas as pd
import seaborn as sns

In [2]:
import sys
sys.path.append('../code/')

In [3]:
!pwd


/Users/janet/Machine_Learning_CSE_546/HW2/notebooks

In [4]:
from logistic_regression import LogisticRegression
from logistic_regression_batch import LogisticRegressionBatch
from hyperparameter_explorer import HyperparameterExplorer

In [5]:
from mnist_helpers import mnist_training, mnist_testing

In [6]:
from pylab import rcParams
rcParams['figure.figsize'] = 4, 3

# Small synthetic smoke test (overwritten by the MNIST load in the next cell):
# 20 points, 5 informative features, 3 classes.
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=20, n_features=5, n_informative=5,
                           n_redundant=0, n_repeated=0, n_classes=3,
                           n_clusters_per_class=1, weights=None, flip_y=0.001,
                           class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
                           shuffle=True, random_state=None)
sp = 10  # split point: first half train, second half test
train_X = X[:sp]
train_y = y[:sp]
test_X = X[sp:]
test_y = y[sp:]

In [7]:
train_X, train_y = mnist_training(shuffled=True)
test_X, test_y = mnist_testing(shuffled=True)
hyper_explorer_batch = HyperparameterExplorer(X=train_X, y=train_y,
                                              model=LogisticRegressionBatch,
                                              validation_split=0.1,
                                              score_name='-(log loss)/N, training',
                                              use_prev_best_weights=True,
                                              test_X=test_X, test_y=test_y)  # need test_X, test_y for loss w/ fit plot

In [8]:
hyper_explorer_sgd = HyperparameterExplorer(X=train_X, y=train_y, 
                                        model=LogisticRegression, 
                                        validation_split=0.1, 
                                        score_name = '-(log loss)/N, training', 
                                        use_prev_best_weights=True,
                                        test_X=test_X, test_y=test_y)  # need test_X, test_y for loss w/ fit plot


6000 of 60000 points from training are reserved for validation
variances of all training data: 8.347744528888889
variances of split-off training & validation data: 8.34820004080933, 8.342451972222223
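
The split reported above holds out 6,000 of the 60,000 MNIST training points (validation_split=0.1). A minimal sketch of such a split, assuming numpy arrays; this is illustrative only, not HyperparameterExplorer's actual implementation:

In [ ]:
# Illustrative 90/10 train/validation split (not HyperparameterExplorer's code).
import numpy as np
rng = np.random.RandomState(0)             # assumed seed, for reproducibility
idx = rng.permutation(len(train_X))
n_val = int(0.1 * len(train_X))            # 6000 of 60000
val_idx, tr_idx = idx[:n_val], idx[n_val:]
tr_X, tr_y = train_X[tr_idx], train_y[tr_idx]
val_X, val_y = train_X[val_idx], train_y[val_idx]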

In [9]:
eta0 = 1
batch_size = 1000
hyper_explorer_sgd.train_model(lam=0, eta0=eta0, max_iter=10**2,
                               batch_size=batch_size, progress_monitoring_freq=batch_size*150,
                               delta_percent=.01, verbose=True)


loop through all the data: passes 1-47
-(log loss)/N, monitored every 10 passes: 0.335977893519, 0.314177673055, 0.315779092792, 0.30136961778
Loss optimized.  Old/N: 0.2978014706251266, new/N: 0.29779033330150134. Eta: 0.14907119849998599
final normalized training -(log loss): 0.29779033330150134
-(log loss)/N, training: 0.29779033330150134
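
For reference, the score being minimized, -(log loss)/N, is the average negative log-likelihood of the multiclass (softmax) logistic model over the N training points:

$$-\frac{1}{N}\sum_{i=1}^{N} \log P(y_i \mid x_i, W), \qquad P(y = k \mid x, W) = \frac{\exp(w_k^{\top} x)}{\sum_{j} \exp(w_j^{\top} x)}.$$

Note also that the printed Eta shows the step size decaying from eta0 = 1 to about 0.149 over the run, consistent with an inverse-square-root style schedule (roughly eta_t ≈ eta0 / sqrt(t)).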

In [11]:
eta0 = 1
batch_size = 1000
hyper_explorer_sgd.train_model(lam=0, eta0=eta0, max_iter=10**2,
                               batch_size=batch_size, progress_monitoring_freq=batch_size*150,
                               delta_percent=.01, verbose=True)


returning best weights for lambda = 0.  Corresponded to -(log loss)/N, validation = 0.3311562261300456
loop through all the data: passes 1-100
-(log loss)/N, monitored every 10 passes: 0.301546137993, 0.289412514209, 0.291506027932, 0.282325043425, 0.279878444513, 0.282103200355, 0.278287045911, 0.280071953144, 0.274707655699, 0.274805927064
max iterations (100) reached.
final normalized training -(log loss): 0.2738807379238113
-(log loss)/N, training: 0.2738807379238113
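
Because the explorer was built with use_prev_best_weights=True, this second train_model call resumes from the best weights of the previous run (the "returning best weights for lambda = 0" line above); that is why the monitored loss starts near 0.30 instead of the cold-start 0.34. A minimal sketch of a warm-started minibatch update for softmax logistic regression, under the assumption that the course code uses a standard gradient step (all names here are hypothetical, not the course code's API):

In [ ]:
# Illustrative warm-started minibatch step (hypothetical helper, not the course code).
import numpy as np

def sgd_step(W, X_batch, Y_onehot, eta, lam):
    """One softmax-regression SGD step; W has shape (n_features, n_classes)."""
    scores = X_batch @ W
    scores -= scores.max(axis=1, keepdims=True)     # stabilize the softmax
    P = np.exp(scores)
    P /= P.sum(axis=1, keepdims=True)               # row-wise class probabilities
    grad = X_batch.T @ (P - Y_onehot) / X_batch.shape[0] + lam * W
    return W - eta * grad

# Warm start: pass the previous run's best weights in as the initial W
# instead of starting from zeros.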

In [12]:
hyper_explorer_sgd.train_on_whole_training_set()


getting best model.
{'weights': [array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])],
 'eta0': [1], 'batch size': [1000], 'lambda': [0], 'iteration': [5400],
 '# of passes through N pts': [100], 'lambda normalized': [0.0],
 'eta': [0.09901475429766744], 'training 0/1 loss': [4062],
 '-(log loss)/N, training': [0.27388073792381129],
 'training (0/1 loss)/N': [0.075222222222222218], '# nonzero weights': [1187],
 '-(log loss), training': [14789.559847885808], 'log loss': [-14789.559847885808]}
loop through all the data: passes 1-100
-(log loss)/N, monitored every 10 passes: 0.287380888317, 0.286496401455, 0.290723494924, 0.279455317412, 0.275628431166, 0.274658678491, 0.270107507287, 0.274346994278, 0.271852232777, 0.269182624635
max iterations (100) reached.
final normalized training -(log loss): 0.2687266090942076
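
train_on_whole_training_set() refits the chosen model on all 60,000 training points, presumably folding the 6,000 validation points back in and again starting from the previous best weights. The normalization N in the reported scores tracks the relevant set size; for example, the best-model dict above can be checked by hand:

In [ ]:
# Sanity check of the score normalization (values copied from the dict above):
14789.559847885808 / 54000   # -(log loss) over the 54000-point training split
                             # = 0.27388..., the reported '-(log loss)/N, training'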

In [14]:
hyper_explorer_sgd.final_model.plot_test_and_train_01_loss_during_fitting()

[figure: test and train 0/1 loss during fitting]
In [15]:
hyper_explorer_sgd.summary.tail()


Out[15]:
                            model 1            model 2
# nonzero weights           762                1187
# of passes through N pts   47                 100
-(log loss), training       16080.677998       14789.559848
-(log loss)/N, training     0.297790           0.273881
batch size                  1000               1000
eta                         0.149071           0.099015
eta0                        1                  1
iteration                   2538               5400
lambda                      0                  0
lambda normalized           0.0                0.0
log loss                    -16080.677998      -14789.559848
model number                1                  2
training (0/1 loss)/N       0.083407           0.075222
training 0/1 loss           4504               4062
weights                     [[0.0, 0.0, ...    [[0.0, 0.0, ...
-(log loss), validation     1986.937357        1852.165951
-(log loss)/N, validation   0.331156           0.308694
validation (0/1 loss)/N     0.095667           0.088833
validation 0/1 loss         574                533

In [13]:
hyper_explorer_sgd.evaluate_test_data()


                                                                           0
# nonzero weights                                                       1435
# of passes through N pts                                                200
-(log loss), test                                                    2832.44
-(log loss)/N, test                                                 0.283244
batch size                                                              1000
eta                                                                0.0995037
eta0                                                                       1
iteration                                                              11400
lambda                                                                     0
lambda normalized                                                          0
log loss                                                            -2832.44
test (0/1 loss)/N                                                     0.0762
test 0/1 loss                                                            762
weights                    [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...
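
The test 0/1 loss above is simply the count of misclassified digits: 762 errors over the 10,000 MNIST test points gives (0/1 loss)/N = 0.0762. A sketch of that computation for a linear softmax model, assuming a fitted weight matrix W of shape (n_features, 10); W is hypothetical here, since the explorer computes this internally:

In [ ]:
# Illustrative 0/1-loss computation (W is a hypothetical fitted weight matrix).
import numpy as np
y_hat = np.argmax(test_X @ W, axis=1)          # predicted class per test point
zero_one = int(np.sum(y_hat != test_y))        # 762 in the run above
zero_one_rate = zero_one / len(test_y)         # 762 / 10000 = 0.0762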

In [10]:
assert False  # intentional guard: halts "Run All" before the scratch cells below


---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-10-6e6df518a476> in <module>()
----> 1 assert False

AssertionError: 

In [ ]:
hyper_explorer_sgd.summary.tail(3)

In [ ]:
hyper_explorer_sgd.best('model').results.tail(3)

In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(4, 3))
plot_data = hyper_explorer_sgd.best('model').results
print(plot_data.columns)
xval = '# of passes through N pts'
colors = ['b', 'g']
plt.plot(plot_data[xval], plot_data['-(log loss)/N, training'],
         linestyle='--', marker='o', color=colors[0], label='training')
plt.plot(plot_data[xval], plot_data['-(log loss)/N, testing'],
         linestyle='--', marker='o', color=colors[1], label='testing')
plt.legend(loc='best')
plt.xlabel(xval)
plt.ylabel('-(log loss)/N')
ax.axhline(y=0, color='k')
plt.tight_layout()
#fig.savefig("161031_Q-2-1_norm_log_loss_during_fit--no_starting_weights.pdf")

In [ ]:
hyper_explorer_sgd.train_on_whole_training_set(max_iter=10**3)

In [ ]:
hyper_explorer_sgd.evaluate_test_data()

In [ ]: