In [1]:
import numpy as np
import matplotlib as mpl
%matplotlib inline
import time
import pandas as pd
import seaborn as sns
from mnist import MNIST  # public package for turning the MNIST data files into numpy arrays.
In [2]:
import sys
sys.path.append('../code/')
In [3]:
from logistic_regression import LogisticRegression, LogisticRegressionBinary
from hyperparameter_explorer import HyperparameterExplorer
In [4]:
from mnist_helpers import mnist_training, mnist_testing, mnist_training_binary, mnist_testing_binary
In [5]:
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 4, 3
In [6]:
train_X, train_y = mnist_training_binary(2, shuffled=True)
test_X, test_y = mnist_testing_binary(2)
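mnist_training_binary and mnist_testing_binary come from ../code/mnist_helpers.py and produce a one-vs-rest labeling for the chosen digit. A minimal sketch of the relabeling step, assuming the helper maps the target digit (here 2) to 1 and every other digit to 0 (binarize_labels is a hypothetical name, not the helper's actual internals):
import numpy as np

def binarize_labels(y, digit):
    # Hypothetical helper: 1 for the target digit, 0 for all others.
    return (y == digit).astype(int)

# e.g. binarize_labels(np.array([0, 2, 5, 2]), 2) -> array([0, 1, 0, 1])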
In [7]:
print(train_X.shape, train_y.shape)
print(test_X.shape, test_y.shape)
In [8]:
print(np.var(train_y))
print(np.var(test_y))
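The label variance is a quick class-balance check: for 0/1 labels, np.var(y) = p(1 - p), where p is the fraction of positive examples. Digit 2 makes up roughly a tenth of MNIST, so values near 0.1 * 0.9 = 0.09 are expected. A quick confirmation:
p = train_y.mean()       # fraction of examples labeled 1
print(p, p * (1 - p))    # the product should match np.var(train_y) above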
In [9]:
hyper_explorer = HyperparameterExplorer(X=train_X, y=train_y,
                                        model=LogisticRegressionBinary,
                                        validation_split=0.1,
                                        score_name='-(log loss)/N, training',
                                        use_prev_best_weights=True,
                                        test_X=test_X, test_y=test_y)  # test_X, test_y needed for the loss-during-fit plot
In [10]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [11]:
hyper_explorer.models
Out[11]:
In [12]:
hyper_explorer.models[1].plot_test_and_train_log_loss_during_fitting()
Out[12]:
In [13]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [14]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [15]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [16]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [17]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [18]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=2*10**2, delta_percent=eta0*10, verbose=True)
In [19]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=2*10**2, delta_percent=eta0*10, verbose=True)
In [20]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=2*10**2, delta_percent=eta0*10, verbose=True)
In [21]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [22]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=2*10**2, delta_percent=eta0*10, verbose=True)
In [23]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=2*10**2, delta_percent=eta0*10, verbose=True)
In [24]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=2*10**2, delta_percent=eta0*10, verbose=True)
Run for much longer...
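The cells above repeat the same call because use_prev_best_weights=True lets each run warm-start from the previous best weights, so successive cells keep extending the same fit. A hedged sketch of the same sweep written as a loop, assuming train_model keeps the signature used throughout this notebook:
eta0 = 1e-4
for lam in [0, 10, 1e2, 1e3, 1e4]:    # regularization strengths to try
    for _ in range(3):                # several warm-started continuations each
        hyper_explorer.train_model(lam=lam, eta0=eta0, max_iter=2*10**2,
                                   delta_percent=eta0*10, verbose=False)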
In [25]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [26]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=False)
In [27]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=False)
In [28]:
eta0=1e-4
hyper_explorer.train_model(lam=0, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*10, verbose=False)
In [30]:
eta0=2*1e-4
hyper_explorer.train_model(lam=10, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [31]:
eta0=2*1e-4
hyper_explorer.train_model(lam=10, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [32]:
eta0=2*1e-4
hyper_explorer.train_model(lam=10, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [34]:
eta0=2*1e-4
hyper_explorer.train_model(lam=10, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [35]:
eta0=2*1e-4
hyper_explorer.train_model(lam=1e2, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [36]:
eta0=2*1e-4
hyper_explorer.train_model(lam=1e2, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [38]:
eta0=2*1e-4
hyper_explorer.train_model(lam=1e2, eta0=eta0, max_iter=10**3, delta_percent=eta0*10, verbose=False)
In [ ]:
hyper_explorer.plot_best_fits()
In [39]:
eta0=2*1e-4
hyper_explorer.train_model(lam=100, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [40]:
eta0=1*1e-4
hyper_explorer.train_model(lam=100, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [41]:
eta0=1*1e-4
hyper_explorer.train_model(lam=100, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [42]:
eta0=1*1e-4
hyper_explorer.train_model(lam=100, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [43]:
hyper_explorer.plot_best_fits()
In [44]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e3, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [45]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e3, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [46]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e3, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [47]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e4, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [48]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e4, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [49]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e4, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [50]:
hyper_explorer.plot_best_fits()
In [51]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e5, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [52]:
eta0=1*1e-4
hyper_explorer.train_model(lam=1e5, eta0=eta0, max_iter=5*10**2, delta_percent=eta0*10, verbose=False)
In [53]:
eta0=1e-4
hyper_explorer.train_model(lam=1e5, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [54]:
eta0=1e-4
hyper_explorer.train_model(lam=1e5, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [55]:
hyper_explorer.plot_best_fits()
In [56]:
eta0=1e-4
hyper_explorer.train_model(lam=1e6, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [57]:
eta0=1e-4
hyper_explorer.train_model(lam=1e6, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [58]:
eta0=1e-4
hyper_explorer.train_model(lam=1e6, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [59]:
eta0=1e-4
hyper_explorer.train_model(lam=1e7, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [60]:
eta0=1e-4
hyper_explorer.train_model(lam=1e7, eta0=eta0, max_iter=10**2, delta_percent=eta0*10, verbose=True)
In [61]:
eta0=1e-4
hyper_explorer.train_model(lam=1e7, eta0=eta0, max_iter=10**2, delta_percent=eta0, verbose=True)
In [62]:
hyper_explorer.plot_best_fits()
In [63]:
eta0=1e-4
hyper_explorer.train_model(lam=1e8, eta0=eta0, max_iter=3*10**2, delta_percent=eta0, verbose=True)
In [64]:
eta0=1e-4
hyper_explorer.train_model(lam=1e8, eta0=eta0, max_iter=3*10**2, delta_percent=eta0, verbose=False)
In [65]:
eta0=1e-4
hyper_explorer.train_model(lam=1e8, eta0=eta0, max_iter=3*10**2, delta_percent=eta0, verbose=False)
In [66]:
eta0=2e-4
hyper_explorer.train_model(lam=1e8, eta0=eta0, max_iter=3*10**2, delta_percent=eta0, verbose=False)
In [67]:
eta0=2e-4
hyper_explorer.train_model(lam=1e8, eta0=eta0, max_iter=3*10**2, delta_percent=eta0, verbose=False)
In [68]:
eta0=2e-4
hyper_explorer.train_model(lam=1e8, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*10, verbose=False)
In [69]:
hyper_explorer.plot_best_fits()
In [73]:
eta0=1e-4
hyper_explorer.train_model(lam=1e9, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*10, verbose=False)
In [74]:
eta0=1e-4
hyper_explorer.train_model(lam=1e9, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [75]:
eta0=1e-4
hyper_explorer.train_model(lam=1e9, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [76]:
eta0=1e-4
hyper_explorer.train_model(lam=1e9, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [77]:
eta0=1e-4
hyper_explorer.train_model(lam=1e9, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [78]:
eta0=1e-4
hyper_explorer.train_model(lam=1e9, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [79]:
hyper_explorer.plot_best_fits()
In [80]:
eta0=1e-4
hyper_explorer.train_model(lam=1e10, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [81]:
eta0=1e-4
hyper_explorer.train_model(lam=1e10, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [82]:
eta0=1e-4
hyper_explorer.train_model(lam=1e10, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [83]:
hyper_explorer.plot_best_fits()
In [84]:
eta0=1e-4
hyper_explorer.train_model(lam=1e10, eta0=eta0, max_iter=3*10**2, delta_percent=eta0, verbose=False)
In [85]:
eta0=1e-4
hyper_explorer.train_model(lam=1e11, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [86]:
eta0=1e-4
hyper_explorer.train_model(lam=1e11, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [87]:
hyper_explorer.plot_best_fits()
In [96]:
eta0=1e-20
hyper_explorer.train_model(lam=1e12, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [97]:
eta0=1e-18
hyper_explorer.train_model(lam=1e12, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [99]:
eta0=1e-15
hyper_explorer.train_model(lam=1e12, eta0=eta0, max_iter=3*10**2, delta_percent=eta0*5, verbose=False)
In [104]:
pbf = hyper_explorer.plot_best_fits()
For the hyperparameter explorer's best model, retrain on the whole training set:
In [107]:
hyper_explorer.train_on_whole_training_set(max_iter=1000)
In [101]:
100000000 - 1e8  # scratch check: 1e8 written out is one hundred million
Out[101]:
In [111]:
final_training_results = hyper_explorer.final_model.results_row()
del final_training_results['weights']
final_training_results
Out[111]:
In [112]:
hyper_explorer.final_model.plot_test_and_train_01_loss_during_fitting()
In [127]:
fig, ax = plt.subplots(1, 1, figsize=(4, 3))
plot_data = hyper_explorer.final_model.results
print(plot_data.columns)
plt.plot(plot_data['iteration'], plot_data['-(log loss)/N, training'], linestyle='--', marker='o', color='c', label='training')
plt.plot(plot_data['iteration'], plot_data['-(log loss)/N, testing'], linestyle='--', marker='o', color='g', label='testing')
plt.legend(loc='best')
plt.xlabel('iteration')
plt.ylabel('-(log loss)/N')
ax.axhline(y=0, color='k')
plt.tight_layout()
fig.savefig("161031_Q-2-1_norm_log_loss_during_fit.pdf")
In [129]:
fig, ax = plt.subplots(1, 1, figsize=(4, 3))
plot_data = hyper_explorer.final_model.results
plt.plot(plot_data['iteration'], plot_data['training (0/1 loss)/N'], linestyle='--', marker='o', color='c', label='training')
plt.plot(plot_data['iteration'], plot_data['testing (0/1 loss)/N'], linestyle='--', marker='o', color='g', label='testing')
plt.legend(loc='best')
plt.xlabel('iteration')
plt.ylabel('(0/1 loss)/N')
ax.axhline(y=0, color='k')
plt.tight_layout()
fig.savefig("161031_Q-2-1_norm_01_loss_during_fit.pdf")
In [124]:
hyper_explorer.final_model.plot_ys(x='iteration', y1='-(log loss)/N, training', y2='-(log loss)/N, testing')
Out[124]:
In [125]:
hyper_explorer.final_model.plot_log_loss_normalized_and_eta()
In [126]:
hyper_explorer.evaluate_test_data()
In [132]:
# For fun, train one model starting at zero.
naive_model = hyper_explorer.final_model.copy()
naive_model.w = naive_model.w.copy() # just to be safe
In [134]:
# Confirm the copy does not share memory with the final model's weights
# (the `b.base is a` idiom: True only if b is a view of a).
naive_model.w.base is hyper_explorer.final_model.w
Out[134]:
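This checks that the copied weight vector owns its own memory before it is zeroed below. A self-contained illustration of the numpy view-vs-copy semantics being relied on here:
import numpy as np

a = np.arange(4)
view = a[:]              # a basic slice is a view sharing a's buffer
copy = a.copy()          # .copy() allocates fresh memory

print(view.base is a)    # True: the view is backed by a
print(copy.base is a)    # False: copy.base is None for an owning array
copy[:] = 0              # zeroing the copy leaves a untouched
print(a)                 # [0 1 2 3]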
In [137]:
naive_model.w[:] = 0  # zero the weights in place: a cold start instead of warm-starting
In [138]:
naive_model.w
Out[138]:
In [139]:
naive_model.lam
Out[139]:
In [143]:
naive_model.max_iter = 1000
In [145]:
naive_model.run()
In [150]:
naive_model.X.shape
Out[150]:
In [149]:
naive_model.results_row()
Out[149]:
In [151]:
fig, ax = plt.subplots(1, 1, figsize=(4, 3))
plot_data = naive_model.results
plt.plot(plot_data['iteration'], plot_data['training (0/1 loss)/N'], linestyle='--', marker='o', color='c', label='training')
plt.plot(plot_data['iteration'], plot_data['testing (0/1 loss)/N'], linestyle='--', marker='o', color='g', label='testing')
plt.legend(loc='best')
plt.xlabel('iteration')
plt.ylabel('(0/1 loss)/N')
ax.axhline(y=0, color='k')
plt.tight_layout()
fig.savefig("161031_Q-2-1_norm_01_loss_during_fit--no_starting_weights.pdf")
In [152]:
fig, ax = plt.subplots(1, 1, figsize=(4, 3))
plot_data = naive_model.results
print(plot_data.columns)
plt.plot(plot_data['iteration'], plot_data['-(log loss)/N, training'], linestyle='--', marker='o', color='c', label='training')
plt.plot(plot_data['iteration'], plot_data['-(log loss)/N, testing'], linestyle='--', marker='o', color='g', label='testing')
plt.legend(loc='best')
plt.xlabel('iteration')
plt.ylabel('-(log loss)/N')
ax.axhline(y=0, color='k')
plt.tight_layout()
fig.savefig("161031_Q-2-1_norm_log_loss_during_fit--no_starting_weights.pdf")