In [ ]:
# Plotting + numerics setup for the neural-net homework notebook.
# NOTE(review): mpl.use('Agg') selects a non-interactive backend, but the
# %matplotlib inline magic below overrides it inside Jupyter -- the Agg call
# only matters when this notebook runs headless (e.g. via nbconvert).
import matplotlib as mpl
mpl.use('Agg')
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import pprint
import seaborn as sns
import sys
In [ ]:
# Make project-local modules importable: this assignment's code/ directory
# plus HW3's code/ directory (reused for its PCA implementation).
sys.path.append('../code')
sys.path.append('../../HW3/code/') # for PCA
In [ ]:
from NeuralNet import NeuralNet, make_dir
from TransferFunctions import TanhTF, LinearTF
from neural_net_helpers import mnist_training, mnist_testing
In [ ]:
# Load the fitted PCA object (trusted local artifact -- pickle is unsafe on
# untrusted data).  The context manager closes the file handle, which the
# original bare open() call leaked.
with open('../data/PCA_training_data_uncentered.pickle', 'rb') as f:
    pca_training = pickle.load(f)
In [ ]:
# Raw (pre-PCA) MNIST features plus labels.  Fixed the "untransfored" typo in
# both variable names; the raw feature arrays are not referenced again in this
# notebook, so the rename is safe.
X_train_untransformed, y_train = mnist_training()
X_test_untransformed, y_test = mnist_testing()
In [ ]:
# PCA-projected features; the .T puts examples in columns to match the
# features-by-examples layout assumed below (see the `d, N` unpacking).
X_train_PCA50 = np.load("../data/X_transformed_by_50_components_uncentered.npy").T
X_test_PCA50 = np.load("../data/X_test_transformed_by_50_components_uncentered.npy").T
In [ ]:
# Layout: rows are features, columns are examples.
# d = feature count (PCA components), N = training examples, C = class count.
d, N = X_train_PCA50.shape
C = np.unique(y_train).size
In [ ]:
# Sanity check on the projected data's shape (exact duplicate cell removed).
X_train_PCA50.shape
In [ ]:
from hyper_explorer import HyperparameterExplorer
In [ ]:
# Project-local sweep driver over weight-init scalings.  tanh is used for both
# hidden and output transfer functions here; the later single-run cell uses a
# linear output instead.
# NOTE(review): the continuation lines below lost their indentation in export;
# this is a single call continued inside the parentheses.
h = HyperparameterExplorer(X=X_train_PCA50, y=y_train,
hiddenTF=TanhTF, outputTF=TanhTF,
learning_rate=1e-3, epochs=3, minibatch_size=10,
test_X=X_test_PCA50, test_y=y_test)
In [ ]:
# Candidate weight-init scale factors swept below.
vals = [10 ** e for e in (8, 7)]
In [ ]:
import itertools
In [ ]:
# Preview the four (scale_W1, scale_W2) pairings the sweep will cover.
# product() is iterated directly; the list() materialization was unnecessary.
for t in itertools.product(vals, vals):
    print(t)
In [ ]:
# Run the sweep over combinations drawn from `vals` (project-local API --
# presumably the cross-product previewed above; confirm in hyper_explorer).
h.test_combo_of_tuples(vals)
In [ ]:
# Results accumulated by the explorer.
h.summary
In [ ]:
h.plot_tests_of_W1_init_scaling()
In [ ]:
# Intentional execution barrier: halts "Run All" here; everything below is a
# manual, single-configuration exploration.
assert False
In [ ]:
# Scratch check: column-wise Euclidean norms of a small example matrix.
np.linalg.norm(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]), axis=0)
In [ ]:
# Scratch check: mean of those column norms (method-call form).
np.linalg.norm(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]), axis=0).mean()
In [ ]:
# First five column norms of the actual PCA-projected training data.
np.linalg.norm(X_train_PCA50, axis=0)[:5]
In [ ]:
# Average column norm of the training data; 1/avg_X_norm is the alternative
# W1 scaling mentioned in the commented-out kwarg of the NeuralNet cell below.
avg_X_norm = np.linalg.norm(X_train_PCA50, axis=0).mean()
avg_X_norm
In [ ]:
# Single hand-tuned run: tanh hidden layer -> linear output, 500 hidden nodes.
# NOTE(review): continuation lines lost their indentation in export; this is
# one call continued inside the parentheses.
n = NeuralNet(X=X_train_PCA50, y=y_train,
hidden_nodes=500,
hiddenTF=TanhTF,
# Xavier init for W1, then scaled by 1e8; the commented-out alternative
# scales by 1/avg_X_norm computed above.
hiddenTF_kwargs={'W1_init_strategy':'Xavier',
"scale_W1":1e8}, #, "scale_W1":1/avg_X_norm},
outputTF=LinearTF,
outputTF_kwargs={"scale_W2":1e8},
minibatch_size=10,
eta0 = 0.0005, #1.0e-3 , #
X_test = X_test_PCA50, y_test = y_test,
PCA = pca_training,
convergence_delta = None, # gets set to eta0/10
verbose=False)
In [ ]:
# Pre-training snapshot of W1 row sums (the identical cell after n.run()
# below lets you eyeball how much the weights moved).
print(np.round(n.W1.sum(axis=1)[0:20], 1))
In [ ]:
n.outputTF.scale_W2
In [ ]:
# Peek at the initial weight matrices.
pprint.pprint(n.W1[0:5, 0:3])
In [ ]:
pprint.pprint(n.W2[0:5, 0:3])
In [ ]:
# Dimension summary pulled off the model object.
print("N: {}".format(n.N))
print("d: {}".format(n.d))
print("C: {}".format(n.C))
print("X shape: {}".format(n.X.shape))
print("y shape: {}".format(n.y.shape))
print("Y shape: {}".format(n.Y.shape))
In [ ]:
# Train (project-local loop; 10 epochs for this single configuration).
n.run(epochs=10)
In [ ]:
n.C
In [ ]:
# Last few rows of the per-epoch results table.
n.results.tail()
In [ ]:
# Square-loss curve on a linear y-axis; returns the figure for saving below.
sl = n.plot_square_loss(logy=False)
In [ ]:
! ls ../figures/Q-2-1-tanh-linear/
In [ ]:
# Path passed positionally: Figure.savefig's first parameter is `fname`, so
# the filename= keyword is rejected by current matplotlib.
sl.savefig('../figures/Q-2-1-tanh-linear/161211_tanh_linear_square_loss.pdf')
In [ ]:
# Zoom the first axes to [0, 1] and re-display the figure.
sl.axes[0].set_ylim([0, 1])
sl
In [ ]:
# 0/1-loss curve.  Save path is passed positionally because Figure.savefig
# takes `fname` and rejects a filename= keyword on current matplotlib.
l01 = n.plot_01_loss()
l01.savefig('../figures/Q-2-1-tanh-linear/161211_tanh_linear_01_loss.pdf')
In [ ]:
#p = n.plot_weight_evolution()
In [ ]:
# Most recent tracked W1 snapshot.
n.W1_tracking.tail(1)
In [ ]:
# Evolution of the summed weights, per layer.
p1 = n.plot_sum_of_weights('W1')
p2 = n.plot_sum_of_weights('W2')
In [ ]:
# Gradient-norm trajectories on a log scale.
p1 = n.plot_norm_of_gradient('W1', logy=True)
p2 = n.plot_norm_of_gradient('W2', logy=True)
In [ ]:
n.W1_dot_prod_checking.head(2)
In [ ]:
n.W2_dot_prod_checking.head(2)
In [ ]:
# Renamed the result from `d` to `dots`: `d` already holds the feature count
# (set near the top of the notebook), and re-running cells out of order would
# silently clobber it.
dots = n.sample_dot_prods()
dots.keys()
for key, item in dots.items():
    print('key: {}'.format(key))
    print("{}".format(np.round(item, decimals=1)))
In [ ]:
# Post-training W1 row sums -- compare with the pre-training snapshot above.
print(np.round(n.W1.sum(axis=1)[0:20], 1))
In [ ]:
p = n.plot_sample_dot_prods()
In [ ]:
# Final epoch's metrics, transposed for readability.
n.results.tail(1).T
In [ ]:
# Render individual hidden-node weight vectors as images (project helper --
# presumably maps the 50 PCA weights back to pixel space via the stored PCA;
# confirm in NeuralNet).
n.display_hidden_node_as_image(n.W1[0,:])
In [ ]:
n.W1[1,:].shape
In [ ]:
n.display_hidden_node_as_image(n.W1[1,:])
In [ ]:
n.display_hidden_node_as_image(n.W1[10,:])
In [ ]:
# Ensure the output figure directory exists, then dump visualizations of the
# first 10 hidden-node weight vectors into it.
image_dir = '../figures/Q-2-1-tanh-linear'
make_dir(image_dir)
In [ ]:
n.visualize_10_W1_weights(image_dir)
In [ ]:
In [ ]: