In [ ]:
### imports
from IPython.core.debugger import Tracer
#Tracer()()  # uncomment to drop into the debugger at this point
import os, sys, time
### prevent the dying jupyter notebook
stdout = sys.stdout  # keep a handle on the original stdout so it can be restored later
#sys.stdout = sys.__stdout__ # did not work to restore print -> console
#sys.stdout = open('keras_output.txt', 'a+')
#sys.stdout = stdout
import sys, os, argparse, logging # NOQA
import importlib
from pprint import pprint
from tqdm import tqdm
import twBase
from twBase import * # NOQA  -- presumably brings np, pd, Struct, outer_zip into scope (np/pd are used below without a direct import) -- TODO confirm
importlib.reload(twBase)  # pick up edits to twBase without restarting the kernel
import twQuoraRun
importlib.reload(twQuoraRun)
#from twQuoraRun import * # NOQA
#Allow relative imports to directories above cwd/
sys.path.insert(1, os.path.join(sys.path[0], '..'))
%matplotlib inline
np.random.seed(42)  # fixed seed for reproducible runs
In [ ]:
# Central experiment configuration. Wrapped in Struct(...) below for
# attribute-style access (P.MODEL.NAME etc.). NAME and OUTPUT.MODEL_DIR
# are placeholders (None) filled in programmatically after the dict.
params = {
"DATA": {
"BASE_DIR": "./data",
"GLOVE_DIR": './data/glove.6B',
"DATA_FN": './data/train.csv',
"DATA_TEST_FN": './data/test.csv',
# h5 filenames encode MAX_SEQUENCE_LENGTH.EMBED_DIM.MAX_NB_WORDS
"H5DATA_FN": "./data/train_emb.40.300.200000.h5",
"H5DATA_TEST_FN": "./data/test_emb.40.300.200000.h5",
"embedOnly": True,
#"H5DATA_FN": "./data/train.40.300.200000.h5",
#"H5DATA_TEST_FN": "./data/test.40.300.200000.h5",
#"H5DATA_FN": "./data/train.s.40.300.200000.h5.s",
#"H5DATA_TEST_FN": "./data/test.s.40.300.200000.h5.s",
"EMBED_FN": './data/emb.300.200k.npy',
"isSample": False,
},
"SPACY": {
"MODEL": "en1G", # en1G
#"MODEL": "en", # en1G
},
"MODEL": {
"MAX_SEQUENCE_LENGTH": 40, # 60
"MAX_NB_WORDS": 200000, # 40000, 200000
"EMBED_DIM": 300,
"BASIC_FEAT_DIM": 26,
"SUFFIX": "xx",
#"SUFFIX": "01",
"NAME": None,  # derived below from CLASS and SUFFIX
#"CLASS": "CosModel",
#"CLASS": "RNNSimple",
#"CLASS": "FeatsSimple",
#"CLASS": "BasicSimple",
#"CLASS": "EmbedCNN",
#"CLASS": "CNN1D",
#"CLASS": "All",
"CLASS": "AllSmall",
"LOAD_WEIGHTS": None,
"MASKING": False,
"RNN_TOPOLOGY": [100],
"CNN_TOPOLOGY": [128, 256, 512, 1024],
"TOPOLOGY": [256, 512, 1024],
"DROPOUT": [0.2, 0.3, 0.4, 0.5],
"REGULIZER": [0.01, 0.0], # k_reg, a_reg: 0.005, order of 10
"OPTIMIZER": ['Adam', {"lr": 0.001}]  # [class name in keras.optimizers, kwargs]
},
'SIMILARITY': {
#'mode': 'cosine',
'mode': 'euclidean',
'gamma': 1,
'c': 1,
'd': 2,
'dropout': 0.5,
},
"TRAINING": {
"DEVICE": "/cpu:0",
"VALIDATION_SPLIT": 0.2,
"BATCH_SIZE": 2048,
"EPOCHS": 80,
"PATIENCE": 25,
"DECAY_EPOCH": 30,
"SAVE_BEST": True,
"isShuffle": False,
"VERBOSITY": 1,
"UNFREEZE": True
},
"OUTPUT": {
"BASE_DIR": "./data/out",
"MODEL_DIR": None,  # derived below: BASE_DIR/<seq.dim.words>/<NAME>
"MODEL_PRETRAINED": None,
}
}
# params generated by jinja
###{{params}}###
P = Struct(params)
#P.DATA.H5DATA_FN = "./data/train.{}.{}.{}.h5".format(P.MODEL.MAX_SEQUENCE_LENGTH, P.MODEL.EMBED_DIM, P.MODEL.MAX_NB_WORDS) # build name
#P.DATA.H5DATA_TEST_FN = "./data/test.{}.{}.{}.h5".format(P.MODEL.MAX_SEQUENCE_LENGTH, P.MODEL.EMBED_DIM, P.MODEL.MAX_NB_WORDS) # build name
P.MODEL.NAME = "{}.{}".format(P.MODEL.CLASS, P.MODEL.SUFFIX) # build name
P.OUTPUT.MODEL_DIR = os.path.join(P.OUTPUT.BASE_DIR,
"{}.{}.{}".format(P.MODEL.MAX_SEQUENCE_LENGTH, P.MODEL.EMBED_DIM, P.MODEL.MAX_NB_WORDS),
P.MODEL.NAME) # build output dir
In [ ]:
# Reload the model module so the latest source edits are picked up,
# then build a CNN1D evaluator from the current parameter set.
import importlib
import twQuoraModel
twQuoraModel = importlib.reload(twQuoraModel)
e1 = twQuoraRun.Evaluator(P, model=twQuoraModel.CNN1D)
In [ ]:
# Reload the model module and build an RNNSimple evaluator
# with the same parameter set.
import importlib
import twQuoraModel
twQuoraModel = importlib.reload(twQuoraModel)
e3 = twQuoraRun.Evaluator(P, model=twQuoraModel.RNNSimple)
In [ ]:
# Reload the model module and build the AllSmall evaluator — the
# combined model that will receive the transferred weights.
import importlib
import twQuoraModel
twQuoraModel = importlib.reload(twQuoraModel)
e2 = twQuoraRun.Evaluator(P, model=twQuoraModel.AllSmall)
In [ ]:
# Restore the trained CNN1D checkpoint into e1's underlying Keras model.
weights_path = './data/out/40.300.200000/CNN1D.03.200k/_weights_epoch_26.0.364.h5'
e1.model.model.load_weights(weights_path)
In [ ]:
# Restore the trained RNNSimple checkpoint into e3's underlying Keras model.
weights_path = './data/out/40.300.200000/RNNSimple.03.200k/_weights_epoch_27.0.342.h5'
e3.model.model.load_weights(weights_path)
In [ ]:
# Sanity check: the embedding matrices (layer index 2) of the CNN model and
# the All model must have identical shape before weights can be transferred.
# The original cell's bare `.shape` expressions were silently discarded
# (Jupyter only displays a cell's final expression); print them explicitly.
cnn_emb = e1.model.model.layers[2].get_weights()
all_emb = e2.model.model.layers[2].get_weights()
print("cnn embedding shape:", cnn_emb[0].shape)
print("all embedding shape:", all_emb[0].shape)
assert cnn_emb[0].shape == all_emb[0].shape
In [ ]:
# Short aliases for the three underlying Keras models.
Cnn, All, Rnn = e1.model.model, e2.model.model, e3.model.model
In [ ]:
import keras


def _layer_name(obj):
    """Return the layer name for real Keras layers; pass placeholders through.

    twBase.outer_zip pads the shorter layer lists with the given filler
    strings ('x', '_', '.'), so an entry is either a Layer or a plain string.
    The original cell repeated this isinstance check three times verbatim.
    """
    return obj.name if isinstance(obj, keras.engine.Layer) else obj


# Align the three models' layer stacks side by side and record the
# All<->Cnn and All<->Rnn layer-name pairs. The resulting CSVs are meant
# to be hand-edited (see note below) before the weight-copy cells run.
cnn_layerlist = []
rnn_layerlist = []
for i, (allL, cnnL, rnnL) in enumerate(twBase.outer_zip((All.layers, 'x'), (Cnn.layers, '_'), (Rnn.layers, '.'))):
    name1 = _layer_name(allL)
    name2 = _layer_name(cnnL)
    name3 = _layer_name(rnnL)
    print("{:2d}: {:25.25} {:25.25} {!s:25.25}".format(i, name1, name2, name3))
    cnn_layerlist.append((i, name1, name2))
    rnn_layerlist.append((i, name1, name3))

df = pd.DataFrame(cnn_layerlist, columns=['ix', 'All', 'Cnn'])
df.to_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_cnn.csv'), index=False, sep=';')
df = pd.DataFrame(rnn_layerlist, columns=['ix', 'All', 'Rnn'])
df.to_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_rnn.csv'), index=False, sep=';')
Manual step: edit the generated weight-transfer CSV files (e.g. in Excel) so that each 'All' layer name maps to the correct source layer, then continue with the cells below.
In [ ]:
# Re-read the (possibly hand-edited) CNN -> All layer mapping.
mapping_fn = os.path.join(P.DATA.BASE_DIR, 'weight_transfer_cnn.csv')
df = pd.read_csv(mapping_fn, sep=';')
df
In [ ]:
# Copy the curated CNN -> All layer mapping: for every CSV row transfer the
# trained weights and freeze the target layer.
# Fix: df.ix is deprecated (removed in pandas >= 1.0) — use label-based df.loc.
for i in range(len(df)):
    l1 = All.get_layer(df.loc[i, 'All'])
    l2 = Cnn.get_layer(df.loc[i, 'Cnn'])
    print("Copy weights from {} -> {}".format(l2.name, l1.name))
    l1.set_weights(l2.get_weights())
    l1.trainable = False  # must be set BEFORE compile to take effect
In [ ]:
# Re-read the (possibly hand-edited) RNN -> All layer mapping.
mapping_fn = os.path.join(P.DATA.BASE_DIR, 'weight_transfer_rnn.csv')
df = pd.read_csv(mapping_fn, sep=';')
df
In [ ]:
# Copy the curated RNN -> All layer mapping: for every CSV row transfer the
# trained weights and freeze the target layer.
# Fix: df.ix is deprecated (removed in pandas >= 1.0) — use label-based df.loc.
for i in range(len(df)):
    l1 = All.get_layer(df.loc[i, 'All'])
    l2 = Rnn.get_layer(df.loc[i, 'Rnn'])
    print("Copy weights from {} -> {}".format(l2.name, l1.name))
    l1.set_weights(l2.get_weights())
    l1.trainable = False  # must be set BEFORE compile to take effect
In [ ]:
# Re-compile so the freshly-set `trainable` flags take effect, then persist
# the combined model with the pre-loaded weights.
# Improvement: pass the full optimizer kwargs dict (**P.MODEL.OPTIMIZER[1])
# instead of only 'lr', so any extra optimizer parameters in the config
# (decay, beta_1, ...) are honoured as well.
optimizer = getattr(keras.optimizers, P.MODEL.OPTIMIZER[0])(**P.MODEL.OPTIMIZER[1])
All.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])
All.save(os.path.join(P.OUTPUT.MODEL_DIR, 'AllSmall.preloaded.h5'))
In [ ]:
# Load the model back and verify the frozen layers survived serialization.
from keras.models import load_model
#del All # deletes the existing model
# returns a compiled model identical to the previous one.
# Fix: the save cell writes 'AllSmall.preloaded.h5'; the original loaded
# 'All.preloaded.h5', a file that is never written.
model = load_model(os.path.join(P.OUTPUT.MODEL_DIR, 'AllSmall.preloaded.h5'))
model.get_layer('bidirectional_4').trainable  # expected False for transferred layers
In [ ]:
# Inspect layer n's weight list: how many arrays it holds, and their shapes.
# The original cell's first bare expression (len/type) was silently discarded
# since Jupyter only displays a cell's last expression; print both explicitly.
n = 5
w = e1.model.model.layers[n].get_weights()
print(len(w), type(w))
print(w[0].shape, w[1].shape)
In [ ]:
# check layer identity after copyting over the weights
n = 5
w1 = e1.model.model.layers[n].get_weights()
w2 = e2.model.model.layers[n].get_weights()
if isinstance(w1, list):
np.allclose(w1[0], w2[0])
w1[0].sum(), w2[0].sum()
else:
np.allclose(w1, w2)
w1.sum(), w2.sum()
In [ ]:
# Get layer by name
Cnn.get_layer("CNN1d.ConvL4.CNN1d")  # bare expression: the notebook displays the Layer object
In [ ]: