Transferring Weights

This notebook transfers weights from models with similar architectures based on layer names. A mapping table has to be compiled manually via Excel. The weights are then transferred and the layers are set to trainable = False.

Variables and Imports


In [ ]:
### imports
from IPython.core.debugger import Tracer
#Tracer()()  # drop-in breakpoint for interactive debugging

import os, sys, time

### keep a handle on the original stdout so it can be restored if a
### long-running Keras fit floods (and kills) the notebook frontend
stdout = sys.stdout
#sys.stdout = sys.__stdout__  # did not work to restore print -> console
#sys.stdout = open('keras_output.txt', 'a+')
#sys.stdout = stdout

import sys, os, argparse, logging  # NOQA
import importlib
from pprint import pprint
from tqdm import tqdm

import twBase
from twBase import *  # NOQA  -- presumably provides np, pd, Struct, outer_zip used below
importlib.reload(twBase)

import twQuoraRun
importlib.reload(twQuoraRun)
#from twQuoraRun import *  # NOQA

# Allow relative imports to directories above cwd/
sys.path.insert(1, os.path.join(sys.path[0], '..'))

%matplotlib inline
np.random.seed(42)  # fixed seed for reproducibility

In [ ]:
# Central configuration, grouped by concern (data paths, spacy model,
# model topology, training schedule, output locations).  Values can be
# overridden via the jinja placeholder below before P is built.
params = {
    "DATA": {
        "BASE_DIR": "./data",
        "GLOVE_DIR": './data/glove.6B',
        "DATA_FN": './data/train.csv',
        "DATA_TEST_FN": './data/test.csv',
        # precomputed HDF5 datasets: <split>_emb.<seq_len>.<embed_dim>.<nb_words>.h5
        "H5DATA_FN": "./data/train_emb.40.300.200000.h5",
        "H5DATA_TEST_FN": "./data/test_emb.40.300.200000.h5",
        "embedOnly": True,
        #"H5DATA_FN": "./data/train.40.300.200000.h5",
        #"H5DATA_TEST_FN": "./data/test.40.300.200000.h5",
        #"H5DATA_FN": "./data/train.s.40.300.200000.h5.s",
        #"H5DATA_TEST_FN": "./data/test.s.40.300.200000.h5.s",
        "EMBED_FN": './data/emb.300.200k.npy',
        "isSample": False,
    },
    "SPACY": {
        "MODEL": "en1G",  # en1G
        #"MODEL": "en",  # en1G
    },
    "MODEL": {
        "MAX_SEQUENCE_LENGTH": 40,  # 60
        "MAX_NB_WORDS": 200000,  # 40000, 200000
        "EMBED_DIM": 300,
        "BASIC_FEAT_DIM": 26,
        "SUFFIX": "xx",  # version tag appended to the model name
        #"SUFFIX": "01",
        "NAME": None,  # derived below from CLASS and SUFFIX
        #"CLASS": "CosModel",
        #"CLASS": "RNNSimple",
        #"CLASS": "FeatsSimple",
        #"CLASS": "BasicSimple",
        #"CLASS": "EmbedCNN",
        #"CLASS": "CNN1D",
        #"CLASS": "All",
        "CLASS": "AllSmall",
        "LOAD_WEIGHTS": None,
        "MASKING": False,
        "RNN_TOPOLOGY": [100],
        "CNN_TOPOLOGY": [128, 256, 512, 1024],
        "TOPOLOGY": [256, 512, 1024],
        "DROPOUT": [0.2, 0.3, 0.4, 0.5],
        "REGULIZER": [0.01, 0.0],  # k_reg, a_reg: 0.005, order of 10
        "OPTIMIZER": ['Adam', {"lr": 0.001}]
    },
    'SIMILARITY': {
        #'mode': 'cosine',
        'mode': 'euclidean',
        'gamma': 1,
        'c': 1,
        'd': 2,
        'dropout': 0.5,
    },
    "TRAINING": {
        "DEVICE": "/cpu:0",
        "VALIDATION_SPLIT": 0.2,
        "BATCH_SIZE": 2048,
        "EPOCHS": 80,
        "PATIENCE": 25,  # early-stopping patience in epochs
        "DECAY_EPOCH": 30,
        "SAVE_BEST": True,
        "isShuffle": False,
        "VERBOSITY": 1,
        "UNFREEZE": True
    },
    "OUTPUT": {
        "BASE_DIR": "./data/out",
        "MODEL_DIR": None,       # derived below
        "MODEL_PRETRAINED": None,
    }
}

# params generated by jinja
###{{params}}###
P = Struct(params)  # attribute-style access to the nested config dict
#P.DATA.H5DATA_FN = "./data/train.{}.{}.{}.h5".format(P.MODEL.MAX_SEQUENCE_LENGTH, P.MODEL.EMBED_DIM, P.MODEL.MAX_NB_WORDS)  # build name
#P.DATA.H5DATA_TEST_FN = "./data/test.{}.{}.{}.h5".format(P.MODEL.MAX_SEQUENCE_LENGTH, P.MODEL.EMBED_DIM, P.MODEL.MAX_NB_WORDS)  # build name
P.MODEL.NAME = "{}.{}".format(P.MODEL.CLASS, P.MODEL.SUFFIX)  # build name
P.OUTPUT.MODEL_DIR = os.path.join(P.OUTPUT.BASE_DIR,
                                  "{}.{}.{}".format(P.MODEL.MAX_SEQUENCE_LENGTH, P.MODEL.EMBED_DIM, P.MODEL.MAX_NB_WORDS),
                                  P.MODEL.NAME)  # build output dir

Load the Models

The trained models are created and the weights imported.


In [ ]:
# Reload the model module so edits in twQuoraModel.py are picked up,
# then build the trained CNN model (weight donor) wrapped in an Evaluator.
import importlib
import twQuoraModel
importlib.reload(twQuoraModel)
#from twQuoraModel import CNN1D
e1 = twQuoraRun.Evaluator(P, model=twQuoraModel.CNN1D)

In [ ]:
# Build the trained RNN model (second weight donor).
import importlib
import twQuoraModel
importlib.reload(twQuoraModel)
#from twQuoraModel import RNNSimple
e3 = twQuoraRun.Evaluator(P, model=twQuoraModel.RNNSimple)

In [ ]:
# Build the fresh AllSmall model -- the recipient of the transferred weights.
import importlib
import twQuoraModel
importlib.reload(twQuoraModel)
#from twQuoraModel import AllSmall
e2 = twQuoraRun.Evaluator(P, model=twQuoraModel.AllSmall)

In [ ]:
# restore the trained CNN1D checkpoint into the donor model
cnn_weights_path = './data/out/40.300.200000/CNN1D.03.200k/_weights_epoch_26.0.364.h5'
e1.model.model.load_weights(cnn_weights_path)

In [ ]:
# restore the trained RNNSimple checkpoint into the donor model
rnn_weights_path = './data/out/40.300.200000/RNNSimple.03.200k/_weights_epoch_27.0.342.h5'
e3.model.model.load_weights(rnn_weights_path)

In [ ]:
# Sanity check: the embedding layers (index 2 in both models -- TODO confirm
# the index still matches the architecture) must have identical weight shapes,
# otherwise the transfer below cannot work.
# Fix: the original had two bare mid-cell expressions (`cnn_emb[0].shape`,
# `all_emb[0].shape`) that IPython silently discards -- only the last
# expression of a cell is displayed.
cnn_emb = e1.model.model.layers[2].get_weights()
all_emb = e2.model.model.layers[2].get_weights()
assert cnn_emb[0].shape == all_emb[0].shape, \
    "embedding shape mismatch: {} vs {}".format(cnn_emb[0].shape, all_emb[0].shape)
cnn_emb[0].shape

Create the Mapping and Transfer the Weights


In [ ]:
# syntactic sugar: short handles for the underlying Keras models
Cnn = e1.model.model  # trained CNN1D (weight donor)
All = e2.model.model  # fresh AllSmall (weight recipient)
Rnn = e3.model.model  # trained RNNSimple (weight donor)

In [ ]:
import keras


def _layer_name(obj):
    """Return a layer's name, or the object itself when it is one of the
    placeholder strings outer_zip uses to pad the shorter layer lists."""
    return obj.name if isinstance(obj, keras.engine.Layer) else obj


# Align the three layer stacks side by side.  outer_zip pads the shorter
# lists with the given fill strings ('x', '_', '.').
# Fix: the original triplicated the same isinstance/else block three times;
# folded into the _layer_name helper above.
cnn_layerlist = []
rnn_layerlist = []
zipped = twBase.outer_zip((All.layers, 'x'), (Cnn.layers, '_'), (Rnn.layers, '.'))
for i, (allL, cnnL, rnnL) in enumerate(zipped):
    name1, name2, name3 = _layer_name(allL), _layer_name(cnnL), _layer_name(rnnL)
    print("{:2d}: {:25.25} {:25.25} {!s:25.25}".format(i, name1, name2, name3))
    cnn_layerlist.append((i, name1, name2))
    rnn_layerlist.append((i, name1, name3))

# dump the raw alignment; the mapping is then curated manually in Excel
df = pd.DataFrame(cnn_layerlist, columns=['ix', 'All', 'Cnn'])
df.to_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_cnn.csv'), index=False, sep=';')

df = pd.DataFrame(rnn_layerlist, columns=['ix', 'All', 'Rnn'])
df.to_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_rnn.csv'), index=False, sep=';')

Update the CSV file in Excel


In [ ]:
# read back the manually curated (in Excel) CNN weight-transfer mapping
df = pd.read_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_cnn.csv'), sep=';')
df

In [ ]:
# Transfer CNN weights into the All model, row by row of the mapping table,
# and freeze each recipient layer.
# Fix: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0;
# .loc is the equivalent label-based accessor (read_csv yields a RangeIndex,
# so integer i is a valid row label).
for i in range(len(df)):
    l1 = All.get_layer(df.loc[i, 'All'])
    l2 = Cnn.get_layer(df.loc[i, 'Cnn'])
    print("Copy weights from {} -> {}".format(l2.name, l1.name))
    l1.set_weights(l2.get_weights())
    l1.trainable = False  # must be set BEFORE compile

In [ ]:
# read back the manually curated (in Excel) RNN weight-transfer mapping
df = pd.read_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_rnn.csv'), sep=';')
df

In [ ]:
# Transfer RNN weights into the All model and freeze the recipient layers.
# Fix: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0;
# .loc is the equivalent label-based accessor.
for i in range(len(df)):
    l1 = All.get_layer(df.loc[i, 'All'])
    l2 = Rnn.get_layer(df.loc[i, 'Rnn'])
    print("Copy weights from {} -> {}".format(l2.name, l1.name))
    l1.set_weights(l2.get_weights())
    l1.trainable = False  # must be set BEFORE compile

In [ ]:
# Re-compile so the trainable=False flags take effect, then persist the model.
optimizer = getattr(keras.optimizers, P.MODEL.OPTIMIZER[0])(lr=P.MODEL.OPTIMIZER[1]['lr'])
All.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])


# Derive the file name from the configured model class instead of hardcoding
# 'AllSmall.preloaded.h5' (same result for CLASS == "AllSmall", but stays
# correct when the class is changed in the config cell).
All.save(os.path.join(P.OUTPUT.MODEL_DIR, '{}.preloaded.h5'.format(P.MODEL.CLASS)))

In [ ]:
# Load the model back and verify the frozen flag survived the round trip.
from keras.models import load_model

#del All  # deletes the existing model

# returns a compiled model identical to the previous one.
# Fix: the original loaded 'All.preloaded.h5' while the save cell wrote
# 'AllSmall.preloaded.h5' -- the hardcoded name never matched.  Deriving it
# from P.MODEL.CLASS matches whatever was saved.
model = load_model(os.path.join(P.OUTPUT.MODEL_DIR, '{}.preloaded.h5'.format(P.MODEL.CLASS)))
# NOTE(review): 'bidirectional_4' is an auto-generated Keras layer name and
# may shift when the model is rebuilt -- confirm it still exists
model.get_layer('bidirectional_4').trainable

In [ ]:
# Inspect the weight list of one layer: count, container type, and the shape
# of every weight array.
# Fix: the original's mid-cell `len(w), type(w)` was silently discarded
# (only a cell's last expression is displayed) and `w[0].shape, w[1].shape`
# assumed exactly two weight arrays; this shows all of them.
n = 5  # layer index to inspect
w = e1.model.model.layers[n].get_weights()
(len(w), type(w), [a.shape for a in w])

In [ ]:
# Check layer identity after copying over the weights.
# Fix: in the original, every result sat inside an if/else, and IPython only
# auto-displays a *last top-level* expression -- so the cell displayed
# nothing and all comparisons were discarded.  Print them explicitly, for
# every weight array rather than just the first.
n = 5
w1 = e1.model.model.layers[n].get_weights()
w2 = e2.model.model.layers[n].get_weights()

# normalize to lists of arrays so single-array layers are handled too
w1 = w1 if isinstance(w1, list) else [w1]
w2 = w2 if isinstance(w2, list) else [w2]
for k, (a1, a2) in enumerate(zip(w1, w2)):
    print("[{}] allclose={} sums: {} / {}".format(k, np.allclose(a1, a2), a1.sum(), a2.sum()))

In [ ]:
# Fetch a single layer from the CNN model by its (custom) name
Cnn.get_layer("CNN1d.ConvL4.CNN1d")

In [ ]: