Transferring Weights

This notebook transfers weights between models with similar architectures, matching layers by name. The mapping table between source and target layer names has to be compiled manually (e.g. in Excel). The weights are then copied over and the receiving layers are frozen (`trainable = False`).

Variables and Imports


In [ ]:
### Imports and notebook-wide configuration
# Debugger hook; uncomment the call below to break into the debugger at that point.
# NOTE(review): Tracer is reportedly deprecated in newer IPython releases — confirm it still imports.
from IPython.core.debugger import Tracer
#Tracer()()

import os, sys, time

### Prevent the Jupyter notebook from dying
# Keep a handle on the current stdout so it can be put back after a redirect
# (see the commented-out redirect attempts below).
stdout = sys.stdout
#sys.stdout = sys.__stdout__  # did not work to restore print -> console
#sys.stdout = open('keras_output.txt', 'a+')
#sys.stdout = stdout

import sys, os, argparse, logging  # NOQA
import importlib
from pprint import pprint
from tqdm import tqdm

import twBase
from twBase import *  # NOQA
# Reload the module so that edits to twBase are picked up without a kernel restart.
importlib.reload(twBase)

# Allow relative imports from the directory above cwd/
sys.path.insert(1, os.path.join(sys.path[0], '..'))

%matplotlib inline
# Seed NumPy's global random generator.
# NOTE(review): `np` (and `pd`, used later) are not imported explicitly in this
# notebook; presumably they come from the twBase star import — confirm.
np.random.seed(42)

In [ ]:
import twQuoraRun
# Reload the module so that edits to twQuoraRun are picked up without a kernel restart.
importlib.reload(twQuoraRun)
#from twQuoraRun import *  # NOQA

# Parse a fixed argument list (the single argument "train") instead of the real command line.
args = twQuoraRun.process_command_line(["train"])
# NOTE(review): this bare expression is not the last one in the cell, so under
# IPython's default display settings it shows nothing — confirm whether it is still wanted.
args

# Parameter object derived from the arguments; the later cells read paths and
# optimizer settings from it (P.DATA.BASE_DIR, P.OUTPUT.MODEL_DIR, P.MODEL.OPTIMIZER).
P = twQuoraRun.get_parameters(args)

Load the Models

The trained models are created and the weights imported.


In [ ]:
import importlib
import twQuoraModel
# Reload the module so that edits to twQuoraModel are picked up without a kernel restart.
importlib.reload(twQuoraModel)
# Earlier variant built on the CNN1D architecture, kept for reference:
#from twQuoraModel import CNN1D
#e1 = twQuoraRun.Evaluator(P, model=twQuoraModel.CNN1D)
# Source model: Evaluator built around the RNNCos architecture. The weights are
# copied FROM this model; the underlying Keras model is reached via `.model.model`.
sourceModel = twQuoraRun.Evaluator(P, model=twQuoraModel.RNNCos)

In [ ]:
import importlib
import twQuoraModel
# Reload the module so that edits to twQuoraModel are picked up without a kernel restart.
importlib.reload(twQuoraModel)
#from twQuoraModel import All
# Target model: Evaluator built around the RNNCosFix architecture. The weights are
# copied INTO this model; the underlying Keras model is reached via `.model.model`.
targetModel = twQuoraRun.Evaluator(P, model=twQuoraModel.RNNCosFix)

In [ ]:
# Load the trained weights from an HDF5 checkpoint into the source model.
# NOTE(review): the checkpoint path is hard-coded and relative to the working
# directory — confirm that it exists before running this cell.
path = './data/out/40.300.200000/RNNCos.01.cl2/_weights_epoch_58.0.158.h5'
sourceModel.model.model.load_weights(path)

In [ ]:
# Sanity check: the first weight array of the same-index layer must have the
# same shape in the source and the target model, otherwise a transfer cannot work.
EMB_LAYER_IX = 2  # presumably the embedding layer (going by the variable names) — TODO confirm

source_emb = sourceModel.model.model.layers[EMB_LAYER_IX].get_weights()
target_emb = targetModel.model.model.layers[EMB_LAYER_IX].get_weights()

source_shape, target_shape = source_emb[0].shape, target_emb[0].shape
assert source_shape == target_shape, (
    "Weight shape mismatch at layer {}: source {} vs. target {}".format(
        EMB_LAYER_IX, source_shape, target_shape))

# Last expression of the cell, so that both shapes are actually displayed.
source_shape, target_shape

Create the Mapping and Transfer the Weights


In [ ]:
# Syntactic sugar: short aliases for the underlying models of the two Evaluators
# (s = source, weights are read from it; t = target, weights are written to it).
sModel = sourceModel.model.model
tModel = targetModel.model.model

In [ ]:
import keras


def _layer_label(entry):
    """Return the name of a Keras layer, or `entry` unchanged if it is not a layer.

    Non-layer entries are presumably the fill values ('x' / '_') that
    twBase.outer_zip substitutes when one model has fewer layers — TODO confirm.
    """
    # keras.layers.Layer is the public location of the base class; it is more
    # stable across Keras versions than the keras.engine path.
    return entry.name if isinstance(entry, keras.layers.Layer) else entry


# Pair the layers of the target and the source model position by position and
# record one (index, target name, source name) row per pair.
source_layerlist = []
paired_layers = twBase.outer_zip((tModel.layers, 'x'), (sModel.layers, '_'))
for i, (tL, sL) in enumerate(paired_layers):
    target_name = _layer_label(tL)
    source_name = _layer_label(sL)
    print("{:2d}: {:25.25} {!s:25.25}".format(i, target_name, source_name))
    source_layerlist.append((i, target_name, source_name))

# Write the raw table to disk; it is meant to be edited by hand before the
# weights are transferred.
df = pd.DataFrame(source_layerlist, columns=['ix', 'target', 'source'])
df.to_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_source.csv'), index=False, sep=';')

Update the CSV file in Excel


In [ ]:
# Read back the manually edited weight-transfer table (semicolon-separated CSV).
# The transfer loop below looks up every 'target' / 'source' entry as a layer
# name, so each row must pair two existing layers.
df = pd.read_csv(os.path.join(P.DATA.BASE_DIR, 'weight_transfer_source.csv'), sep=';')
df

In [ ]:
# Load the source weights into the target model, one mapping-table row at a time.
# The columns are iterated directly: `DataFrame.ix` is deprecated and has been
# removed from pandas, and no index lookup is needed here anyway.
for target_name, source_name in zip(df['target'], df['source']):
    target_layer = tModel.get_layer(target_name)
    source_layer = sModel.get_layer(source_name)
    print("Copy weights from {} -> {}".format(source_layer.name, target_layer.name))
    target_layer.set_weights(source_layer.get_weights())
    # Freeze the transferred layer; must be set BEFORE the model is compiled.
    target_layer.trainable = False

In [ ]:
import keras

# Re-compile because the `trainable` flags were changed, then save the model.
# P.MODEL.OPTIMIZER holds the optimizer class name at position 0 and a settings
# dict (with key 'lr') at position 1.
optimizer_spec = P.MODEL.OPTIMIZER
optimizer_class = getattr(keras.optimizers, optimizer_spec[0])
optimizer = optimizer_class(lr=optimizer_spec[1]['lr'])

tModel.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

preloaded_model_path = os.path.join(P.OUTPUT.MODEL_DIR, 'targetModel.preloaded.h5')
tModel.save(preloaded_model_path)

### In case of a Lambda layer (serialization problem), save only the weights instead:
#tModel.save_weights(os.path.join(P.OUTPUT.MODEL_DIR, 'targetModel.preloaded_weights.h5'))

In [ ]:
# Load the saved model back and check the `trainable` flag of the transferred layers.
from keras.models import load_model

#del All  # deletes the existing model

# load_model returns a compiled model, identical to the one saved before.
model = load_model(os.path.join(P.OUTPUT.MODEL_DIR, 'targetModel.preloaded.h5'))

# Look the layers up through the mapping table instead of a hard-coded name such
# as 'bidirectional_8': auto-generated Keras names carry a counter suffix that
# can differ between kernel sessions, so a literal name breaks on a fresh run.
# Every value shown is expected to be False (frozen).
{name: model.get_layer(name).trainable for name in df['target']}

In [ ]:
# Check layer topology: number of weight arrays of one source-model layer,
# the container type, and the shape of every array.
# `sourceModel` replaces the former name `e1`, which is no longer defined in this notebook.
n = 5
w = sourceModel.model.model.layers[n].get_weights()

# Single tuple as the last expression so that everything is displayed; listing
# all shapes also works for layers that hold fewer than two weight arrays.
len(w), type(w), [a.shape for a in w]

In [ ]:
# Check layer identity after copying over the weights: for every row of the
# mapping table, compare all weight arrays of the source layer with those of the
# target layer it was copied to. Layers are matched by name (as in the transfer
# itself) because the two architectures need not line up index by index.
weights_identical = {}
for target_name, source_name in zip(df['target'], df['source']):
    source_weights = sModel.get_layer(source_name).get_weights()
    target_weights = tModel.get_layer(target_name).get_weights()
    weights_identical[target_name] = (
        len(source_weights) == len(target_weights)
        and all(
            # Compare shapes first: np.allclose raises on non-broadcastable shapes.
            s.shape == t.shape and np.allclose(s, t)
            for s, t in zip(source_weights, target_weights)
        )
    )

# Last expression of the cell so that the result is displayed; every value
# should be True.
weights_identical

In [ ]:
# Get a layer by its name.
# NOTE(review): `Cnn` is not defined in any visible cell of this notebook, so
# this line raises NameError on a fresh kernel; the layer name also looks like
# it belongs to a CNN model rather than the RNN models used here — confirm
# whether tModel / sModel was intended.
Cnn.get_layer("CNN1d.ConvL4.CNN1d")

In [ ]: