Investigating why the model crashes when saving a checkpoint

    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 257, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/usr/lib/python2.7/copy.py", line 190, in deepcopy
    y = _reconstruct(x, rv, 1, memo)
  File "/usr/lib/python2.7/copy.py", line 334, in _reconstruct
    state = deepcopy(state, memo)
  File "/usr/lib/python2.7/copy.py", line 163, in deepcopy
    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 257, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/usr/lib/python2.7/copy.py", line 190, in deepcopy
    y = _reconstruct(x, rv, 1, memo)
  File "/usr/lib/python2.7/copy.py", line 334, in _reconstruct
    state = deepcopy(state, memo)
  File "/usr/lib/python2.7/copy.py", line 163, in deepcopy
    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 257, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/usr/lib/python2.7/copy.py", line 163, in deepcopy
    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 230, in _deepcopy_list
    y.append(deepcopy(a, memo))
  File "/usr/lib/python2.7/copy.py", line 163, in deepcopy
    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 237, in _deepcopy_tuple
    y.append(deepcopy(a, memo))
  File "/usr/lib/python2.7/copy.py", line 163, in deepcopy
    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 257, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/usr/lib/python2.7/copy.py", line 190, in deepcopy
    y = _reconstruct(x, rv, 1, memo)
  File "/usr/lib/python2.7/copy.py", line 334, in _reconstruct
    state = deepcopy(state, memo)
  File "/usr/lib/python2.7/copy.py", line 163, in deepcopy
    y = copier(x, memo)
  File "/usr/lib/python2.7/copy.py", line 257, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/usr/lib/python2.7/copy.py", line 174, in deepcopy
    y = copier(memo)
TypeError: cannot deepcopy this pattern object
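The last frame is the tell: deepcopy dies on a "pattern object". Saving walks the model's nested config with deepcopy (that is what the whole traceback shows), so anything non-copyable reachable from a layer attribute or a Lambda closure kills the checkpoint. A minimal reproduction of just the final error, assuming the pattern object is a compiled regex, which is exactly what raises this message on Python 2:

import copy
import re

pattern = re.compile(r'\w+')
copy.deepcopy(pattern)  # TypeError: cannot deepcopy this pattern object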

In [1]:
cd ../..


/home/anatoly/Desktop/squad

In [2]:
from __future__ import print_function
from __future__ import division

In [3]:
import numpy as np
import argparse

In [4]:
import keras
from keras.callbacks import ModelCheckpoint
from models import RNet, FastQA
from data import BatchGen, load_dataset

import sys
# Saving serializes a deeply nested config via deepcopy, so raise the
# recursion limit to avoid hitting RuntimeError before the real error shows up.
sys.setrecursionlimit(100000)

np.random.seed(10)


Using TensorFlow backend.

In [5]:
args = {}
args['model'] = 'fastqa'
args['hdim']  = 300
args['batch_size'] = 64
args['nb_epochs'] = 50
args['optimizer'] = 'Adam'
args['lr'] = 0.001
args['name'] = ''
args['loss'] = 'categorical_crossentropy'
args['dropout'] = 0

args['train_data'] = 'data/train_data.pkl'
args['valid_data'] = 'data/valid_data.pkl'
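This dict stands in for the training script's CLI (argparse is imported above but unused in the notebook). A hypothetical sketch of an equivalent parser, with flag names simply mirroring the keys:

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='fastqa')
parser.add_argument('--hdim', type=int, default=300)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--nb_epochs', type=int, default=50)
parser.add_argument('--optimizer', default='Adam')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--loss', default='categorical_crossentropy')
parser.add_argument('--dropout', type=float, default=0.0)
parser.add_argument('--train_data', default='data/train_data.pkl')
parser.add_argument('--valid_data', default='data/valid_data.pkl')
args = vars(parser.parse_args([]))  # empty argv: fall back to the defaults above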

In [16]:
import numpy as np
from keras import backend as K
import keras
from keras.models import Model
from keras.layers import Input, Dense, RepeatVector, Masking, Dropout, Flatten, Activation, Reshape, Lambda, Permute, merge, multiply, concatenate
from keras.layers.merge import Concatenate
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras.layers.recurrent import LSTM

In [24]:
class FastQA(Model):
    def __init__(self, inputs=None, outputs=None,
                       N=None, M=None, unroll=False,
                       hdim=300, word2vec_dim=300, dropout_rate=0.2,
                       **kwargs):
        # Load model from config
        if inputs is not None and outputs is not None:
            super(FastQA, self).__init__(inputs=inputs,
                                       outputs=outputs,
                                       **kwargs)
            return
        
        '''Dimensions'''
        B = None
        H = hdim
        W = word2vec_dim

        '''Inputs'''
        P = Input(shape=(N, W), name='P')
        Q = Input(shape=(M, W), name='Q')

        '''Word in question binary'''
        def wiq_feature(P, Q):
            '''
            Binary feature from the paper: for each passage word, whether
            that word occurs in the question.
            '''
            slices = []
            for i in range(N):
                # word_sim[b, j]: passage word i equals question word j in all W dimensions
                word_sim = K.tf.equal(W, K.tf.reduce_sum(K.tf.cast(K.tf.equal(K.tf.expand_dims(P[:, i, :], 1), Q), K.tf.int32), axis=2))
                # question_sim[b]: True when the match count over the M question words equals M
                question_sim = K.tf.equal(M, K.tf.reduce_sum(K.tf.cast(word_sim, K.tf.int32), axis=1))
                slices.append(K.tf.cast(question_sim, K.tf.float32))

            wiqout = K.tf.expand_dims(K.tf.stack(slices, axis=1), 2)  # (batch, N, 1)
            return wiqout

        wiq_p = Lambda(lambda arg: wiq_feature(arg[0], arg[1]))([P, Q])
        wiq_q = Lambda(lambda q: K.tf.ones([K.tf.shape(Q)[0], M, 1], dtype=K.tf.float32))(Q)

        # wiq concatenation disabled: pulling wiq into the inputs is what broke
        # model.save (see the deepcopy traceback above)
        passage_input = P
        question_input = Q
        #passage_input = Lambda(lambda arg: concatenate([arg[0], arg[1]], axis=2))([P, wiq_p])
        #question_input = Lambda(lambda arg: concatenate([arg[0], arg[1]], axis=2))([Q, wiq_q])

        '''Encoding'''
        encoder = Bidirectional(LSTM(units=W,
                           return_sequences=True,
                           dropout=dropout_rate,
                           unroll=unroll))

        passage_encoding = passage_input
        passage_encoding = encoder(passage_encoding)
        passage_encoding = TimeDistributed(
            Dense(W,
                use_bias=False,
                trainable=True,
                weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(passage_encoding)

        question_encoding = question_input
        question_encoding = encoder(question_encoding)
        question_encoding = TimeDistributed(
            Dense(W,
                use_bias=False,
                trainable=True,
                weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(question_encoding)

        '''Attention over question'''
        # compute the importance of each step
        question_attention_vector = TimeDistributed(Dense(1))(question_encoding)
        question_attention_vector = Lambda(lambda q: keras.activations.softmax(q, axis=1))(question_attention_vector)

        # apply the attention
        question_attention_vector = Lambda(lambda q: q[0]*q[1])([question_encoding, question_attention_vector])
        question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))(question_attention_vector)
        question_attention_vector = RepeatVector(N)(question_attention_vector)

        '''Answer span prediction'''

        # Answer start prediction
        answer_start = Lambda(lambda arg:
                              concatenate([arg[0], arg[1], arg[2]]))([
            passage_encoding,
            question_attention_vector,
            multiply([passage_encoding, question_attention_vector])])

        answer_start = TimeDistributed(Dense(W, activation='relu'))(answer_start)
        answer_start = TimeDistributed(Dense(1))(answer_start)
        answer_start = Flatten()(answer_start)
        answer_start = Activation('softmax')(answer_start)

        # Answer end prediction depends on the start prediction
        def s_answer_feature(x):
            # index of the most probable start position for each batch row
            return K.argmax(x, axis=1)

        x = Lambda(lambda x: K.tf.cast(s_answer_feature(x), dtype=K.tf.int32))(answer_start)
        # gather the passage encoding at the predicted start index of each batch row
        start_feature = Lambda(lambda arg: K.tf.gather_nd(arg[0], K.tf.stack(
            [K.tf.range(K.tf.shape(arg[1])[0]), K.tf.cast(arg[1], K.tf.int32)], axis=1)))([passage_encoding, x])
        start_feature = RepeatVector(N)(start_feature)

        # Answer end prediction
        answer_end = Lambda(lambda arg: concatenate([
            arg[0],
            arg[1],
            arg[2],
            multiply([arg[0], arg[1]]),
            multiply([arg[0], arg[2]])
        ]))([passage_encoding, question_attention_vector, start_feature])

        answer_end = TimeDistributed(Dense(W, activation='relu'))(answer_end)
        answer_end = TimeDistributed(Dense(1))(answer_end)
        answer_end = Flatten()(answer_end)
        answer_end = Activation('softmax')(answer_end)

        inputs = [P, Q]
        outputs = [answer_start, answer_end]

        super(FastQA, self).__init__(inputs=inputs,
                                   outputs=outputs,
                                   **kwargs)

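For reference, a minimal NumPy sketch (hypothetical helper, toy shapes) of the binary word-in-question feature as the docstring describes it:

import numpy as np

def wiq_feature_np(P, Q):
    # P: (batch, N, W) passage vectors, Q: (batch, M, W) question vectors
    # match[b, i, j]: passage word i equals question word j in every dimension
    match = (P[:, :, None, :] == Q[:, None, :, :]).all(axis=3)
    # a passage word is "in the question" if it matches at least one question word
    return match.any(axis=2)[..., None].astype(np.float32)  # (batch, N, 1)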
In [25]:
model = FastQA(hdim=args['hdim'], dropout_rate=args['dropout'], N=300, M=30)

In [26]:
optimizer_config = {'class_name': args['optimizer'],
                    'config': {'lr': args['lr']} if args['lr'] else {}}
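Keras resolves a dict like this through keras.optimizers.get when it is passed to compile, so the same optimizer can also be built explicitly; a sketch of the equivalent Keras 2 call:

from keras import optimizers

opt = optimizers.get({'class_name': 'Adam', 'config': {'lr': 0.001}})  # an Adam instance with lr=0.001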

In [27]:
model.compile(optimizer=optimizer_config,
              loss=args['loss'],
              metrics=['accuracy'])

In [28]:
model.layers


Out[28]:
[<keras.engine.topology.InputLayer at 0x7f4033ba4f50>,
 <keras.engine.topology.InputLayer at 0x7f4033ba4f90>,
 <keras.layers.wrappers.Bidirectional at 0x7f4027f25990>,
 <keras.layers.wrappers.TimeDistributed at 0x7f4027871510>,
 <keras.layers.wrappers.TimeDistributed at 0x7f4027768050>,
 <keras.layers.core.Lambda at 0x7f40277680d0>,
 <keras.layers.core.Lambda at 0x7f4027768b50>,
 <keras.layers.core.Lambda at 0x7f4027780ed0>,
 <keras.layers.wrappers.TimeDistributed at 0x7f4027b388d0>,
 <keras.layers.core.RepeatVector at 0x7f4027780d90>,
 <keras.layers.merge.Multiply at 0x7f402773e150>,
 <keras.layers.core.Lambda at 0x7f4027793f10>,
 <keras.layers.wrappers.TimeDistributed at 0x7f4027750850>,
 <keras.layers.wrappers.TimeDistributed at 0x7f40276e4050>,
 <keras.layers.core.Flatten at 0x7f4027816710>,
 <keras.layers.core.Activation at 0x7f40276f8b90>,
 <keras.layers.core.Lambda at 0x7f402770c210>,
 <keras.layers.core.Lambda at 0x7f40276b9810>,
 <keras.layers.core.RepeatVector at 0x7f40276b9210>,
 <keras.layers.core.Lambda at 0x7f40276d1a90>,
 <keras.layers.wrappers.TimeDistributed at 0x7f4027691410>,
 <keras.layers.wrappers.TimeDistributed at 0x7f4027625f10>,
 <keras.layers.core.Flatten at 0x7f4027625510>,
 <keras.layers.core.Activation at 0x7f402763a790>]

In [32]:
model.save('my_model.h5')  # creates an HDF5 file 'my_model.h5'
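Round-tripping the file is a quick sanity check. Since the graph contains Lambda layers and a Model subclass, loading needs the defining code in scope; a sketch, assuming the same session and Python version (marshalled Lambda functions are not portable across versions):

from keras.models import load_model

# custom_objects maps the saved class_name back to our subclass;
# the names used in the Lambda closures (K, N, M, ...) must resolve at load time
restored = load_model('my_model.h5', custom_objects={'FastQA': FastQA})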

Everything works now. The culprit was the wiq feature in the model code: with its concatenation into the inputs commented out, the wiq Lambdas are no longer on the input-to-output path (note they are absent from model.layers above), so saving never has to deep-copy them.