In [1]:
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Convolution1D, MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D
from keras.layers import Embedding, LSTM, Input, Merge, Dense, TimeDistributed


Using TensorFlow backend.

In [2]:
max_word_len=3     # characters per word
max_seq_len=5      # words per sequence
w_embed_size=6     # word embedding dimension
c_embed_size=3     # character embedding dimension
max_chars=2        # character vocabulary size
max_words=20       # word vocabulary size
c_nb_filters=4     # number of character convolution filters
c_filter_length=3  # character convolution filter length

In [3]:
# Character-level CNN: maps one word (max_word_len character indices)
# to a single c_nb_filters-dimensional char-based word embedding.
char_cnn_layer=Sequential()
char_cnn_layer.add(Embedding(max_chars, c_embed_size, input_length=max_word_len, name="char_embed"))
char_cnn_layer.add(Convolution1D(c_nb_filters, c_filter_length, activation='relu'))
char_cnn_layer.add(GlobalAveragePooling1D(name="char_based_word_embed"))

In [4]:
char_cnn_layer.output_shape, char_cnn_layer.input_shape


Out[4]:
((None, 4), (None, 3))
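
The char CNN maps each word, given as max_word_len=3 character indices, to a single 4-dimensional vector (one value per convolution filter). A quick sanity check, as a sketch only (it assumes the cells above have been run; in Keras 1.x, predict is available without compiling):

one_word = np.array([[0, 1, 0]])               # shape (1, max_word_len); indices must be < max_chars
print(char_cnn_layer.predict(one_word).shape)  # expected: (1, 4)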

In [5]:
char_cnn_layer.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
char_embed (Embedding)           (None, 3, 3)          6           embedding_input_1[0][0]          
____________________________________________________________________________________________________
convolution1d_1 (Convolution1D)  (None, 1, 4)          40          char_embed[0][0]                 
____________________________________________________________________________________________________
char_based_word_embed (GlobalAve (None, 4)             0           convolution1d_1[0][0]            
====================================================================================================
Total params: 46
Trainable params: 46
Non-trainable params: 0
____________________________________________________________________________________________________
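
The parameter counts follow directly from the settings above: the character embedding has max_chars * c_embed_size = 2 * 3 = 6 weights, and the convolution has c_filter_length * c_embed_size * c_nb_filters + c_nb_filters = 3 * 3 * 4 + 4 = 40. With the filter length equal to the word length, the convolution output has length max_word_len - c_filter_length + 1 = 1, so the global pooling simply squeezes that axis away.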

In [6]:
# Apply the char CNN to every word position in a max_seq_len-long sequence.
char_seq_layer=Sequential()
char_seq_layer.add(TimeDistributed(char_cnn_layer, input_shape=(max_seq_len, max_word_len), name="tdcnn"))
char_seq_layer.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
tdcnn (TimeDistributed)          (None, 5, 4)          46          timedistributed_input_1[0][0]    
====================================================================================================
Total params: 46
Trainable params: 46
Non-trainable params: 0
____________________________________________________________________________________________________
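
TimeDistributed wraps the whole char CNN and applies it independently to each of the max_seq_len=5 word positions, turning a (None, 5, 3) batch of character indices into (None, 5, 4) char-based word embeddings while reusing the same 46 parameters.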

In [7]:
# Plain word-level embedding over the same max_seq_len-long sequence.
word_seq_layer=Sequential()
word_seq_layer.add(Embedding(max_words, w_embed_size, input_length=max_seq_len, name="word_embed"))
word_seq_layer.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
word_embed (Embedding)           (None, 5, 6)          120         embedding_input_2[0][0]          
====================================================================================================
Total params: 120
Trainable params: 120
Non-trainable params: 0
____________________________________________________________________________________________________

In [8]:
# Concatenate the char-based and word embeddings per timestep, then run a
# shared LSTM and a per-timestep sigmoid classifier.
full_seq_layer=Sequential()
full_seq_layer.add(Merge([char_seq_layer, word_seq_layer], mode="concat", name="char_word_embedding"))
full_seq_layer.add(LSTM(10, return_sequences=True, name="shared_lstm"))
full_seq_layer.add(TimeDistributed(Dense(1, activation='sigmoid')))
full_seq_layer.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
tdcnn (TimeDistributed)          (None, 5, 4)          46          timedistributed_input_1[0][0]    
____________________________________________________________________________________________________
word_embed (Embedding)           (None, 5, 6)          120         embedding_input_2[0][0]          
____________________________________________________________________________________________________
shared_lstm (LSTM)               (None, 5, 10)         840         char_word_embedding[0][0]        
____________________________________________________________________________________________________
timedistributed_1 (TimeDistribut (None, 5, 1)          11          shared_lstm[0][0]                
====================================================================================================
Total params: 1,017
Trainable params: 1,017
Non-trainable params: 0
____________________________________________________________________________________________________
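
The Merge layer concatenates the two branches per timestep, (None, 5, 4) and (None, 5, 6), into a (None, 5, 10) sequence. The LSTM's 840 parameters are 4 * (10*10 + 10*10 + 10) for the four gates over a 10-dimensional input and 10 units, and the TimeDistributed Dense(1) adds 10 weights plus a bias.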

In [9]:
full_seq_layer.compile(loss='sparse_categorical_crossentropy', optimizer='sgd')

In [10]:
full_seq_layer.input_shape


Out[10]:
[(None, 5, 3), (None, 5)]
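
Because the Merge layer was built from [char_seq_layer, word_seq_layer] in that order, the model expects two inputs: the character tensor of shape (batch, 5, 3) first and the word-index tensor of shape (batch, 5) second.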

In [11]:
# Two toy examples: word indices of shape (batch, max_seq_len) and the
# matching character indices of shape (batch, max_seq_len, max_word_len).
X_words = [
    [1,2,1,4,5],
    [1,2,1,4,5]
          ]
X_chars = [
    [
        [0,1,0],
        [0,1,1],
        [0,1,0],
        [1,1,0],
        [0,1,0]
    ],
    [
        [0,1,1],
        [0,1,1],
        [0,1,0],
        [0,0,0],
        [0,1,0]
    ]
          ]
X_words = np.array(X_words)
X_chars = np.array(X_chars)
X_chars.shape, X_words.shape


Out[11]:
((2, 5, 3), (2, 5))

In [12]:
y = [
    [0, 1, 1, 1, 0],
    [0, 0, 0, 1, 0]
]
y = np.expand_dims(np.array(y), -1)
y.shape


Out[12]:
(2, 5, 1)
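
Each of the five word positions gets a single binary label, and the trailing axis is added so that y matches the (None, 5, 1) shape of the TimeDistributed Dense(1) output.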

In [13]:
full_seq_layer.fit([X_chars, X_words], y)


Epoch 1/10
2/2 [==============================] - 7s - loss: nan
Epoch 2/10
2/2 [==============================] - 0s - loss: nan
Epoch 3/10
2/2 [==============================] - 0s - loss: nan
Epoch 4/10
2/2 [==============================] - 0s - loss: nan
Epoch 5/10
2/2 [==============================] - 0s - loss: nan
Epoch 6/10
2/2 [==============================] - 0s - loss: nan
Epoch 7/10
2/2 [==============================] - 0s - loss: nan
Epoch 8/10
2/2 [==============================] - 0s - loss: nan
Epoch 9/10
2/2 [==============================] - 0s - loss: nan
Epoch 10/10
2/2 [==============================] - 0s - loss: nan
Out[13]:
<keras.callbacks.History at 0x7f6641411da0>
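
The loss is nan from the very first epoch. The likely culprit is the loss/output mismatch: the model ends in a single sigmoid unit per timestep but was compiled with sparse_categorical_crossentropy, which treats the last axis as class scores, and with only one "class" the 0/1 labels fall outside the valid range. A minimal sketch of a fix, assuming everything else in the notebook stays the same, is to recompile with binary_crossentropy and refit:

full_seq_layer.compile(loss='binary_crossentropy', optimizer='sgd')
full_seq_layer.fit([X_chars, X_words], y, nb_epoch=10)  # nb_epoch is the Keras 1.x argument name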

In [ ]: