In [384]:
from __future__ import print_function
from keras.models import Sequential
from keras.engine.training import slice_X
from keras.layers import Lambda, Flatten, Permute
from keras.layers import merge, Merge, Activation, TimeDistributed, Dense, RepeatVector, recurrent
import numpy as np
from six.moves import range
import keras.backend as K
from sklearn.metrics import accuracy_score
In [385]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot vectors.

    Given a set of characters:
    + Encode them to a one-hot integer representation
    + Decode the one-hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """

    def __init__(self, chars, maxlen):
        # De-duplicate and sort the alphabet so indices are deterministic.
        self.chars = sorted(set(chars))
        self.char_indices = {c: i for i, c in enumerate(self.chars)}
        self.indices_char = {i: c for i, c in enumerate(self.chars)}
        self.maxlen = maxlen

    def encode(self, C, maxlen=None):
        """One-hot encode string C into a (maxlen, alphabet_size) array.

        Rows beyond len(C) are left as all-zero padding. A falsy maxlen
        falls back to the table's default length.
        """
        maxlen = maxlen or self.maxlen
        X = np.zeros((maxlen, len(self.chars)))
        for pos, ch in enumerate(C):
            X[pos, self.char_indices[ch]] = 1
        return X

    def decode(self, X, calc_argmax=True):
        """Map a one-hot (or probability) matrix back to its string.

        With calc_argmax=False, X is taken to already be a sequence of
        character indices.
        """
        if calc_argmax:
            X = X.argmax(axis=-1)
        return ''.join(self.indices_char[idx] for idx in X)
In [386]:
class colors:
    # ANSI escape sequences used to colorize prediction printouts.
    ok = '\033[92m'    # bright green: correct guess
    fail = '\033[91m'  # bright red: wrong guess
    close = '\033[0m'  # reset terminal attributes
In [387]:
# Parameters for the model and dataset
TRAINING_SIZE = 5000  # number of unique questions to generate
DIGITS = 2  # maximum digits per operand
OPS = 2  # number of operands summed in each question
INVERT = True  # feed question/answer strings reversed
# Try replacing GRU, or SimpleRNN
RNN = recurrent.LSTM
HIDDEN_SIZE = 128  # RNN hidden-state width
BATCH_SIZE = 128
LAYERS = 1  # number of stacked decoder RNN layers
MAXLEN = OPS * DIGITS + OPS - 1  # longest question string, e.g. "99+99" -> 5
In [388]:
# Alphabet for one-hot encoding: digits, '+', and the space used as padding.
chars = '0123456789+ '
ctable = CharacterTable(chars, MAXLEN)
In [389]:
def generate_data(training_size, num_digits, num_ops, invert=True):
    """Generate `training_size` unique addition questions and answers.

    Parameters
    ----------
    training_size : int
        Number of distinct questions to produce. Must not exceed the number
        of distinct operand multisets or the loop never terminates.
    num_digits : int
        Maximum number of digits per operand.
    num_ops : int
        Number of operands summed in each question.
    invert : bool, optional
        If True (default, matching the notebook-level INVERT flag this
        function previously read implicitly), both question and answer
        strings are returned reversed.

    Returns
    -------
    (questions, expected) : two equal-length lists of strings; questions
    are space-padded to num_ops * num_digits + num_ops - 1 characters,
    answers to num_digits + 1 characters.
    """
    maxlen = num_ops * num_digits + num_ops - 1
    questions = []
    expected = []
    seen = set()
    print('Generating data... ')

    def random_operand():
        # An operand with 1..num_digits random decimal digits.
        n_digits = np.random.randint(1, num_digits + 1)
        return int(''.join(np.random.choice(list('0123456789'))
                           for _ in range(n_digits)))

    while len(questions) < training_size:
        # Skip any addition questions we've already seen.
        # Sorting also collapses X+Y and Y+X into a single key.
        ops = sorted(random_operand() for _ in range(num_ops))
        key = tuple(ops)
        if key in seen:
            continue
        seen.add(key)
        # Pad the question with spaces so it is always maxlen characters.
        q = '+'.join(str(op) for op in ops)
        query = q + ' ' * (maxlen - len(q))
        # Answers can be of maximum size num_digits + 1.
        ans = str(sum(ops))
        ans += ' ' * (num_digits + 1 - len(ans))
        if invert:
            query = query[::-1]
            ans = ans[::-1]
        questions.append(query)
        expected.append(ans)
    print('Total addition questions:', len(questions))
    return questions, expected
In [390]:
def create_train_valid(questions, expected, num_digits, num_ops):
    """Vectorize question/answer strings and split 90/10 into train/valid.

    Parameters
    ----------
    questions, expected : lists of padded strings from generate_data.
    num_digits, num_ops : ints used to recompute the question length.

    Returns
    -------
    X_train, y_train, X_val, y_val : boolean one-hot arrays of shapes
    (n, maxlen, len(chars)) and (n, num_digits + 1, len(chars)).

    Relies on the module-level `chars` alphabet and `ctable` encoder.
    """
    maxlen = num_ops * num_digits + num_ops - 1
    print('Vectorization...')
    # `bool` replaces the removed np.bool alias.
    X = np.zeros((len(questions), maxlen, len(chars)), dtype=bool)
    y = np.zeros((len(questions), num_digits + 1, len(chars)), dtype=bool)
    for i, sentence in enumerate(questions):
        X[i] = ctable.encode(sentence, maxlen=maxlen)
    for i, sentence in enumerate(expected):
        y[i] = ctable.encode(sentence, maxlen=num_digits + 1)
    # Shuffle (X, y) in unison as the later parts of X will almost all be
    # larger digits.
    indices = np.arange(len(y))
    np.random.shuffle(indices)
    X = X[indices]
    y = y[indices]
    # Explicitly set apart 10% for validation data that we never train over.
    # Floor division: `len(X) / 10` is a float on Python 3 and breaks slicing.
    split_at = len(X) - len(X) // 10
    # Plain ndarray slicing replaces the long-removed keras slice_X helper.
    (X_train, X_val) = (X[:split_at], X[split_at:])
    (y_train, y_val) = (y[:split_at], y[split_at:])
    print(X_train.shape)
    print(y_train.shape)
    return X_train, y_train, X_val, y_val
In [8]:
def build_model(hidden_size, num_layers, num_digits, num_ops):
    """Build the plain encoder-decoder seq2seq model plus probe functions.

    Returns (model, encoder_f, decoder_f, mapper_f, encoder, decoder,
    mapper): the compiled model, three K.functions that expose each layer's
    output for later analysis, and the layer objects themselves.
    Relies on the module-level RNN class and `chars` alphabet.
    """
    MAXLEN = num_ops * num_digits + num_ops - 1
    # Most simple seq2seq model using encoder-decoder framework
    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    encoder = RNN(hidden_size, input_shape=(MAXLEN, len(chars)))
    model.add(encoder)
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(num_digits + 1))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(num_layers):
        decoder = RNN(hidden_size, return_sequences=True)
        model.add(decoder)
    # For each step of the output sequence, decide which character should be chosen
    mapper = TimeDistributed(Dense(len(chars)))
    model.add(mapper)
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # Probe functions: the extra K.learning_phase() input lets callers pass 0
    # to request test-time behaviour.
    inputs = [K.learning_phase()] + model.inputs
    encoder_f = K.function(inputs, [encoder.output])
    # NOTE(review): with num_layers > 1 only the LAST decoder layer is probed.
    decoder_f = K.function(inputs, [decoder.output])
    mapper_f = K.function(inputs, [mapper.output])
    return model, encoder_f, decoder_f, mapper_f, encoder, decoder, mapper
In [391]:
import theano.tensor as T


def get_Y(X, xmaxlen):
    """Keep only the first `xmaxlen` steps of the time dimension."""
    return X[:, :xmaxlen, :]  # get first xmaxlen elem from time dim


def get_R(X):
    """Attention read: batched dot of encoder outputs with attention weights.

    NOTE(review): `K.T` assumes the Theano backend exposes theano.tensor on
    keras.backend — confirm; the backend-portable spelling is K.batch_dot.
    """
    Y, alpha = X[0], X[1]
    ans = K.T.batched_dot(Y, alpha)
    return ans


def stack_R(X):
    """Concatenate the three attention read vectors along the time axis."""
    r_1, r_2, r_3 = X[0], X[1], X[2]
    ans = K.concatenate([r_1, r_2, r_3], axis=1)
    return ans
def build_att_seq2seq_model(hidden_size, num_layers, num_digits, num_ops, chars):
    """Seq2seq addition model with three attention heads, one per output
    character position.

    Each head scores the encoder's state sequence, softmax-normalizes the
    scores over time, and takes a weighted sum (r_1..r_3); the stacked reads
    form a length-3 pseudo-sequence fed to the decoder RNN.
    NOTE(review): assumes num_digits + 1 == 3 output steps, and that Input,
    Model and Reshape were imported by other cells before this one runs
    (they are imported in cells In [261] / In [10] below).
    """
    SEQ_LEN = num_ops * num_digits + num_ops - 1
    main_input = Input(shape=(SEQ_LEN,len(chars)), name='main_input')
    # Encoder returns the full hidden-state sequence so attention can look
    # back over every input position.
    encoder = RNN(hidden_size,
                  input_shape=(SEQ_LEN, len(chars)),
                  return_sequences=True)(main_input)
    Y = Lambda(get_Y, arguments={"xmaxlen": SEQ_LEN}, name="Y", output_shape=(SEQ_LEN, hidden_size))(encoder)
    # Single shared projection of the encoder states, used by all three heads.
    WY1 = TimeDistributed(Dense(hidden_size), name="WY1")(Y)
    # WY2 = TimeDistributed(Dense(hidden_size), name="WY2")(Y)
    # WY3 = TimeDistributed(Dense(hidden_size), name="WY3")(Y)
    M1 = Activation('tanh', name="M1")(WY1)
    # M2 = Activation('tanh', name="M2")(WY2)
    # M3 = Activation('tanh', name="M3")(WY3)
    # Per-head scalar score at each time step, flattened then softmaxed over
    # the SEQ_LEN positions to give the attention distribution.
    alpha_1 = TimeDistributed(Dense(1, activation='linear'), name="alpha_1")(M1)
    alpha_2 = TimeDistributed(Dense(1, activation='linear'), name="alpha_2")(M1)
    alpha_3 = TimeDistributed(Dense(1, activation='linear'), name="alpha_3")(M1)
    flat_alpha1 = Flatten(name="flat_alpha1")(alpha_1)
    flat_alpha2 = Flatten(name="flat_alpha2")(alpha_2)
    flat_alpha3 = Flatten(name="flat_alpha3")(alpha_3)
    alpha1 = Dense(SEQ_LEN, activation='softmax', name="alpha1")(flat_alpha1)
    alpha2 = Dense(SEQ_LEN, activation='softmax', name="alpha2")(flat_alpha2)
    alpha3 = Dense(SEQ_LEN, activation='softmax', name="alpha3")(flat_alpha3)
    Y_trans = Permute((2, 1), name="y_trans")(Y)  # of shape (None,300,20)
    # Attention reads via get_R: batched dot of transposed states with weights.
    r_1 = merge([Y_trans, alpha1], output_shape=(hidden_size, 1), name="r_1", mode=get_R)
    r_2 = merge([Y_trans, alpha2], output_shape=(hidden_size, 1), name="r_2", mode=get_R)
    r_3 = merge([Y_trans, alpha3], output_shape=(hidden_size, 1), name="r_3", mode=get_R)
    r1 = Reshape((1,hidden_size))(r_1)
    r2 = Reshape((1,hidden_size))(r_2)
    r3 = Reshape((1,hidden_size))(r_3)
    # r_1_trans = Permute((2, 1))(r1)
    # r_2_trans = Permute((2, 1))(r2)
    # r_3_trans = Permute((2, 1))(r3)
    # r = RepeatVector(num_digits+1)(r_1)
    # r = T.stack([r_1, r_2, r_3])
    # Stack the three reads into a (3, hidden_size) decoder input sequence.
    r = merge([r1, r2, r3], mode=stack_R, output_shape=(3, hidden_size))
    # decoder_input = Permute((2, 1))(r)
    decoded_result = RNN(hidden_size, input_shape=(num_digits+1, hidden_size), return_sequences=True)(r)
    mapping = TimeDistributed(Dense(len(chars)))(decoded_result)
    out = Activation('softmax')(mapping)
    output = out
    model = Model(input=[main_input], output=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
In [261]:
from keras.layers import Reshape
def build_seq_model(hidden_size, num_layers, num_digits, num_ops):
    """Seq2seq variant whose encoder emits one hidden vector per output step.

    Instead of repeating a single context vector, the encoder's flat output
    of width hidden_size * (num_digits + 1) is reshaped into a
    (num_digits + 1, hidden_size) sequence and handed to the decoder.

    Returns (model, encoder_f, decoder_f, mapper_f, encoder, decoder,
    mapper): the compiled model, K.function probes for each stage, and the
    layer objects. Relies on the module-level RNN class and `chars`.
    """
    input_len = num_ops * num_digits + num_ops - 1
    print('Build model...')
    model = Sequential()
    # Encoder: its final state is wide enough to be carved into one
    # hidden_size slice per output character.
    encoder = RNN(hidden_size * (num_digits + 1),
                  input_shape=(input_len, len(chars)))
    model.add(encoder)
    model.add(Reshape((num_digits + 1, hidden_size)))
    # Decoder: one or more stacked sequence-returning RNN layers.
    for _ in range(num_layers):
        decoder = RNN(hidden_size, return_sequences=True)
        model.add(decoder)
    # Per-time-step character scores, softmaxed over the alphabet.
    mapper = TimeDistributed(Dense(len(chars)))
    model.add(mapper)
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # Probe functions; pass 0 as the first input for test-time behaviour.
    probe_inputs = [K.learning_phase()] + model.inputs
    encoder_f = K.function(probe_inputs, [encoder.output])
    decoder_f = K.function(probe_inputs, [decoder.output])
    mapper_f = K.function(probe_inputs, [mapper.output])
    return model, encoder_f, decoder_f, mapper_f, encoder, decoder, mapper
In [10]:
from keras.layers import Input, merge, TimeDistributedDense
from keras.models import Model
def feedback_model(hidden_size, num_layers, num_digits, num_ops):
    """Teacher-forcing seq2seq model.

    Takes two inputs — the encoded question and the target sequence shifted
    right by one step — and at every output position the decoder consumes
    the repeated encoder context concatenated with the previous target
    character. Returns the compiled two-input Model.
    Relies on the module-level RNN class and `chars` alphabet.
    """
    input_len = num_ops * num_digits + num_ops - 1
    encoder_input = Input(shape=(input_len, len(chars)), name='encoder_input')
    decoder_input = Input(shape=(num_digits + 1, len(chars)), name='decoder_input')
    # Encode the question into one context vector, repeated per output step.
    context = RNN(hidden_size)(encoder_input)
    context_seq = RepeatVector(num_digits + 1)(context)
    # Decoder input at each step: [context, previous target character].
    merged = merge([context_seq, decoder_input], mode='concat')
    decoded = RNN(hidden_size, return_sequences=True)(merged)
    loss = TimeDistributedDense(len(chars), activation='softmax')(decoded)
    model_alt = Model(input=[encoder_input, decoder_input], output=[loss])
    model_alt.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
    return model_alt
In [392]:
def learning(model, X_train, y_train, iterations, X_val, y_val):
    """Train `model` one epoch per iteration and record validation accuracy.

    Parameters
    ----------
    model : compiled Keras model (or two-input model with X as a list).
    X_train, y_train, X_val, y_val : vectorized data arrays.
    iterations : int — runs iterations - 1 epochs (range starts at 1),
        matching the original behaviour.

    Returns
    -------
    list of per-iteration validation accuracies. Each entry is now the
    scalar from history['val_acc'] rather than the one-element list Keras
    stores per epoch (the nested lists plotted identically but were
    awkward to post-process).

    Uses the module-level BATCH_SIZE constant.
    """
    val_acc_array = []
    # Train the model each generation and report validation accuracy.
    for iteration in range(1, iterations):
        print()
        print('-' * 50)
        print('Iteration', iteration)
        training_obj = model.fit(X_train, y_train, batch_size=BATCH_SIZE,
                                 nb_epoch=1,
                                 validation_data=(X_val, y_val))
        # history['val_acc'] holds one value per epoch; with nb_epoch=1 take
        # the single scalar instead of appending the whole list.
        acc = training_obj.history['val_acc'][-1]
        val_acc_array.append(acc)
        print('Current validation accuracy = ' + str(acc))
    return val_acc_array
In [393]:
# Experiment: 2-digit operands, 2 operands, attention seq2seq model.
DIGITS = 2
OPS = 2
TRAINING_SIZE = 5000
questions, expected = generate_data(TRAINING_SIZE, DIGITS, OPS)
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, DIGITS, OPS)
# model, encoder_f_, decoder_f_, mapper_f_, encoder, decoder, mapper = build_att_seq2seq_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS, chars)
# val_acc_2_2 = learning(model, X_train, y_train, 50, X_val, y_val)
model = build_att_seq2seq_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS, chars)
val_acc_2_2 = learning(model, X_train, y_train, 50, X_val, y_val)
# Disabled: teacher-forcing comparison (targets shifted right by one step).
# y_train_pre = np.zeros(y_train.shape)
# for idx in range(len(y_train_pre)):
# y_train_pre[idx][1:] = y_train[idx][:-1]
# y_val_pre = np.zeros(y_val.shape)
# for idx in range(len(y_val_pre)):
# y_val_pre[idx][1:] = y_val[idx][:-1]
# fb_model = feedback_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS)
# fb_val_acc_2_2 = learning(fb_model, [X_train, y_train_pre], y_train, 50, [X_val, y_val_pre], y_val)
In [300]:
# Length of layer 3's bias vector — quick sanity check on layer shapes.
len(model.layers[3].get_weights()[1])
Out[300]:
In [11]:
# Experiment: 3 digits, 3 operands — plain seq2seq vs teacher-forcing model.
DIGITS = 3
OPS = 3
TRAINING_SIZE = 50000
questions, expected = generate_data(TRAINING_SIZE, DIGITS, OPS)
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, DIGITS, OPS)
# NOTE(review): build_model returns a 7-tuple, yet only `model` is captured
# and then passed to learning() — confirm this cell last ran against an
# earlier build_model that returned just the model.
model = build_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS)
val_acc_3_3 = learning(model, X_train, y_train, 50, X_val, y_val)
# Decoder input for teacher forcing: targets shifted right one step; the
# first step sees all zeros.
y_train_pre = np.zeros(y_train.shape)
for idx in range(len(y_train_pre)):
    y_train_pre[idx][1:] = y_train[idx][:-1]
y_val_pre = np.zeros(y_val.shape)
for idx in range(len(y_val_pre)):
    y_val_pre[idx][1:] = y_val[idx][:-1]
fb_model = feedback_model(HIDDEN_SIZE, LAYERS, DIGITS, OPS)
fb_val_acc_3_3 = learning(fb_model, [X_train, y_train_pre], y_train, 50, [X_val, y_val_pre], y_val)
In [42]:
# Sweep over (digits, ops) combinations with the plain seq2seq model.
# NOTE(review): this first cell re-vectorizes the previously generated
# `questions` for (3, 2) — the padding lengths must match how they were
# generated, so confirm the right generate_data cell ran before it.
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, 3, 2)
In [43]:
# NOTE(review): build_model returns a 7-tuple; see the note in cell In [11].
model = build_model(HIDDEN_SIZE, LAYERS, 3, 2)
In [44]:
val_acc_3_2 = learning(model, X_train, y_train, 50, X_val, y_val)
In [46]:
# 2 digits, 3 operands
questions, expected = generate_data(TRAINING_SIZE, 2, 3)
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, 2, 3)
model = build_model(HIDDEN_SIZE, LAYERS, 2, 3)
val_acc_2_3 = learning(model, X_train, y_train, 50, X_val, y_val)
In [48]:
# 3 digits, 3 operands
questions, expected = generate_data(TRAINING_SIZE, 3, 3)
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, 3, 3)
model = build_model(HIDDEN_SIZE, LAYERS, 3, 3)
val_acc_3_3 = learning(model, X_train, y_train, 50, X_val, y_val)
In [50]:
# 3 digits, 4 operands
questions, expected = generate_data(TRAINING_SIZE, 3, 4)
X_train, y_train, X_val, y_val = create_train_valid(questions, expected, 3, 4)
model = build_model(HIDDEN_SIZE, LAYERS, 3, 4)
val_acc_3_4 = learning(model, X_train, y_train, 50, X_val, y_val)
In [13]:
import matplotlib.pyplot as plt
%matplotlib inline
# Validation-accuracy curves: black = plain seq2seq (3 digits, 3 ops),
# green = teacher-forcing model on the same task.
plt.figure()
# plt.plot(fb_val_acc_2_2, 'y')
# plt.plot(val_acc_2_2)
# plt.plot(val_acc_3_2, 'r')
# plt.plot(val_acc_2_3, 'g')
plt.plot(val_acc_3_3, 'k')
plt.plot(fb_val_acc_3_3, 'g')
# plt.plot(val_acc_3_4, 'c')
# plt.legend(['2 digits 2 ops', '3 digits 2 ops', '2 digits 3 ops', '3 digits 3 ops', '3 digits 4 ops'])
Out[13]:
In [70]:
# Analysis of the learned encoder-decoder model
In [13]:
# Thin wrappers over the K.function probes returned by build_model; the
# leading 0 passes learning_phase=0 (test mode).
# NOTE(review): mapper_f_/decoder_f_/encoder_f_ must already be in scope
# from an earlier unpacking of build_model's 7-tuple return value.
def mapper_f(X):
    # The [0] is to disable the training phase flag
    return mapper_f_([0] + [X])


def decoder_f(X):
    # The [0] is to disable the training phase flag
    return decoder_f_([0] + [X])


def encoder_f(X):
    # The [0] is to disable the training phase flag
    return encoder_f_([0] + [X])
In [404]:
# Run one hand-written example through the attention model.
X_str = '81+27'
X_str = X_str[::-1]  # inputs were trained reversed (INVERT)
print(X_str)
# Shape (1, MAXLEN, len(chars)) = (1, 5, 12) for DIGITS=2, OPS=2.
X = ctable.encode(X_str, maxlen=MAXLEN).reshape([1,5,12])
# preds = model.predict_classes(X, verbose=0)
preds = model.predict(X, verbose=0)
# answer = ctable.decode(preds[0], calc_argmax=False)
# print(answer[::-1])
# X = ctable.encode(X_str).reshape([1,5,12])
# W, b = mapper.get_weights()
# mapper_f(X)
# H = decoder_f(X)
# Decode the softmax output manually; the answer comes out reversed.
X = preds[0].argmax(axis=-1)
''.join(ctable.indices_char[x] for x in X)# ctable.indices_char[x]
Out[404]:
In [69]:
# Poke at the trained LSTM's input-gate kernel and confirm its output size.
type(np.array(K.eval(model.layers[0].W_i)))
a = np.tanh
a(np.array([0.1, 0.2]))
model.layers[0].output_dim
Out[69]:
In [205]:
class LSTMVisualizer:
    """NumPy re-implementation of a trained Keras LSTM layer's forward pass,
    used to inspect gate activations step by step.

    Attribute naming follows Keras 1.x LSTM weights: W_* are input kernels,
    U_* recurrent kernels, b_* biases for the input / forget / cell /
    output gates.
    """
    # LSTM weights
    def __sigmoid__(self, x):
        # Logistic sigmoid. NOTE(review): Keras's default inner activation
        # is hard_sigmoid, so recomputed activations may differ slightly
        # from the trained layer's exact values — confirm before relying on
        # exact equality.
        return (1 / (1 + np.exp(-x)))

    def __init__(self, LSTM):
        # Pull every weight tensor out of the backend graph as an ndarray.
        # input gate
        self.W_i = np.array(K.eval(LSTM.W_i))
        self.U_i = np.array(K.eval(LSTM.U_i))
        self.b_i = np.transpose(np.array(K.eval(LSTM.b_i)))
        # forget gate
        self.W_f = np.array(K.eval(LSTM.W_f))
        self.U_f = np.array(K.eval(LSTM.U_f))
        self.b_f = np.transpose(np.array(K.eval(LSTM.b_f)))
        # cell
        self.W_c = np.array(K.eval(LSTM.W_c))
        self.U_c = np.array(K.eval(LSTM.U_c))
        self.b_c = np.transpose(np.array(K.eval(LSTM.b_c)))
        # output gate
        self.W_o = np.array(K.eval(LSTM.W_o))
        self.U_o = np.array(K.eval(LSTM.U_o))
        self.b_o = np.transpose(np.array(K.eval(LSTM.b_o)))
        # activation functions
        self.activation = np.tanh  # LSTM.activation
        self.inner_activation = self.__sigmoid__  # LSTM.inner_activation
        self.dim = LSTM.output_dim

    def compute(self, X):
        """Run the LSTM recurrence over input sequence X (time-major).

        Returns (I, F, C, O, H): per-step input-gate, forget-gate, cell,
        output-gate and hidden-state vectors. C and H include the initial
        zero state, so they are one element longer than I, F, O.
        """
        # Initialize h
        H = [np.zeros((self.dim,))]
        C = [np.zeros((self.dim,))]
        I = []
        F = []
        O = []
        for x in X:
            # Get previous hidden state
            print('x shape=', x.shape)
            h_tml = H[-1]
            print(h_tml.shape)
            c_tml = C[-1]
            # Input contributions: W^T x + b for each gate.
            x_i = np.inner(np.transpose(self.W_i), x) + self.b_i
            print('x_i.shape=', x_i.shape)
            x_f = np.inner(np.transpose(self.W_f), x) + self.b_f
            x_c = np.inner(np.transpose(self.W_c), x) + self.b_c
            x_o = np.inner(np.transpose(self.W_o), x) + self.b_o
            # Gate activations combine the input and recurrent terms.
            i = self.inner_activation(x_i + np.inner(np.transpose(self.U_i), H[-1]))
            print('i.shape=', i.shape)
            f = self.inner_activation(x_f + np.inner(np.transpose(self.U_f), H[-1]))
            print(f.shape)
            c_new = self.activation(x_c + np.inner(np.transpose(self.U_c), H[-1]))
            # New cell state: forget old memory, admit the new candidate.
            c = np.multiply(f, c_tml) + np.multiply(i, c_new)
            print(c.shape)
            o = self.inner_activation(x_o + np.inner(np.transpose(self.U_o), H[-1]))
            print(o.shape)
            # Hidden state: gated, squashed cell state.
            h = np.multiply(o, self.activation(c))
            print(h.shape)
            I.append(i)
            F.append(f)
            C.append(c)
            O.append(o)
            H.append(h)
        return I, F, C, O, H
print(W[:,4]) np.argmax(W[:,4]), np.max(W[:,4])
In [206]:
# Two probe questions with the same sum (95+43 = 65+73 = 138), used below to
# compare the encoder's internal states on equal-answer inputs.
X1_str = '95+43'
X1_str = X1_str[::-1]  # inputs were trained reversed (INVERT)
print(X1_str)
X1 = ctable.encode(X1_str, maxlen=MAXLEN).reshape([1,5,12])
preds1 = model.predict_classes(X1, verbose=0)
answer1 = ctable.decode(preds1[0], calc_argmax=False)
print(answer1[::-1])
X2_str = '65+73'
X2_str = X2_str[::-1]
print(X2_str)
X2 = ctable.encode(X2_str, maxlen=MAXLEN).reshape([1,5,12])
preds2 = model.predict_classes(X2, verbose=0)
answer2 = ctable.decode(preds2[0], calc_argmax=False)
print(answer2[::-1])
In [233]:
# Re-run the encoder by hand with the NumPy visualizer and compare against
# the backend probe outputs.
viz = LSTMVisualizer(model.layers[0])
np.transpose(viz.W_i).shape
I1, F1, C1, O1, H1 = viz.compute(X1[0])
I2, F2, C2, O2, H2 = viz.compute(X2[0])
In [237]:
H11 = encoder_f(X1)
H22 = encoder_f(X2)
In [238]:
# Difference of the two final encoder states (backend version).
H11[0][0][:10]-H22[0][0][:10]
Out[238]:
In [239]:
# Same comparison from the hand-rolled recurrence; H[5] is the state after
# all 5 input characters (H[0] is the initial zero state).
H1[5][:10]-H2[5][:10]
Out[239]:
In [243]:
H11[0][0][1], H22[0][0][1]
Out[243]:
In [242]:
# Rebind decoder_f to read the decoder layer directly (no learning-phase
# input), shadowing the wrapper defined earlier.
decoder_f = K.function(decoder.inputs, [decoder.output])
In [173]:
H = decoder_f(X)
H[0].shape
np.argmax(H[0][0,0,:]), np.max(H[0][0,0,:])
Out[173]:
In [272]:
# Project the first decoder state through the mapper weights W by hand and
# compare with the model's own prediction.
print(chars)
print([ np.inner(H[0][0,0,:], W[:,idx]) for idx in range(12) ])
preds = model.predict_classes(X, verbose=0)
ctable.decode(preds[0], calc_argmax=False)
Out[272]:
In [99]:
H[0][0,0,:]
Out[99]:
In [100]:
H[0][0,:2,:]
Out[100]:
In [101]:
import matplotlib.pyplot as plt
import pylab as pl
import matplotlib.cm as cm
%matplotlib inline
# Heatmap of the decoder states for the three output positions.
pl.imshow(np.transpose(H[0][0,:3,:]), interpolation='nearest', cmap=cm.binary, aspect='auto')
plt.show()
In [102]:
pl.imshow(W, interpolation='nearest', cmap=cm.binary, aspect='auto')
plt.show()
In [103]:
# Which hidden unit pushes hardest toward character index 4 at each step?
(np.argmax(H[0][0,0,:] * W[:,4]), np.max(H[0][0,0,:] * W[:,4]))
Out[103]:
In [104]:
(np.argmax(H[0][0,1,:] * W[:,4]), np.max(H[0][0,1,:] * W[:,4]))
Out[104]:
In [105]:
H[0][0,:3,34]
Out[105]:
In [106]:
W[34,:]
Out[106]:
In [107]:
(np.argmax(H[0][0,2,:] * W[:,4]), np.max(H[0][0,2,:] * W[:,4]))
Out[107]:
In [108]:
(np.argmin(H[0][0,2,:] * W[:,4]), np.min(H[0][0,2,:] * W[:,4]))
Out[108]:
In [109]:
(np.argmin(H[0][0,1,:] * W[:,4]), np.min(H[0][0,1,:] * W[:,4]))
Out[109]:
In [110]:
# Sorted per-unit contributions toward selected character columns.
np.sort(H[0][0,2,:]*W[:,4])
Out[110]:
In [111]:
np.sort(H[0][0,2,:]*W[:,0])
Out[111]:
In [112]:
np.sort(H[0][0,1,:]*W[:,0])
Out[112]:
In [113]:
np.sort(H[0][0,2,:]*W[:,4])
Out[113]:
In [273]:
np.argsort(H[0][0,0,:]*W[:,8])
Out[273]:
In [275]:
np.argsort(H[0][0,1,:]*W[:,8])
Out[275]:
In [278]:
np.argsort(H[0][0,1,:]*W[:,8])
Out[278]:
In [117]:
# Inspect the decoder layer's raw weight list.
dweights = decoder.get_weights()
In [118]:
len(dweights)
Out[118]:
In [122]:
dweights[3].shape
Out[122]:
In [129]:
context = encoder_f(X)
context = context[0][0]
In [130]:
In [131]:
context
Out[131]:
In [137]:
dweights[2].shape
Out[137]:
In [148]:
questions
Out[148]:
In [150]:
# Questions containing both a '2' and a '4'.
[s for s in questions if '2' in s and '4' in s]
Out[150]:
In [ ]: