In [0]:
# Adapted from
# https://github.com/keras-team/keras/blob/master/examples/addition_rnn.py

In [0]:
import warnings
# silence deprecation noise from TF 1.x / numpy so the rendered notebook stays readable
warnings.filterwarnings('ignore')

In [3]:
%matplotlib inline
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [4]:
# TF 1.x logging API: show only errors, hiding INFO/WARNING chatter
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
# confirm the kernel's TF version (this notebook was run on 1.12.0)
print(tf.__version__)


1.12.0

In [5]:
# let's see what compute devices we have available, hopefully a GPU 
# TF 1.x style: create a Session and ask it which devices it can place ops on
sess = tf.Session()
devices = sess.list_devices()
for d in devices:
    # prints CPU, GPU and the XLA-compiler variants of each
    print(d.name)


/job:localhost/replica:0/task:0/device:CPU:0
/job:localhost/replica:0/task:0/device:XLA_CPU:0
/job:localhost/replica:0/task:0/device:XLA_GPU:0
/job:localhost/replica:0/task:0/device:GPU:0

In [6]:
# a small sanity check, does tf seem to work ok?
# (output is b'Hello TF!' — TF 1.x returns string tensors as Python bytes)
hello = tf.constant('Hello TF!')
print(sess.run(hello))


b'Hello TF!'

In [7]:
# use the keras bundled with TF (tf.keras), not the standalone keras package
from tensorflow import keras
print(keras.__version__)


2.1.6-tf

Step 1: Generate sample equations


In [0]:
class CharacterTable(object):
    """Bidirectional lookup between characters and one-hot rows.

    Supports:
    + encoding a string to a fixed-height one-hot matrix,
    + decoding a one-hot matrix back to its string,
    + decoding a vector of class indices (or probability rows) to a string.
    """
    def __init__(self, chars):
        """Build the forward/backward lookup tables.

        # Arguments
            chars: Iterable of all characters that may appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = {idx: ch for idx, ch in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode string C into a (num_rows, len(chars)) matrix.

        # Arguments
            num_rows: Height of the returned matrix.  Rows past len(C)
                stay all-zero so every sample has the same shape.
        """
        encoded = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            encoded[row, self.char_indices[ch]] = 1
        return encoded

    def decode(self, x, calc_argmax=True):
        """Decode one-hot rows (or, if calc_argmax=False, raw indices) to text."""
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[idx] for idx in indices)

In [0]:
class colors:
    """ANSI escape codes used to colorize correct/incorrect predictions."""
    ok = '\033[92m'    # bright green — prediction matched the target
    fail = '\033[91m'  # bright red — prediction was wrong
    close = '\033[0m'  # reset terminal attributes

In [10]:
# Parameters for the model and dataset.
TRAINING_SIZE = 50000
DIGITS = 3
# REVERSE = True
REVERSE = False

# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

# All the numbers, plus sign and space for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)


def random_int():
    """Draw a random non-negative integer with 1..DIGITS digits.

    Each digit is sampled uniformly and independently; int() drops any
    leading zeros (e.g. '007' -> 7), matching the original distribution.
    """
    # PEP 8 (E731) discourages assigning a lambda to a name; a def also lets
    # us draw all digits in one vectorized call instead of one per character.
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789'), size=n_digits)))


questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = random_int(), random_int()
    # Skip any addition questions we've already seen
    # Also skip any such that x+Y == Y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))


Generating data...
Total addition questions: 50000

In [11]:
# peek at the first generated query string, space-padded to MAXLEN
questions[0]


Out[11]:
'83+335 '

In [12]:
print('Vectorization...')
# One-hot encode every question/answer into fixed-size boolean tensors.
# Use the builtin `bool` dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in later releases (it was just an alias anyway).
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)


Vectorization...

In [13]:
# each one-hot encoded question has MAXLEN (= 7) rows
len(x[0])


Out[13]:
7

In [14]:
# the raw question string is also MAXLEN characters, thanks to the padding
len(questions[0])


Out[14]:
7

In [15]:
# same first sample again, for side-by-side comparison with its encoding below
questions[0]


Out[15]:
'83+335 '

Input is encoded as one-hot: 7 characters (digits, '+', or padding space), each a one-hot vector over the 12 possible symbols


In [16]:
# one-hot encoding of questions[0]: one row per character, one column per symbol
x[0]


Out[16]:
array([[False, False, False, False, False, False, False, False, False,
        False,  True, False],
       [False, False, False, False, False,  True, False, False, False,
        False, False, False],
       [False,  True, False, False, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False,  True, False, False, False,
        False, False, False],
       [False, False, False, False, False,  True, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False,  True, False,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False]])

The output is encoded the same way, but with at most 4 characters (DIGITS + 1, to allow for the carry)


In [17]:
# one-hot encoding of expected[0]: DIGITS + 1 = 4 rows
y[0]


Out[17]:
array([[False, False, False, False, False, False,  True, False, False,
        False, False, False],
       [False, False, False,  True, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False,  True, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False]])

In [18]:
# the answer string behind y[0], space-padded to DIGITS + 1 characters
expected[0]


Out[18]:
'418 '

In [0]:
# Shuffle (x, y) in unison as the later parts of x will almost all be larger
# digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
# apply the same permutation to inputs and targets so pairs stay aligned
x = x[indices]
y = y[indices]

Step 2: Training/Validation Split


In [20]:
# Explicitly set apart 10% for validation data that we never train over.
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

# sanity-check shapes: (samples, time steps, one-hot width)
print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)


Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)

Step 3: Create Model


In [21]:
# input shape: 7 characters, each one of 0-9, '+' or space (12 possibilities)
MAXLEN, len(chars)


Out[21]:
(7, 12)

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import  LSTM, GRU, SimpleRNN, Dense, RepeatVector

# Try replacing LSTM, GRU, or SimpleRNN.
# RNN = LSTM
RNN = SimpleRNN # should be enough since we do not have long sequences and only local dependencies
# RNN = GRU
HIDDEN_SIZE = 128
BATCH_SIZE = 128

# Sequence-to-sequence layout: read the 7-char query into a single hidden
# state, compress it, then unroll DIGITS + 1 = 4 output time steps.
model = Sequential()
# encoder 
model.add(RNN(units=HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))

# latent space
encoding_dim = 32
model.add(Dense(units=encoding_dim, activation='relu', name="encoder"))

# decoder: have 4 temporal outputs one for each of the digits of the results
model.add(RepeatVector(DIGITS + 1))

# return_sequences=True tells it to keep all 4 temporal outputs, not only the final one (we need all four digits for the results)
model.add(RNN(units=HIDDEN_SIZE, return_sequences=True))

# softmax over the 12 characters independently at each of the 4 output positions
model.add(Dense(name='classifier', units=len(chars), activation='softmax'))

# categorical cross-entropy matches the per-position one-hot targets in y
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
simple_rnn (SimpleRNN)       (None, 128)               18048     
_________________________________________________________________
encoder (Dense)              (None, 32)                4128      
_________________________________________________________________
repeat_vector (RepeatVector) (None, 4, 32)             0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 4, 128)            20608     
_________________________________________________________________
classifier (Dense)           (None, 4, 12)             1548      
=================================================================
Total params: 44,332
Trainable params: 44,332
Non-trainable params: 0
_________________________________________________________________

Before training, let's look at a sample input and output


In [23]:
# input one-hot
# first validation sample, shape (MAXLEN, len(chars)) = (7, 12)
x_val[0]


Out[23]:
array([[False, False, False, False, False, False, False, False, False,
        False,  True, False],
       [False, False,  True, False, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False,  True, False],
       [False,  True, False, False, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False,  True, False],
       [False, False, False,  True, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False,  True]])

In [24]:
# output "one-hot" scores
# untrained model: the 12 softmax scores per output position are near-uniform
model.predict(np.array([x_val[0]]))


Out[24]:
array([[[0.11194286, 0.08703223, 0.04466162, 0.09614258, 0.07991789,
         0.10015881, 0.08792052, 0.0613222 , 0.07658774, 0.07327285,
         0.09509439, 0.08594634],
        [0.0828578 , 0.09545451, 0.04661759, 0.08456481, 0.06100081,
         0.12409856, 0.10386992, 0.05089214, 0.10278594, 0.06949714,
         0.08618999, 0.09217083],
        [0.06040806, 0.0901994 , 0.03527799, 0.09478931, 0.04698731,
         0.16602011, 0.1034363 , 0.04319282, 0.11064598, 0.07751279,
         0.08932592, 0.08220395],
        [0.05838025, 0.0976936 , 0.04050993, 0.11656898, 0.05230108,
         0.15842348, 0.10815699, 0.0403192 , 0.10056987, 0.09268498,
         0.06567447, 0.06871716]]], dtype=float32)

In [25]:
# output decoded by only showing highest score for digit
# NOTE(review): Sequential.predict_classes was removed in later TF/Keras
# versions; the portable equivalent is model.predict(...).argmax(axis=-1)
model.predict_classes(np.array([x_val[0]]))


Out[25]:
array([[0, 5, 5, 5]])

Step 4: Train


In [35]:
%%time

# Train the model each generation and show predictions against the validation
# dataset.

# Losses collected across iterations for the plot at the end of the notebook.
# Each model.fit() call runs exactly one epoch, so each appended history
# entry is a one-element list.
merged_losses = {
    "loss": [],
    "val_loss": []
}

# NOTE(review): range(1, 50) runs 49 iterations, not 50 — confirm intended.
for iteration in range(1, 50):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    iteration_history = model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))

    merged_losses["loss"].append(iteration_history.history["loss"])
    merged_losses["val_loss"].append(iteration_history.history["val_loss"])

    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # undo the input reversal (if enabled) so the printed query reads normally
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 124us/step - loss: 1.1818 - acc: 0.5528 - val_loss: 1.1146 - val_acc: 0.5736
Q 203+184 T 387   414 
Q 964+71  T 1035  1044
Q 50+985  T 1035  1044
Q 844+486 T 1330  1312
Q 584+764 T 1348  1446
Q 224+14  T 238   237 
Q 81+0    T 81    88  
Q 68+194  T 262   259 
Q 306+562 T 868   774 
Q 652+931 T 1583  1406

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 1.0473 - acc: 0.6013 - val_loss: 1.0548 - val_acc: 0.6030
Q 34+303  T 337   339 
Q 17+732  T 749   740 
Q 3+533   T 536   536 
Q 83+446  T 529   520 
Q 16+905  T 921   901 
Q 90+259  T 349   341 
Q 68+299  T 367   376 
Q 482+494 T 976   1075
Q 371+56  T 427   420 
Q 1+653   T 654   654 

--------------------------------------------------
Iteration 3
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 129us/step - loss: 0.9535 - acc: 0.6367 - val_loss: 0.9425 - val_acc: 0.6354
Q 17+732  T 749   740 
Q 33+395  T 428   420 
Q 62+294  T 356   353 
Q 689+9   T 698   690 
Q 926+55  T 981   983 
Q 685+813 T 1498  1595
Q 577+462 T 1039  1049
Q 350+949 T 1299  1393
Q 762+472 T 1234  1238
Q 17+62   T 79    88  

--------------------------------------------------
Iteration 4
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 128us/step - loss: 0.8691 - acc: 0.6678 - val_loss: 0.8321 - val_acc: 0.6811
Q 535+22  T 557   555 
Q 887+9   T 896   895 
Q 65+66   T 131   136 
Q 8+473   T 481   481 
Q 901+4   T 905   911 
Q 31+939  T 970   971 
Q 182+0   T 182   181 
Q 154+21  T 175   165 
Q 78+825  T 903   814 
Q 72+686  T 758   757 

--------------------------------------------------
Iteration 5
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.7958 - acc: 0.6945 - val_loss: 0.7789 - val_acc: 0.6985
Q 3+262   T 265   265 
Q 8+578   T 586   686 
Q 84+482  T 566   567 
Q 73+862  T 935   936 
Q 93+265  T 358   359 
Q 551+729 T 1280  1277
Q 38+916  T 954   954 
Q 95+992  T 1087  1087
Q 37+99   T 136   134 
Q 682+7   T 689   685 

--------------------------------------------------
Iteration 6
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.7290 - acc: 0.7204 - val_loss: 0.7124 - val_acc: 0.7223
Q 775+59  T 834   834 
Q 59+739  T 798   887 
Q 34+750  T 784   774 
Q 14+900  T 914   915 
Q 978+259 T 1237  1232
Q 644+685 T 1329  1324
Q 972+97  T 1069  1079
Q 74+872  T 946   956 
Q 2+829   T 831   830 
Q 140+99  T 239   241 

--------------------------------------------------
Iteration 7
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 135us/step - loss: 0.6665 - acc: 0.7434 - val_loss: 0.6511 - val_acc: 0.7456
Q 6+503   T 509   508 
Q 637+812 T 1449  1441
Q 533+57  T 590   599 
Q 17+533  T 550   540 
Q 6+466   T 472   472 
Q 341+21  T 362   364 
Q 44+871  T 915   915 
Q 82+100  T 182   183 
Q 523+605 T 1128  1126
Q 593+78  T 671   672 

--------------------------------------------------
Iteration 8
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 135us/step - loss: 0.6176 - acc: 0.7619 - val_loss: 0.6083 - val_acc: 0.7672
Q 47+183  T 230   229 
Q 817+99  T 916   909 
Q 925+490 T 1415  1415
Q 702+9   T 711   711 
Q 48+74   T 122   123 
Q 859+34  T 893   895 
Q 313+3   T 316   313 
Q 84+688  T 772   772 
Q 71+993  T 1064  1055
Q 7+856   T 863   863 

--------------------------------------------------
Iteration 9
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 134us/step - loss: 0.5656 - acc: 0.7820 - val_loss: 0.5540 - val_acc: 0.7871
Q 64+196  T 260   250 
Q 579+3   T 582   582 
Q 614+60  T 674   673 
Q 599+70  T 669   669 
Q 14+192  T 206   206 
Q 57+676  T 733   732 
Q 6+223   T 229   229 
Q 85+868  T 953   953 
Q 33+310  T 343   353 
Q 944+68  T 1012  1010

--------------------------------------------------
Iteration 10
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.5196 - acc: 0.7995 - val_loss: 0.5263 - val_acc: 0.7921
Q 14+14   T 28    28  
Q 75+10   T 85    85  
Q 4+190   T 194   194 
Q 3+948   T 951   951 
Q 445+92  T 537   537 
Q 60+853  T 913   913 
Q 59+413  T 472   472 
Q 93+88   T 181   172 
Q 885+41  T 926   927 
Q 52+155  T 207   108 

--------------------------------------------------
Iteration 11
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 132us/step - loss: 0.4793 - acc: 0.8160 - val_loss: 0.4953 - val_acc: 0.8056
Q 80+258  T 338   338 
Q 8+693   T 701   701 
Q 14+14   T 28    27  
Q 131+86  T 217   217 
Q 28+445  T 473   473 
Q 896+6   T 902   905 
Q 587+53  T 640   740 
Q 57+37   T 94    94  
Q 22+274  T 296   286 
Q 45+598  T 643   643 

--------------------------------------------------
Iteration 12
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.4482 - acc: 0.8281 - val_loss: 0.4521 - val_acc: 0.8255
Q 51+604  T 655   655 
Q 764+224 T 988   997 
Q 460+5   T 465   466 
Q 80+886  T 966   966 
Q 189+512 T 701   709 
Q 327+69  T 396   306 
Q 887+9   T 896   897 
Q 3+366   T 369   369 
Q 76+76   T 152   153 
Q 295+98  T 393   393 

--------------------------------------------------
Iteration 13
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.4175 - acc: 0.8414 - val_loss: 0.4283 - val_acc: 0.8350
Q 44+170  T 214   214 
Q 352+7   T 359   350 
Q 907+93  T 1000  1001
Q 66+203  T 269   269 
Q 9+848   T 857   857 
Q 942+4   T 946   946 
Q 631+485 T 1116  1110
Q 53+23   T 76    78  
Q 644+58  T 702   702 
Q 76+717  T 793   793 

--------------------------------------------------
Iteration 14
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 132us/step - loss: 0.3876 - acc: 0.8526 - val_loss: 0.4027 - val_acc: 0.8426
Q 724+73  T 797   797 
Q 38+912  T 950   959 
Q 695+792 T 1487  1483
Q 489+32  T 521   521 
Q 1+704   T 705   705 
Q 47+395  T 442   442 
Q 55+929  T 984   984 
Q 974+8   T 982   981 
Q 667+379 T 1046  1044
Q 841+344 T 1185  1182

--------------------------------------------------
Iteration 15
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 134us/step - loss: 0.3643 - acc: 0.8607 - val_loss: 0.3892 - val_acc: 0.8457
Q 26+98   T 124   124 
Q 446+86  T 532   532 
Q 781+475 T 1256  1256
Q 93+404  T 497   597 
Q 2+238   T 240   240 
Q 966+2   T 968   967 
Q 55+738  T 793   892 
Q 906+532 T 1438  1437
Q 19+589  T 608   608 
Q 954+13  T 967   967 

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.3441 - acc: 0.8671 - val_loss: 0.3614 - val_acc: 0.8617
Q 849+59  T 908   808 
Q 5+208   T 213   213 
Q 676+5   T 681   681 
Q 716+96  T 812   811 
Q 188+4   T 192   192 
Q 38+734  T 772   772 
Q 69+159  T 228   228 
Q 89+589  T 678   678 
Q 15+688  T 703   603 
Q 973+214 T 1187  1196

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.3186 - acc: 0.8772 - val_loss: 0.3380 - val_acc: 0.8664
Q 381+4   T 385   386 
Q 30+550  T 580   580 
Q 44+57   T 101   102 
Q 12+546  T 558   558 
Q 256+217 T 473   473 
Q 741+474 T 1215  1215
Q 843+244 T 1087  1087
Q 33+634  T 667   667 
Q 685+372 T 1057  106 
Q 876+90  T 966   966 

--------------------------------------------------
Iteration 18
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.3009 - acc: 0.8840 - val_loss: 0.3164 - val_acc: 0.8743
Q 109+62  T 171   171 
Q 178+235 T 413   402 
Q 385+84  T 469   469 
Q 187+1   T 188   188 
Q 0+245   T 245   245 
Q 882+807 T 1689  1678
Q 57+517  T 574   574 
Q 66+62   T 128   128 
Q 802+163 T 965   954 
Q 83+638  T 721   721 

--------------------------------------------------
Iteration 19
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.2800 - acc: 0.8935 - val_loss: 0.2959 - val_acc: 0.8868
Q 89+685  T 774   774 
Q 661+69  T 730   730 
Q 517+99  T 616   615 
Q 53+902  T 955   955 
Q 588+569 T 1157  1157
Q 5+256   T 261   261 
Q 264+27  T 291   201 
Q 42+603  T 645   645 
Q 627+794 T 1421  1422
Q 886+53  T 939   930 

--------------------------------------------------
Iteration 20
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.2530 - acc: 0.9035 - val_loss: 0.2812 - val_acc: 0.8897
Q 649+0   T 649   649 
Q 172+34  T 206   207 
Q 76+741  T 817   817 
Q 172+504 T 676   676 
Q 505+64  T 569   560 
Q 541+37  T 578   568 
Q 272+324 T 596   596 
Q 584+764 T 1348  1448
Q 81+211  T 292   292 
Q 417+196 T 613   615 

--------------------------------------------------
Iteration 21
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 134us/step - loss: 0.2447 - acc: 0.9063 - val_loss: 0.2768 - val_acc: 0.8944
Q 604+65  T 669   669 
Q 68+96   T 164   165 
Q 703+95  T 798   798 
Q 927+448 T 1375  1375
Q 301+776 T 1077  997 
Q 86+132  T 218   218 
Q 64+14   T 78    78  
Q 528+39  T 567   567 
Q 951+90  T 1041  1022
Q 41+154  T 195   195 

--------------------------------------------------
Iteration 22
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.2320 - acc: 0.9122 - val_loss: 0.2362 - val_acc: 0.9100
Q 79+939  T 1018  1018
Q 208+848 T 1056  1057
Q 53+23   T 76    76  
Q 84+82   T 166   166 
Q 754+36  T 790   790 
Q 7+184   T 191   191 
Q 821+5   T 826   826 
Q 512+850 T 1362  1362
Q 890+599 T 1489  1588
Q 59+510  T 569   569 

--------------------------------------------------
Iteration 23
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.2160 - acc: 0.9189 - val_loss: 0.2234 - val_acc: 0.9140
Q 137+455 T 592   592 
Q 659+830 T 1489  1489
Q 172+90  T 262   252 
Q 823+207 T 1030  1020
Q 928+71  T 999   1000
Q 656+915 T 1571  1570
Q 612+47  T 659   659 
Q 870+82  T 952   952 
Q 204+5   T 209   209 
Q 75+104  T 179   179 

--------------------------------------------------
Iteration 24
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 134us/step - loss: 0.2179 - acc: 0.9175 - val_loss: 0.2103 - val_acc: 0.9218
Q 594+46  T 640   640 
Q 31+57   T 88    88  
Q 468+136 T 604   505 
Q 458+52  T 510   509 
Q 247+84  T 331   331 
Q 224+27  T 251   251 
Q 49+507  T 556   556 
Q 948+4   T 952   951 
Q 601+479 T 1080  1080
Q 539+50  T 589   589 

--------------------------------------------------
Iteration 25
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 132us/step - loss: 0.1945 - acc: 0.9276 - val_loss: 0.2258 - val_acc: 0.9120
Q 41+571  T 612   612 
Q 483+527 T 1010  1010
Q 505+64  T 569   569 
Q 150+284 T 434   434 
Q 4+190   T 194   194 
Q 270+72  T 342   332 
Q 895+29  T 924   924 
Q 513+329 T 842   841 
Q 794+29  T 823   823 
Q 18+23   T 41    40  

--------------------------------------------------
Iteration 26
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.1880 - acc: 0.9302 - val_loss: 0.2084 - val_acc: 0.9199
Q 807+6   T 813   813 
Q 3+741   T 744   744 
Q 794+40  T 834   834 
Q 68+962  T 1030  1030
Q 0+662   T 662   662 
Q 573+30  T 603   603 
Q 219+223 T 442   442 
Q 951+25  T 976   976 
Q 204+5   T 209   209 
Q 450+22  T 472   472 

--------------------------------------------------
Iteration 27
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.1834 - acc: 0.9320 - val_loss: 0.2449 - val_acc: 0.9032
Q 9+782   T 791   791 
Q 33+679  T 712   712 
Q 539+347 T 886   876 
Q 16+374  T 390   490 
Q 8+701   T 709   709 
Q 76+76   T 152   152 
Q 332+71  T 403   403 
Q 17+211  T 228   228 
Q 42+528  T 570   560 
Q 62+294  T 356   356 

--------------------------------------------------
Iteration 28
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.1707 - acc: 0.9373 - val_loss: 0.1949 - val_acc: 0.9262
Q 90+76   T 166   166 
Q 517+385 T 902   902 
Q 977+920 T 1897  1897
Q 556+19  T 575   575 
Q 71+463  T 534   534 
Q 888+5   T 893   893 
Q 373+163 T 536   536 
Q 698+33  T 731   731 
Q 55+627  T 682   692 
Q 27+656  T 683   683 

--------------------------------------------------
Iteration 29
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.1655 - acc: 0.9391 - val_loss: 0.1836 - val_acc: 0.9320
Q 614+36  T 650   650 
Q 681+71  T 752   752 
Q 0+175   T 175   175 
Q 450+22  T 472   472 
Q 623+417 T 1040  1040
Q 95+35   T 130   130 
Q 820+38  T 858   858 
Q 39+705  T 744   744 
Q 973+214 T 1187  1187
Q 364+18  T 382   382 

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.1575 - acc: 0.9422 - val_loss: 0.1827 - val_acc: 0.9318
Q 506+6   T 512   511 
Q 13+99   T 112   112 
Q 28+223  T 251   251 
Q 41+589  T 630   630 
Q 278+34  T 312   312 
Q 734+448 T 1182  1182
Q 555+786 T 1341  1341
Q 691+5   T 696   696 
Q 209+9   T 218   218 
Q 33+20   T 53    53  

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 134us/step - loss: 0.1532 - acc: 0.9435 - val_loss: 0.2160 - val_acc: 0.9189
Q 505+769 T 1274  1274
Q 68+679  T 747   747 
Q 86+516  T 602   602 
Q 847+450 T 1297  1297
Q 659+830 T 1489  1589
Q 35+70   T 105   106 
Q 84+482  T 566   566 
Q 261+66  T 327   327 
Q 57+92   T 149   149 
Q 713+618 T 1331  1331

--------------------------------------------------
Iteration 32
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 133us/step - loss: 0.1612 - acc: 0.9404 - val_loss: 0.2402 - val_acc: 0.9095
Q 33+80   T 113   113 
Q 83+191  T 274   274 
Q 49+98   T 147   137 
Q 462+29  T 491   481 
Q 24+412  T 436   436 
Q 4+624   T 628   628 
Q 982+943 T 1925  1925
Q 861+477 T 1338  1338
Q 22+730  T 752   752 
Q 250+31  T 281   271 

--------------------------------------------------
Iteration 33
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.1486 - acc: 0.9454 - val_loss: 0.1670 - val_acc: 0.9377
Q 5+424   T 429   429 
Q 797+99  T 896   897 
Q 80+79   T 159   159 
Q 90+76   T 166   166 
Q 82+865  T 947   947 
Q 80+387  T 467   467 
Q 815+3   T 818   818 
Q 0+443   T 443   443 
Q 821+471 T 1292  1292
Q 104+287 T 391   491 

--------------------------------------------------
Iteration 34
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.1414 - acc: 0.9487 - val_loss: 0.1657 - val_acc: 0.9366
Q 523+25  T 548   548 
Q 718+35  T 753   752 
Q 12+247  T 259   269 
Q 11+310  T 321   321 
Q 90+318  T 408   408 
Q 775+907 T 1682  1682
Q 460+5   T 465   465 
Q 44+170  T 214   214 
Q 8+286   T 294   294 
Q 111+37  T 148   148 

--------------------------------------------------
Iteration 35
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 129us/step - loss: 0.1426 - acc: 0.9483 - val_loss: 0.1698 - val_acc: 0.9354
Q 879+829 T 1708  1708
Q 219+893 T 1112  1111
Q 644+58  T 702   702 
Q 24+2    T 26    26  
Q 13+436  T 449   449 
Q 301+558 T 859   859 
Q 965+680 T 1645  1645
Q 12+266  T 278   278 
Q 80+43   T 123   123 
Q 696+55  T 751   751 

--------------------------------------------------
Iteration 36
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.1274 - acc: 0.9536 - val_loss: 0.1474 - val_acc: 0.9440
Q 79+262  T 341   341 
Q 736+47  T 783   783 
Q 73+862  T 935   935 
Q 890+599 T 1489  1589
Q 883+626 T 1509  1519
Q 3+83    T 86    85  
Q 85+101  T 186   186 
Q 329+825 T 1154  1154
Q 5+254   T 259   259 
Q 692+68  T 760   750 

--------------------------------------------------
Iteration 37
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.1273 - acc: 0.9546 - val_loss: 0.1848 - val_acc: 0.9289
Q 55+537  T 592   592 
Q 93+265  T 358   358 
Q 19+753  T 772   772 
Q 1+395   T 396   396 
Q 4+614   T 618   619 
Q 1+412   T 413   413 
Q 437+4   T 441   441 
Q 594+9   T 603   603 
Q 61+83   T 144   144 
Q 352+84  T 436   426 

--------------------------------------------------
Iteration 38
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 134us/step - loss: 0.1154 - acc: 0.9588 - val_loss: 0.1933 - val_acc: 0.9258
Q 326+423 T 749   749 
Q 89+335  T 424   424 
Q 11+592  T 603   603 
Q 45+327  T 372   372 
Q 137+52  T 189   190 
Q 996+14  T 1010  1010
Q 788+679 T 1467  1467
Q 631+64  T 695   695 
Q 338+440 T 778   788 
Q 84+120  T 204   205 

--------------------------------------------------
Iteration 39
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 131us/step - loss: 0.1304 - acc: 0.9522 - val_loss: 0.1654 - val_acc: 0.9384
Q 117+839 T 956   955 
Q 309+344 T 653   653 
Q 644+68  T 712   712 
Q 314+2   T 316   316 
Q 421+53  T 474   474 
Q 23+40   T 63    63  
Q 0+668   T 668   669 
Q 846+82  T 928   929 
Q 51+995  T 1046  1046
Q 0+170   T 170   170 

--------------------------------------------------
Iteration 40
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 129us/step - loss: 0.1403 - acc: 0.9489 - val_loss: 0.1792 - val_acc: 0.9315
Q 572+14  T 586   586 
Q 34+303  T 337   337 
Q 405+89  T 494   494 
Q 207+3   T 210   209 
Q 166+0   T 166   166 
Q 46+432  T 478   478 
Q 64+196  T 260   250 
Q 779+55  T 834   834 
Q 3+510   T 513   513 
Q 5+238   T 243   243 

--------------------------------------------------
Iteration 41
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 128us/step - loss: 0.1174 - acc: 0.9582 - val_loss: 0.1440 - val_acc: 0.9469
Q 644+685 T 1329  1329
Q 106+48  T 154   164 
Q 815+622 T 1437  1437
Q 599+70  T 669   669 
Q 518+13  T 531   541 
Q 2+876   T 878   878 
Q 228+74  T 302   312 
Q 45+311  T 356   356 
Q 486+96  T 582   582 
Q 33+720  T 753   753 

--------------------------------------------------
Iteration 42
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 129us/step - loss: 0.1096 - acc: 0.9601 - val_loss: 0.1370 - val_acc: 0.9485
Q 679+20  T 699   609 
Q 966+920 T 1886  1886
Q 526+8   T 534   534 
Q 10+95   T 105   105 
Q 47+629  T 676   676 
Q 48+53   T 101   101 
Q 870+88  T 958   958 
Q 29+886  T 915   915 
Q 125+655 T 780   780 
Q 5+215   T 220   220 

--------------------------------------------------
Iteration 43
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 130us/step - loss: 0.1066 - acc: 0.9614 - val_loss: 0.1416 - val_acc: 0.9483
Q 95+986  T 1081  1081
Q 559+6   T 565   565 
Q 178+235 T 413   413 
Q 966+2   T 968   969 
Q 290+218 T 508   508 
Q 21+24   T 45    45  
Q 830+94  T 924   924 
Q 99+805  T 904   804 
Q 30+10   T 40    40  
Q 90+818  T 908   908 

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 128us/step - loss: 0.1111 - acc: 0.9600 - val_loss: 0.1428 - val_acc: 0.9473
Q 469+97  T 566   566 
Q 4+598   T 602   602 
Q 2+401   T 403   403 
Q 10+685  T 695   695 
Q 589+1   T 590   580 
Q 551+61  T 612   612 
Q 219+893 T 1112  1112
Q 305+67  T 372   372 
Q 962+80  T 1042  1042
Q 921+9   T 930   920 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 128us/step - loss: 0.1107 - acc: 0.9597 - val_loss: 0.2269 - val_acc: 0.9200
Q 151+577 T 728   728 
Q 203+76  T 279   279 
Q 749+9   T 758   758 
Q 239+112 T 351   341 
Q 844+13  T 857   857 
Q 352+4   T 356   356 
Q 555+690 T 1245  1245
Q 453+44  T 497   497 
Q 976+45  T 1021  1010
Q 348+25  T 373   373 

--------------------------------------------------
Iteration 46
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 132us/step - loss: 0.1046 - acc: 0.9628 - val_loss: 0.1339 - val_acc: 0.9486
Q 511+967 T 1478  1478
Q 788+679 T 1467  1457
Q 732+35  T 767   767 
Q 5+916   T 921   921 
Q 882+8   T 890   890 
Q 650+51  T 701   701 
Q 10+540  T 550   550 
Q 29+13   T 42    52  
Q 36+999  T 1035  1045
Q 561+87  T 648   649 

--------------------------------------------------
Iteration 47
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 129us/step - loss: 0.1135 - acc: 0.9592 - val_loss: 0.1424 - val_acc: 0.9461
Q 109+490 T 599   609 
Q 761+12  T 773   773 
Q 462+29  T 491   491 
Q 519+75  T 594   594 
Q 282+585 T 867   857 
Q 839+86  T 925   925 
Q 948+134 T 1082  1082
Q 20+221  T 241   241 
Q 666+71  T 737   737 
Q 50+6    T 56    57  

--------------------------------------------------
Iteration 48
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 132us/step - loss: 0.1084 - acc: 0.9607 - val_loss: 0.1481 - val_acc: 0.9453
Q 23+27   T 50    50  
Q 447+84  T 531   531 
Q 8+753   T 761   761 
Q 482+769 T 1251  1251
Q 7+399   T 406   406 
Q 33+36   T 69    69  
Q 59+480  T 539   549 
Q 355+118 T 473   474 
Q 352+84  T 436   436 
Q 749+399 T 1148  1148

--------------------------------------------------
Iteration 49
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
45000/45000 [==============================] - 6s 132us/step - loss: 0.0883 - acc: 0.9692 - val_loss: 0.1109 - val_acc: 0.9580
Q 163+94  T 257   257 
Q 13+166  T 179   179 
Q 46+554  T 600   500 
Q 905+444 T 1349  1349
Q 90+818  T 908   908 
Q 76+344  T 420   420 
Q 90+797  T 887   887 
Q 853+4   T 857   857 
Q 722+6   T 728   728 
Q 667+34  T 701   601 
CPU times: user 6min 6s, sys: 50.1 s, total: 6min 57s
Wall time: 4min 53s

In [41]:
import matplotlib.pyplot as plt

plt.ylabel('loss')
plt.xlabel('epoch')
# log scale makes the roughly-exponential loss decay easier to compare
plt.yscale('log')

# merged_losses holds one entry per training iteration (built in the loop above)
plt.plot(merged_losses['loss'], 'b')
plt.plot(merged_losses['val_loss'], 'r')

plt.legend(['loss', 'validation loss'])
# The original ended with a bare plt.plot(), which draws nothing and merely
# echoes an empty artist list ([]) as the cell output; show() renders cleanly.
plt.show()


Out[41]:
[]

In [0]: