Addition as a Sequence-to-Sequence Translation

Adapted from https://github.com/keras-team/keras/blob/master/examples/addition_rnn.py


In [1]:
!pip install -q tf-nightly-gpu-2.0-preview



In [2]:
import tensorflow as tf
print(tf.__version__)


2.0.0-dev20190502

Step 1: Generate sample equations


In [0]:
import numpy as np

class CharacterTable(object):
    """Given a set of characters:
    + Encode them to a one hot integer representation
    + Decode the one hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """
    def __init__(self, chars):
        """Initialize character table.

        # Arguments
            chars: Characters that can appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, C, num_rows):
        """One hot encode given string C.

        # Arguments
            num_rows: Number of rows in the returned one-hot encoding. This is
                used so that every encoded sample has the same number of rows.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[i] for i in x)
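
A quick round-trip sanity check (illustrative; not a cell from the original run): encoding a padded string and then decoding the result should reproduce it exactly, since decode simply takes the argmax of each one-hot row.

table = CharacterTable('0123456789+ ')
one_hot = table.encode('12+345 ', num_rows=7)  # shape (7, 12)
assert table.decode(one_hot) == '12+345 '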

In [0]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

In [6]:
import numpy as np

# Parameters for the model and dataset.
TRAINING_SIZE = 50000
DIGITS = 3
# REVERSE = True
REVERSE = False

# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

# All the digits, plus the '+' sign and a space for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789'))
                    for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    # Skip any addition questions we've already seen.
    # Treat a+b and b+a as the same question (hence the sorted key).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be at most DIGITS + 1 characters long (e.g., 999+999 = 1998).
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))


Generating data...
Total addition questions: 50000

In [13]:
questions[0]


Out[13]:
'506+0  '

In [14]:
expected[0]


Out[14]:
'506 '
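
The two widths follow from the padding arithmetic above: queries are padded to MAXLEN = DIGITS + 1 + DIGITS = 7 characters and answers to DIGITS + 1 = 4. A minimal sketch reproducing the first sample by hand (illustrative only):

a, b = 506, 0
q = '{}+{}'.format(a, b)              # '506+0'
query = q + ' ' * (MAXLEN - len(q))   # '506+0  ', padded to 7
ans = str(a + b)
ans += ' ' * (DIGITS + 1 - len(ans))  # '506 ', padded to 4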

In [8]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)


Vectorization...

In [9]:
len(x[0])


Out[9]:
7

In [10]:
len(questions[0])


Out[10]:
7

The input is one-hot encoded: 7 characters (MAXLEN), each a one-hot vector over 12 possible symbols (the ten digits, '+', and the padding space)


In [12]:
x[0]


Out[12]:
array([[False, False, False, False, False, False, False,  True, False,
        False, False, False],
       [False, False,  True, False, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False],
       [False,  True, False, False, False, False, False, False, False,
        False, False, False],
       [False, False,  True, False, False, False, False, False, False,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False]])
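
As a check (illustrative; not a cell from the original run), decoding x[0] with the same CharacterTable recovers the padded query string:

print(ctable.decode(x[0]))  # prints '506+0  '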

The output is encoded the same way, but with at most 4 characters (DIGITS + 1, since the sum of two 3-digit numbers can have four digits)


In [15]:
y[0]


Out[15]:
array([[False, False, False, False, False, False, False,  True, False,
        False, False, False],
       [False, False,  True, False, False, False, False, False, False,
        False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False]])

In [0]:
# Shuffle (x, y) in unison, as the later parts of x will almost all contain
# larger numbers (small sums get exhausted first, so later questions skew large).
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

Step 2: Training/Validation Split


In [17]:
# Explicitly set apart 10% for validation data that we never train over.
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)


Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)

Step 3: Create Model


In [18]:
# input shape: 7 characters, each being one of 0-9, '+' or space (12 possibilities)
MAXLEN, len(chars)


Out[18]:
(7, 12)

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, RepeatVector

# Try replacing LSTM, GRU, or SimpleRNN.
# RNN = LSTM
RNN = SimpleRNN  # should suffice: the sequences are short and the dependencies local
# RNN = GRU
HIDDEN_SIZE = 128
BATCH_SIZE = 128

model = Sequential()
# encoder 
model.add(RNN(units=HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))

# latent space
encoding_dim = 32
model.add(Dense(units=encoding_dim, activation='relu', name="encoder"))

# decoder: repeat the latent vector DIGITS + 1 times, one time step per output character
model.add(RepeatVector(DIGITS + 1))

# return_sequences=True keeps all 4 temporal outputs, not only the final one
# (we need a prediction for every character of the result)
model.add(RNN(units=HIDDEN_SIZE, return_sequences=True))

model.add(Dense(name='classifier', units=len(chars), activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
simple_rnn_4 (SimpleRNN)     (None, 128)               18048     
_________________________________________________________________
encoder (Dense)              (None, 32)                4128      
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 32)             0         
_________________________________________________________________
simple_rnn_5 (SimpleRNN)     (None, 4, 128)            20608     
_________________________________________________________________
classifier (Dense)           (None, 4, 12)             1548      
=================================================================
Total params: 44,332
Trainable params: 44,332
Non-trainable params: 0
_________________________________________________________________
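
The parameter counts can be verified by hand: a SimpleRNN has units * (input_dim + units) + units parameters (input kernel, recurrent kernel, bias), and a Dense layer has input_dim * units + units. A quick check (illustrative, not from the original notebook):

assert 128 * (12 + 128) + 128 == 18048  # simple_rnn_4 (encoder)
assert 128 * 32 + 32 == 4128            # encoder Dense
assert 128 * (32 + 128) + 128 == 20608  # simple_rnn_5 (decoder)
assert 128 * 12 + 12 == 1548            # classifier Dense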

Step 4: Train


In [24]:
%%time

# Train the model each generation and show predictions against the validation
# dataset.

merged_losses = {
    "loss": [],
    "val_loss": [],
    "accuracy": [],
    "val_accuracy": [],
}

for iteration in range(1, 50):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    iteration_history = model.fit(x_train, y_train,
                                  batch_size=BATCH_SIZE,
                                  epochs=1,
                                  validation_data=(x_val, y_val))
    
    merged_losses["loss"].append(iteration_history.history["loss"])
    merged_losses["val_loss"].append(iteration_history.history["val_loss"])
    merged_losses["accuracy"].append(iteration_history.history["accuracy"])
    merged_losses["val_accuracy"].append(iteration_history.history["val_accuracy"])

    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 6s 123us/sample - loss: 1.6689 - accuracy: 0.3896 - val_loss: 1.5134 - val_accuracy: 0.4410
Q 658+775 T 1433  1407
Q 557+41  T 598   666 
Q 660+44  T 704   666 
Q 79+411  T 490   444 
Q 684+760 T 1444  1406
Q 4+844   T 848   548 
Q 816+42  T 858   891 
Q 644+68  T 712   623 
Q 500+83  T 583   603 
Q 9+667   T 676   666 

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 102us/sample - loss: 1.3752 - accuracy: 0.4873 - val_loss: 1.2841 - val_accuracy: 0.5138
Q 97+760  T 857   882 
Q 914+5   T 919   912 
Q 72+60   T 132   146 
Q 915+21  T 936   991 
Q 734+101 T 835   851 
Q 3+39    T 42    39  
Q 175+17  T 192   261 
Q 411+66  T 477   591 
Q 82+50   T 132   146 
Q 663+7   T 670   664 

--------------------------------------------------
Iteration 3
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 1.2055 - accuracy: 0.5461 - val_loss: 1.1436 - val_accuracy: 0.5627
Q 862+89  T 951   958 
Q 41+764  T 805   819 
Q 985+908 T 1893  1864
Q 126+6   T 132   102 
Q 82+308  T 390   399 
Q 714+225 T 939   902 
Q 363+84  T 447   441 
Q 29+428  T 457   458 
Q 690+7   T 697   795 
Q 630+603 T 1233  1237

--------------------------------------------------
Iteration 4
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 102us/sample - loss: 1.0847 - accuracy: 0.5892 - val_loss: 1.0366 - val_accuracy: 0.6080
Q 22+69   T 91    70  
Q 374+22  T 396   390 
Q 7+944   T 951   941 
Q 5+194   T 199   190 
Q 16+907  T 923   901 
Q 46+312  T 358   360 
Q 48+179  T 227   223 
Q 443+53  T 496   490 
Q 482+3   T 485   490 
Q 566+921 T 1487  1467

--------------------------------------------------
Iteration 5
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.9985 - accuracy: 0.6217 - val_loss: 0.9810 - val_accuracy: 0.6253
Q 508+94  T 602   591 
Q 5+518   T 523   523 
Q 251+280 T 531   541 
Q 31+962  T 993   986 
Q 503+318 T 821   832 
Q 908+27  T 935   939 
Q 55+507  T 562   564 
Q 98+592  T 690   689 
Q 738+893 T 1631  1661
Q 21+939  T 960   963 

--------------------------------------------------
Iteration 6
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.9376 - accuracy: 0.6464 - val_loss: 0.9181 - val_accuracy: 0.6517
Q 420+0   T 420   424 
Q 708+54  T 762   765 
Q 85+205  T 290   380 
Q 38+654  T 692   694 
Q 569+825 T 1394  1385
Q 540+11  T 551   540 
Q 32+146  T 178   171 
Q 29+214  T 243   239 
Q 72+550  T 622   628 
Q 446+71  T 517   525 

--------------------------------------------------
Iteration 7
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 102us/sample - loss: 0.8887 - accuracy: 0.6643 - val_loss: 0.8859 - val_accuracy: 0.6597
Q 20+71   T 91    82  
Q 940+45  T 985   986 
Q 363+84  T 447   440 
Q 499+8   T 507   500 
Q 48+788  T 836   831 
Q 507+724 T 1231  1237
Q 996+315 T 1311  1364
Q 2+108   T 110   111 
Q 781+8   T 789   788 
Q 27+836  T 863   861 

--------------------------------------------------
Iteration 8
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.8458 - accuracy: 0.6792 - val_loss: 0.8466 - val_accuracy: 0.6753
Q 769+78  T 847   845 
Q 537+34  T 571   571 
Q 761+799 T 1560  1566
Q 296+11  T 307   303 
Q 55+595  T 650   643 
Q 77+54   T 131   130 
Q 199+751 T 950   943 
Q 368+51  T 419   415 
Q 977+997 T 1974  1953
Q 947+42  T 989   997 

--------------------------------------------------
Iteration 9
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.7997 - accuracy: 0.6957 - val_loss: 0.7910 - val_accuracy: 0.6970
Q 595+67  T 662   666 
Q 632+69  T 701   704 
Q 756+70  T 826   824 
Q 97+564  T 661   657 
Q 29+144  T 173   176 
Q 78+306  T 384   387 
Q 696+181 T 877   870 
Q 161+851 T 1012  1001
Q 770+69  T 839   847 
Q 96+723  T 819   817 

--------------------------------------------------
Iteration 10
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 113us/sample - loss: 0.7469 - accuracy: 0.7143 - val_loss: 0.7541 - val_accuracy: 0.7044
Q 437+4   T 441   449 
Q 54+849  T 903   903 
Q 776+644 T 1420  1424
Q 42+812  T 854   864 
Q 929+34  T 963   969 
Q 444+664 T 1108  1117
Q 646+257 T 903   990 
Q 837+677 T 1514  1532
Q 142+111 T 253   276 
Q 28+95   T 123   125 

--------------------------------------------------
Iteration 11
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 119us/sample - loss: 0.6938 - accuracy: 0.7321 - val_loss: 0.6920 - val_accuracy: 0.7304
Q 66+877  T 943   943 
Q 891+349 T 1240  1244
Q 883+86  T 969   979 
Q 74+416  T 490   490 
Q 84+554  T 638   639 
Q 726+849 T 1575  1577
Q 20+103  T 123   122 
Q 33+16   T 49    49  
Q 226+58  T 284   279 
Q 19+836  T 855   852 

--------------------------------------------------
Iteration 12
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 111us/sample - loss: 0.6440 - accuracy: 0.7518 - val_loss: 0.6505 - val_accuracy: 0.7487
Q 843+644 T 1487  1480
Q 903+656 T 1559  1565
Q 58+501  T 559   569 
Q 229+99  T 328   314 
Q 290+951 T 1241  1235
Q 642+645 T 1287  1280
Q 613+446 T 1059  1054
Q 49+159  T 208   208 
Q 106+8   T 114   118 
Q 57+788  T 845   845 

--------------------------------------------------
Iteration 13
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 102us/sample - loss: 0.6014 - accuracy: 0.7696 - val_loss: 0.6059 - val_accuracy: 0.7653
Q 0+686   T 686   676 
Q 9+655   T 664   664 
Q 838+3   T 841   847 
Q 368+54  T 422   420 
Q 78+849  T 927   927 
Q 855+90  T 945   945 
Q 83+987  T 1070  1070
Q 8+650   T 658   658 
Q 732+9   T 741   730 
Q 49+72   T 121   122 

--------------------------------------------------
Iteration 14
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.5643 - accuracy: 0.7828 - val_loss: 0.5742 - val_accuracy: 0.7739
Q 632+69  T 701   791 
Q 92+462  T 554   564 
Q 81+64   T 145   146 
Q 568+53  T 621   620 
Q 134+175 T 309   211 
Q 126+6   T 132   138 
Q 731+936 T 1667  1670
Q 854+47  T 901   891 
Q 47+152  T 199   199 
Q 243+504 T 747   746 

--------------------------------------------------
Iteration 15
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 102us/sample - loss: 0.5265 - accuracy: 0.7962 - val_loss: 0.5248 - val_accuracy: 0.7945
Q 866+0   T 866   866 
Q 930+314 T 1244  1244
Q 448+5   T 453   453 
Q 809+357 T 1166  1153
Q 971+901 T 1872  1857
Q 71+510  T 581   581 
Q 165+3   T 168   161 
Q 621+423 T 1044  1049
Q 926+632 T 1558  1558
Q 229+35  T 264   269 

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.4846 - accuracy: 0.8140 - val_loss: 0.4778 - val_accuracy: 0.8141
Q 776+834 T 1610  1516
Q 455+77  T 532   532 
Q 33+856  T 889   889 
Q 200+59  T 259   259 
Q 331+221 T 552   544 
Q 79+426  T 505   505 
Q 444+61  T 505   504 
Q 454+9   T 463   462 
Q 767+28  T 795   795 
Q 400+976 T 1376  1367

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.4558 - accuracy: 0.8247 - val_loss: 0.4723 - val_accuracy: 0.8170
Q 28+220  T 248   248 
Q 346+46  T 392   392 
Q 831+681 T 1512  1510
Q 24+817  T 841   841 
Q 11+924  T 935   935 
Q 7+311   T 318   327 
Q 0+699   T 699   699 
Q 421+46  T 467   467 
Q 996+382 T 1378  1364
Q 29+327  T 356   356 

--------------------------------------------------
Iteration 18
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.4177 - accuracy: 0.8411 - val_loss: 0.4305 - val_accuracy: 0.8309
Q 2+228   T 230   230 
Q 22+186  T 208   208 
Q 632+77  T 709   709 
Q 11+268  T 279   279 
Q 51+148  T 199   199 
Q 96+573  T 669   679 
Q 97+349  T 446   447 
Q 17+656  T 673   673 
Q 25+755  T 780   780 
Q 1+758   T 759   759 

--------------------------------------------------
Iteration 19
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 100us/sample - loss: 0.3974 - accuracy: 0.8490 - val_loss: 0.4203 - val_accuracy: 0.8382
Q 974+6   T 980   980 
Q 977+1   T 978   978 
Q 94+953  T 1047  1046
Q 22+467  T 489   489 
Q 698+905 T 1603  1697
Q 6+390   T 396   396 
Q 959+0   T 959   969 
Q 41+836  T 877   877 
Q 282+33  T 315   315 
Q 88+766  T 854   854 

--------------------------------------------------
Iteration 20
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.3690 - accuracy: 0.8605 - val_loss: 0.4144 - val_accuracy: 0.8386
Q 964+942 T 1906  1805
Q 31+329  T 360   350 
Q 736+98  T 834   935 
Q 4+923   T 927   927 
Q 242+18  T 260   250 
Q 77+31   T 108   108 
Q 85+802  T 887   888 
Q 550+84  T 634   635 
Q 26+517  T 543   543 
Q 46+420  T 466   466 

--------------------------------------------------
Iteration 21
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.3525 - accuracy: 0.8654 - val_loss: 0.3803 - val_accuracy: 0.8492
Q 12+953  T 965   965 
Q 3+514   T 517   517 
Q 3+389   T 392   392 
Q 54+223  T 277   277 
Q 57+322  T 379   389 
Q 274+74  T 348   348 
Q 716+931 T 1647  1657
Q 580+3   T 583   582 
Q 791+0   T 791   791 
Q 665+735 T 1400  1490

--------------------------------------------------
Iteration 22
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 99us/sample - loss: 0.3335 - accuracy: 0.8710 - val_loss: 0.3734 - val_accuracy: 0.8537
Q 181+935 T 1116  1218
Q 6+835   T 841   841 
Q 218+2   T 220   220 
Q 756+24  T 780   780 
Q 121+958 T 1079  1089
Q 45+160  T 205   204 
Q 646+70  T 716   716 
Q 25+591  T 616   616 
Q 31+653  T 684   684 
Q 598+35  T 633   632 

--------------------------------------------------
Iteration 23
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 100us/sample - loss: 0.3027 - accuracy: 0.8842 - val_loss: 0.3326 - val_accuracy: 0.8688
Q 537+34  T 571   571 
Q 9+627   T 636   636 
Q 31+922  T 953   954 
Q 29+144  T 173   183 
Q 220+77  T 297   297 
Q 461+77  T 538   538 
Q 18+16   T 34    35  
Q 32+146  T 178   178 
Q 342+82  T 424   424 
Q 16+904  T 920   920 

--------------------------------------------------
Iteration 24
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 100us/sample - loss: 0.2873 - accuracy: 0.8889 - val_loss: 0.3101 - val_accuracy: 0.8758
Q 6+534   T 540   540 
Q 186+389 T 575   575 
Q 22+571  T 593   693 
Q 54+671  T 725   725 
Q 302+372 T 674   679 
Q 507+62  T 569   579 
Q 956+828 T 1784  1785
Q 538+19  T 557   557 
Q 81+490  T 571   571 
Q 995+93  T 1088  1089

--------------------------------------------------
Iteration 25
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.2698 - accuracy: 0.8966 - val_loss: 0.2871 - val_accuracy: 0.8850
Q 110+949 T 1059  1070
Q 530+861 T 1391  1390
Q 539+938 T 1477  1477
Q 512+487 T 999   904 
Q 3+39    T 42    42  
Q 951+583 T 1534  1539
Q 732+0   T 732   732 
Q 183+6   T 189   188 
Q 272+68  T 340   330 
Q 63+790  T 853   853 

--------------------------------------------------
Iteration 26
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 100us/sample - loss: 0.2541 - accuracy: 0.9022 - val_loss: 0.2847 - val_accuracy: 0.8892
Q 0+461   T 461   461 
Q 834+87  T 921   921 
Q 936+3   T 939   939 
Q 579+423 T 1002  901 
Q 38+987  T 1025  1025
Q 118+88  T 206   205 
Q 98+228  T 326   326 
Q 680+54  T 734   734 
Q 97+206  T 303   303 
Q 696+69  T 765   765 

--------------------------------------------------
Iteration 27
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 104us/sample - loss: 0.2359 - accuracy: 0.9098 - val_loss: 0.2687 - val_accuracy: 0.8934
Q 593+91  T 684   684 
Q 36+36   T 72    72  
Q 960+395 T 1355  1365
Q 98+922  T 1020  1010
Q 622+98  T 720   710 
Q 685+19  T 704   604 
Q 454+346 T 800   890 
Q 188+95  T 283   283 
Q 824+248 T 1072  1071
Q 591+869 T 1460  1460

--------------------------------------------------
Iteration 28
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 118us/sample - loss: 0.2206 - accuracy: 0.9170 - val_loss: 0.2558 - val_accuracy: 0.9000
Q 13+12   T 25    25  
Q 73+41   T 114   124 
Q 3+624   T 627   627 
Q 542+8   T 550   550 
Q 22+72   T 94    94  
Q 74+790  T 864   864 
Q 0+525   T 525   525 
Q 33+778  T 811   811 
Q 663+541 T 1204  1204
Q 85+642  T 727   727 

--------------------------------------------------
Iteration 29
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 112us/sample - loss: 0.2189 - accuracy: 0.9174 - val_loss: 0.2123 - val_accuracy: 0.9194
Q 5+689   T 694   694 
Q 74+219  T 293   293 
Q 88+355  T 443   443 
Q 998+7   T 1005  1005
Q 78+8    T 86    85  
Q 86+875  T 961   971 
Q 426+41  T 467   467 
Q 701+7   T 708   708 
Q 5+232   T 237   237 
Q 907+569 T 1476  1475

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 122us/sample - loss: 0.1883 - accuracy: 0.9302 - val_loss: 0.2169 - val_accuracy: 0.9171
Q 77+32   T 109   119 
Q 59+5    T 64    55  
Q 3+544   T 547   547 
Q 0+181   T 181   181 
Q 97+526  T 623   623 
Q 612+2   T 614   615 
Q 22+69   T 91    90  
Q 464+62  T 526   526 
Q 2+606   T 608   608 
Q 92+31   T 123   123 

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 118us/sample - loss: 0.1889 - accuracy: 0.9294 - val_loss: 0.2154 - val_accuracy: 0.9152
Q 45+109  T 154   154 
Q 8+160   T 168   168 
Q 198+258 T 456   455 
Q 369+362 T 731   730 
Q 880+87  T 967   977 
Q 164+110 T 274   375 
Q 230+561 T 791   791 
Q 704+32  T 736   736 
Q 100+384 T 484   584 
Q 5+855   T 860   869 

--------------------------------------------------
Iteration 32
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 99us/sample - loss: 0.1755 - accuracy: 0.9346 - val_loss: 0.1961 - val_accuracy: 0.9248
Q 30+94   T 124   124 
Q 96+581  T 677   677 
Q 6+729   T 735   735 
Q 29+327  T 356   356 
Q 510+602 T 1112  1113
Q 881+16  T 897   897 
Q 358+6   T 364   364 
Q 251+93  T 344   344 
Q 98+473  T 571   571 
Q 837+34  T 871   871 

--------------------------------------------------
Iteration 33
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 100us/sample - loss: 0.1597 - accuracy: 0.9411 - val_loss: 0.2021 - val_accuracy: 0.9237
Q 3+186   T 189   189 
Q 842+234 T 1076  986 
Q 169+32  T 201   201 
Q 199+751 T 950   940 
Q 4+820   T 824   824 
Q 173+78  T 251   252 
Q 992+95  T 1087  1087
Q 38+20   T 58    58  
Q 117+102 T 219   229 
Q 23+44   T 67    67  

--------------------------------------------------
Iteration 34
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 100us/sample - loss: 0.1622 - accuracy: 0.9409 - val_loss: 0.1928 - val_accuracy: 0.9276
Q 627+1   T 628   628 
Q 36+22   T 58    58  
Q 307+2   T 309   309 
Q 566+93  T 659   659 
Q 53+208  T 261   261 
Q 1+69    T 70    60  
Q 1+485   T 486   486 
Q 425+200 T 625   626 
Q 5+212   T 217   217 
Q 785+254 T 1039  1039

--------------------------------------------------
Iteration 35
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.1604 - accuracy: 0.9407 - val_loss: 0.1869 - val_accuracy: 0.9306
Q 31+358  T 389   389 
Q 423+57  T 480   470 
Q 417+31  T 448   448 
Q 946+28  T 974   974 
Q 22+261  T 283   283 
Q 787+254 T 1041  1042
Q 233+57  T 290   290 
Q 93+20   T 113   113 
Q 75+406  T 481   481 
Q 88+54   T 142   142 

--------------------------------------------------
Iteration 36
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 100us/sample - loss: 0.1406 - accuracy: 0.9494 - val_loss: 0.1691 - val_accuracy: 0.9341
Q 695+900 T 1595  1595
Q 202+144 T 346   345 
Q 140+69  T 209   209 
Q 673+57  T 730   730 
Q 503+32  T 535   535 
Q 48+943  T 991   991 
Q 78+43   T 121   122 
Q 479+58  T 537   537 
Q 860+406 T 1266  1266
Q 857+673 T 1530  1530

--------------------------------------------------
Iteration 37
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 100us/sample - loss: 0.1454 - accuracy: 0.9466 - val_loss: 0.1708 - val_accuracy: 0.9360
Q 62+585  T 647   647 
Q 17+118  T 135   135 
Q 178+71  T 249   249 
Q 88+615  T 703   703 
Q 380+2   T 382   382 
Q 1+586   T 587   587 
Q 8+897   T 905   905 
Q 775+68  T 843   843 
Q 4+923   T 927   927 
Q 421+502 T 923   923 

--------------------------------------------------
Iteration 38
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.1379 - accuracy: 0.9497 - val_loss: 0.1815 - val_accuracy: 0.9316
Q 964+301 T 1265  1264
Q 9+465   T 474   474 
Q 843+43  T 886   887 
Q 375+55  T 430   430 
Q 92+828  T 920   920 
Q 13+0    T 13    12  
Q 99+181  T 280   280 
Q 341+449 T 790   790 
Q 812+35  T 847   847 
Q 501+20  T 521   511 

--------------------------------------------------
Iteration 39
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 101us/sample - loss: 0.1360 - accuracy: 0.9506 - val_loss: 0.1662 - val_accuracy: 0.9395
Q 58+78   T 136   136 
Q 78+707  T 785   785 
Q 369+4   T 373   373 
Q 317+3   T 320   320 
Q 398+18  T 416   416 
Q 369+362 T 731   731 
Q 687+226 T 913   913 
Q 747+784 T 1531  1531
Q 819+60  T 879   879 
Q 25+572  T 597   697 

--------------------------------------------------
Iteration 40
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 100us/sample - loss: 0.1312 - accuracy: 0.9534 - val_loss: 0.1454 - val_accuracy: 0.9470
Q 118+53  T 171   171 
Q 73+71   T 144   154 
Q 624+339 T 963   963 
Q 109+33  T 142   142 
Q 15+955  T 970   970 
Q 31+665  T 696   696 
Q 985+43  T 1028  1028
Q 708+991 T 1699  1799
Q 90+187  T 277   277 
Q 955+9   T 964   974 

--------------------------------------------------
Iteration 41
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 100us/sample - loss: 0.1244 - accuracy: 0.9556 - val_loss: 0.1295 - val_accuracy: 0.9516
Q 72+550  T 622   622 
Q 977+997 T 1974  1874
Q 372+3   T 375   375 
Q 701+415 T 1116  1116
Q 837+677 T 1514  1514
Q 916+68  T 984   984 
Q 288+778 T 1066  1065
Q 274+73  T 347   347 
Q 869+9   T 878   878 
Q 21+888  T 909   909 

--------------------------------------------------
Iteration 42
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 100us/sample - loss: 0.1161 - accuracy: 0.9588 - val_loss: 0.1546 - val_accuracy: 0.9433
Q 96+564  T 660   660 
Q 701+7   T 708   707 
Q 68+570  T 638   638 
Q 835+953 T 1788  1788
Q 3+389   T 392   392 
Q 90+111  T 201   201 
Q 957+728 T 1685  1685
Q 428+41  T 469   469 
Q 670+539 T 1209  1209
Q 15+101  T 116   116 

--------------------------------------------------
Iteration 43
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 100us/sample - loss: 0.1304 - accuracy: 0.9525 - val_loss: 0.1577 - val_accuracy: 0.9416
Q 168+744 T 912   912 
Q 93+31   T 124   124 
Q 893+46  T 939   939 
Q 416+51  T 467   467 
Q 3+944   T 947   947 
Q 696+24  T 720   720 
Q 250+24  T 274   274 
Q 532+610 T 1142  1142
Q 38+329  T 367   367 
Q 18+502  T 520   520 

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 107us/sample - loss: 0.1017 - accuracy: 0.9647 - val_loss: 0.1239 - val_accuracy: 0.9543
Q 184+89  T 273   273 
Q 415+850 T 1265  1255
Q 258+920 T 1178  1178
Q 352+52  T 404   404 
Q 912+2   T 914   914 
Q 684+760 T 1444  1444
Q 108+487 T 595   695 
Q 12+652  T 664   664 
Q 387+88  T 475   475 
Q 910+57  T 967   967 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 118us/sample - loss: 0.1101 - accuracy: 0.9606 - val_loss: 0.1779 - val_accuracy: 0.9352
Q 80+871  T 951   951 
Q 25+44   T 69    69  
Q 1+535   T 536   536 
Q 22+69   T 91    91  
Q 205+93  T 298   298 
Q 16+35   T 51    52  
Q 8+756   T 764   764 
Q 19+469  T 488   488 
Q 482+318 T 800   800 
Q 316+24  T 340   340 

--------------------------------------------------
Iteration 46
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 5s 110us/sample - loss: 0.1004 - accuracy: 0.9646 - val_loss: 0.1983 - val_accuracy: 0.9311
Q 559+82  T 641   651 
Q 776+80  T 856   856 
Q 341+52  T 393   393 
Q 273+57  T 330   330 
Q 2+366   T 368   368 
Q 960+395 T 1355  1355
Q 365+13  T 378   378 
Q 2+734   T 736   746 
Q 9+932   T 941   941 
Q 951+583 T 1534  1534

--------------------------------------------------
Iteration 47
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 99us/sample - loss: 0.1153 - accuracy: 0.9586 - val_loss: 0.1560 - val_accuracy: 0.9417
Q 9+119   T 128   128 
Q 91+201  T 292   292 
Q 402+88  T 490   491 
Q 603+88  T 691   691 
Q 24+78   T 102   102 
Q 980+59  T 1039  1039
Q 67+476  T 543   543 
Q 297+402 T 699   699 
Q 22+748  T 770   770 
Q 698+0   T 698   698 

--------------------------------------------------
Iteration 48
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 99us/sample - loss: 0.0984 - accuracy: 0.9653 - val_loss: 0.1244 - val_accuracy: 0.9545
Q 501+589 T 1090  1080
Q 10+638  T 648   648 
Q 435+154 T 589   599 
Q 75+66   T 141   141 
Q 109+84  T 193   193 
Q 90+111  T 201   201 
Q 2+108   T 110   110 
Q 774+76  T 850   850 
Q 498+884 T 1382  1283
Q 641+986 T 1627  1627

--------------------------------------------------
Iteration 49
Train on 45000 samples, validate on 5000 samples
45000/45000 [==============================] - 4s 99us/sample - loss: 0.1059 - accuracy: 0.9632 - val_loss: 0.1153 - val_accuracy: 0.9582
Q 7+61    T 68    68  
Q 1+134   T 135   135 
Q 92+584  T 676   676 
Q 57+58   T 115   115 
Q 2+658   T 660   660 
Q 268+830 T 1098  1098
Q 840+94  T 934   934 
Q 69+160  T 229   229 
Q 62+6    T 68    68  
Q 618+8   T 626   625 
CPU times: user 5min 37s, sys: 30.6 s, total: 6min 8s
Wall time: 3min 54s
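
Note: Sequential.predict_classes, used in the loop above, exists in this TF 2.0 preview but was later deprecated and removed. On current TensorFlow versions the equivalent is an explicit argmax over the softmax output (a sketch of the replacement, not what was run here):

probs = model.predict(rowx, verbose=0)  # shape (1, DIGITS + 1, len(chars))
preds = np.argmax(probs, axis=-1)       # class index for each output character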

In [25]:
import matplotlib.pyplot as plt

plt.ylabel('loss')
plt.xlabel('epoch')
plt.yscale('log')

plt.plot(merged_losses['loss'])
plt.plot(merged_losses['val_loss'])

plt.legend(['loss', 'validation loss'])


Out[25]:
<matplotlib.legend.Legend at 0x7f2c5d49d780>

In [27]:
plt.ylabel('accuracy')
plt.xlabel('epoch')
# plt.yscale('log')

plt.plot(merged_losses['accuracy'])
plt.plot(merged_losses['val_accuracy'])

plt.legend(['accuracy', 'validation accuracy'])


Out[27]:
<matplotlib.legend.Legend at 0x7f2c5d2759e8>
