In [1]:
# Pin the process to a single physical GPU before TensorFlow initializes CUDA.
# With CUDA_VISIBLE_DEVICES=9, physical GPU 9 is the only device exposed and
# TensorFlow remaps it to /gpu:0 (see the device listing printed below).
%env CUDA_VISIBLE_DEVICES=9

# Sanity check: list the devices TF can actually see (expect /cpu:0 and /gpu:0).
from tensorflow.python.client import device_lib
print (device_lib.list_local_devices())


env: CUDA_VISIBLE_DEVICES=9
[name: "/cpu:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13732871715516252033
, name: "/gpu:0"
device_type: "GPU"
memory_limit: 10978911847
locality {
  bus_id: 1
}
incarnation: 10304481651797778960
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:0f:00.0"
]

In [2]:
# from NLPutils.models.dilatedconv import DilatedConv

In [3]:
# from NLPutils.trainers.dconv_trainer import DConvTrainer

In [4]:
# CoNLL-2003 English NER splits: train / testa (dev) / testb (test).
# NOTE(review): absolute machine-local paths — adjust per environment.
train = '/data/datasets/ner/eng.train'
valid = '/data/datasets/ner/eng.testa'
test = '/data/datasets/ner/eng.testb'
# Pretrained GoogleNews word2vec binary providing the word embeddings.
word_embed_loc = '/data/embeddings/GoogleNews-vectors-negative300.bin'
# Fraction of training data held out when no explicit validation split is given.
valsplit=0.15

In [5]:
# Explicit imports instead of a wildcard: only these three names from
# conll_util are used anywhere in this notebook.
from src.util.conll_util import conllBuildVocab, conllSentsToIndices, validSplit
from src.embeddings import Word2VecModel, RandomInitVecModel

# Build char/word vocabularies over all splits so the embedding lookup tables
# cover every token seen at train, validation, and test time.
# maxs/maxw are the max sentence length and max word length across the corpora.
maxs, maxw, vocab_ch, vocab_word = conllBuildVocab([train, valid, test])

# Pretrained word embeddings restricted to the corpus vocabulary.
# 0.25 presumably bounds random init for OOV vectors — TODO confirm in Word2VecModel.
word_vec = Word2VecModel(word_embed_loc, vocab_word, 0.25)
word_vocab = word_vec.vocab

# Randomly initialized 16-dimensional character embeddings.
char_vec = RandomInitVecModel(16, vocab_ch, 0.25)
char_vocab = char_vec.vocab

# Label-to-index lookup; index 0 is reserved for padding.
f2i = {"<PAD>": 0}

# The trailing 3 is passed identically in every call; presumably a context /
# half-filter width — TODO confirm against conllSentsToIndices.
ts, f2i, _ = conllSentsToIndices(train, word_vocab, char_vocab, maxs, maxw, f2i, 3)
print(len(ts))
print('Loaded  training data')

if valid is not None:
    print('Using provided validation data')
    vs, f2i, _ = conllSentsToIndices(valid, word_vocab, char_vocab, maxs, maxw, f2i, 3)
else:
    # No explicit dev set: carve one out of the training data.
    ts, vs = validSplit(ts, valsplit)
    print('Created validation split')

# Test split; txts keeps the raw sentences for later inspection/visualization.
es, f2i, txts = conllSentsToIndices(test, word_vocab, char_vocab, maxs, maxw, f2i, 3)
print('Loaded test data')


14986
Loaded  training data
Using provided validation data
Loaded test data

In [6]:
# Quick sanity check: embedding matrix dimensions (vocab x depth) and the
# padded maximum sentence length used for the conv model's input width.
print('word embed weights shape: ', word_vec.weights.shape)
print( 'max sentence len', maxs)


word embed weights shape:  (30291, 300)
max sentence len 124

In [7]:
import tensorflow as tf

%load_ext autoreload
%autoreload 2

from src.models.dconv3 import DConv
from src.trainers.dconv3 import DConvTrainer

trainer = DConvTrainer(None, DConv, 'conll-ner-dconv3-9')
trainer.train('ner', ts, f2i, vs, es,
             char_vec,
             word_vec,
             'eval',
              batchsz=2**7,
              optim='adam',
              eta=0.0005,
              epochs=500,
              dropout=0.65,
              patience=50,
              cfiltsz='1,3,5,7',
             maxlen=maxs,
             maxw=maxw,
              num_filt=300,
             num_layers=3,
             num_iterations=2,
             fscore=1,
              viz=1,
             crf=True)

#1 -- 86.55 dropout at .45 and 300 filters. stopped at epoc 24.
#2 -- 86.7 dropout at .35, 350 filters.
#3 -- added dilation 1 conv at end of block. 86.28
#4 -- 3 layers, 3 iters. got much worse. peaked at 91.8 in training, started dropping.
#5 -- layers in block should be 1,2,1 from strubell's code. lowered dropout to .15 -- layers would have been 1,1,1.
#6 -- clipped gradients
#7 -- 3 layers, 2 iterations.
#8 -- 2**6 batch size, to reflect strubell's config. hit 91.9 in validation, 84.6 in test.
#9 -- upped dropout to .35 from .15. 86.49
#10 -- drop to .65.
#11 -- removed a layer from the block. from 3 (block's dilation should be 1,2,1) to 2, (dilation to 1,1).
#   -- 86.57
#12 -- increasing iterations. blocks have 3 layers again.
#13 -- lowered blocks to 2 layers. decreased eta to .00005. 86.82
#14 -- lowered adam's epsilon from 1e-8 to 1e-6. eta to .0005 from .00005, batch size to 2**7 from 2**6. 86.46

#strubell's dconv3
#1 -- 83.6 F1. wtf is going on.
#2 -- batch size up to 2**7. seems misconfigured with two dropouts..
#3 -- she has middle dropout and hidden dropout in the same place. middle drop is not used, though. i missed this. 87.09
#4  -- added word dropout straight to the word vectors. dropout == .85.  87.54
#   -- I forgot to account for dropout keep in evaluation....
#5 -- setting dropout keep to 1 in evaluation using pkeep and word_keep.  88.12
#6 -- adding in gaussian noise to gradients according to https://arxiv.org/pdf/1511.06807.pdf --
#  -- did flat normal noise, no scaling over t. 88.80. dropout at .65
#7 -- increased noise variance to 0.01 from 0.001. 88.43
#8 -- reverted noise variance to 0.001. changing dropout. my dropout is 1-dropout. it needs to be .15. 88.38
#9 -- dropout back to 0.65. using dropout loss from the paper. in branch refactor.
#  -- creates 11 Sentence level loss functions??


/src/src/trainers/dconv3.py:55: SyntaxWarning: assertion is always true, perhaps remove parentheses?
  assert(length == len(guess[b]), "lengths differ: length-- {}, len(guess[b])-- {} ".format(length, len(guess[b])))
block_sore length 2
block_score_no_dropout length 2
block_score length after anothe fwd 2
(?, 124, 9)
crf=True, creating SLL
(?, 124, 9)
(?,)
(?, 124)
(?, 124, 9)
crf=True, creating SLL
(?, 124, 9)
(?,)
(?, 124)
/opt/conda/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py:91: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Setting up word embedding visualization
Writing metadata
Training epoch 1.
Train (Loss 0.0722) (99.862 sec)
Validation (F1 = 0.7551) (Acc 49185/51409 = 0.9567) (5.513 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 2.
	(last improvement @ 1)
Train (Loss 0.0400) (89.025 sec)
Validation (F1 = 0.8431) (Acc 50101/51409 = 0.9746) (4.809 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 3.
	(last improvement @ 2)
Train (Loss 0.0305) (89.775 sec)
Validation (F1 = 0.8908) (Acc 50492/51409 = 0.9822) (4.899 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 4.
	(last improvement @ 3)
Train (Loss 0.0263) (90.332 sec)
Validation (F1 = 0.8968) (Acc 50534/51409 = 0.9830) (4.992 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 5.
	(last improvement @ 4)
Train (Loss 0.0237) (90.184 sec)
Validation (F1 = 0.9063) (Acc 50631/51409 = 0.9849) (4.527 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 6.
	(last improvement @ 5)
Train (Loss 0.0216) (89.496 sec)
Validation (F1 = 0.9094) (Acc 50628/51409 = 0.9848) (5.021 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 7.
	(last improvement @ 6)
Train (Loss 0.0202) (91.086 sec)
Validation (F1 = 0.9116) (Acc 50675/51409 = 0.9857) (4.517 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 8.
	(last improvement @ 7)
Train (Loss 0.0189) (90.518 sec)
Validation (F1 = 0.9209) (Acc 50769/51409 = 0.9876) (4.958 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 9.
	(last improvement @ 8)
Train (Loss 0.0178) (90.991 sec)
Validation (F1 = 0.9232) (Acc 50773/51409 = 0.9876) (4.883 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 10.
	(last improvement @ 9)
Train (Loss 0.0169) (90.599 sec)
Validation (F1 = 0.9251) (Acc 50780/51409 = 0.9878) (4.642 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 11.
	(last improvement @ 10)
Train (Loss 0.0161) (90.679 sec)
Validation (F1 = 0.9277) (Acc 50798/51409 = 0.9881) (4.970 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 12.
	(last improvement @ 11)
Train (Loss 0.0153) (90.471 sec)
Validation (F1 = 0.9309) (Acc 50829/51409 = 0.9887) (4.871 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 13.
	(last improvement @ 12)
Train (Loss 0.0147) (91.234 sec)
Validation (F1 = 0.9274) (Acc 50797/51409 = 0.9881) (5.041 sec)
Training epoch 14.
	(last improvement @ 12)
Train (Loss 0.0141) (90.584 sec)
Validation (F1 = 0.9305) (Acc 50819/51409 = 0.9885) (4.645 sec)
Training epoch 15.
	(last improvement @ 12)
Train (Loss 0.0137) (91.175 sec)
Validation (F1 = 0.9329) (Acc 50846/51409 = 0.9890) (4.903 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 16.
	(last improvement @ 15)
Train (Loss 0.0133) (89.460 sec)
Validation (F1 = 0.9319) (Acc 50834/51409 = 0.9888) (4.835 sec)
Training epoch 17.
	(last improvement @ 15)
Train (Loss 0.0128) (90.513 sec)
Validation (F1 = 0.9337) (Acc 50846/51409 = 0.9890) (4.759 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 18.
	(last improvement @ 17)
Train (Loss 0.0126) (91.490 sec)
Validation (F1 = 0.9328) (Acc 50828/51409 = 0.9887) (4.895 sec)
Training epoch 19.
	(last improvement @ 17)
Train (Loss 0.0122) (90.665 sec)
Validation (F1 = 0.9325) (Acc 50814/51409 = 0.9884) (4.883 sec)
Training epoch 20.
	(last improvement @ 17)
Train (Loss 0.0119) (91.280 sec)
Validation (F1 = 0.9326) (Acc 50839/51409 = 0.9889) (4.612 sec)
Training epoch 21.
	(last improvement @ 17)
Train (Loss 0.0117) (91.090 sec)
Validation (F1 = 0.9307) (Acc 50810/51409 = 0.9883) (4.888 sec)
Training epoch 22.
	(last improvement @ 17)
Train (Loss 0.0115) (90.794 sec)
Validation (F1 = 0.9305) (Acc 50795/51409 = 0.9881) (4.851 sec)
Training epoch 23.
	(last improvement @ 17)
Train (Loss 0.0113) (90.925 sec)
Validation (F1 = 0.9324) (Acc 50829/51409 = 0.9887) (4.806 sec)
Training epoch 24.
	(last improvement @ 17)
Train (Loss 0.0110) (91.070 sec)
Validation (F1 = 0.9324) (Acc 50820/51409 = 0.9885) (4.973 sec)
Training epoch 25.
	(last improvement @ 17)
Train (Loss 0.0108) (90.954 sec)
Validation (F1 = 0.9327) (Acc 50830/51409 = 0.9887) (4.987 sec)
Training epoch 26.
	(last improvement @ 17)
Train (Loss 0.0107) (92.099 sec)
Validation (F1 = 0.9352) (Acc 50851/51409 = 0.9891) (4.597 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 27.
	(last improvement @ 26)
Train (Loss 0.0105) (91.735 sec)
Validation (F1 = 0.9338) (Acc 50841/51409 = 0.9890) (4.686 sec)
Training epoch 28.
	(last improvement @ 26)
Train (Loss 0.0103) (91.562 sec)
Validation (F1 = 0.9337) (Acc 50833/51409 = 0.9888) (4.753 sec)
Training epoch 29.
	(last improvement @ 26)
Train (Loss 0.0102) (91.063 sec)
Validation (F1 = 0.9332) (Acc 50821/51409 = 0.9886) (4.588 sec)
Training epoch 30.
	(last improvement @ 26)
Train (Loss 0.0100) (91.379 sec)
Validation (F1 = 0.9310) (Acc 50809/51409 = 0.9883) (4.788 sec)
Training epoch 31.
	(last improvement @ 26)
Train (Loss 0.0099) (91.141 sec)
Validation (F1 = 0.9335) (Acc 50822/51409 = 0.9886) (4.965 sec)
Training epoch 32.
	(last improvement @ 26)
Train (Loss 0.0099) (91.771 sec)
Validation (F1 = 0.9318) (Acc 50824/51409 = 0.9886) (4.547 sec)
Training epoch 33.
	(last improvement @ 26)
Train (Loss 0.0098) (90.474 sec)
Validation (F1 = 0.9330) (Acc 50829/51409 = 0.9887) (4.817 sec)
Training epoch 34.
	(last improvement @ 26)
Train (Loss 0.0097) (89.284 sec)
Validation (F1 = 0.9332) (Acc 50805/51409 = 0.9883) (5.104 sec)
Training epoch 35.
	(last improvement @ 26)
Train (Loss 0.0095) (89.197 sec)
Validation (F1 = 0.9322) (Acc 50825/51409 = 0.9886) (4.750 sec)
Training epoch 36.
	(last improvement @ 26)
Train (Loss 0.0095) (88.943 sec)
Validation (F1 = 0.9323) (Acc 50809/51409 = 0.9883) (4.737 sec)
Training epoch 37.
	(last improvement @ 26)
Train (Loss 0.0093) (88.279 sec)
Validation (F1 = 0.9359) (Acc 50851/51409 = 0.9891) (4.037 sec)
Highest dev F1 achieved yet -- writing model
Test (F1 = 0.8977) (Acc 45705/46665 = 0.9794) (351.653 sec)
Training epoch 38.
	(last improvement @ 37)
Train (Loss 0.0092) (88.395 sec)
Validation (F1 = 0.9341) (Acc 50843/51409 = 0.9890) (4.792 sec)
Training epoch 39.
	(last improvement @ 37)
Train (Loss 0.0092) (89.513 sec)
Validation (F1 = 0.9373) (Acc 50853/51409 = 0.9892) (5.024 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 40.
	(last improvement @ 39)
Train (Loss 0.0092) (88.935 sec)
Validation (F1 = 0.9341) (Acc 50830/51409 = 0.9887) (5.024 sec)
Training epoch 41.
	(last improvement @ 39)
Train (Loss 0.0090) (88.805 sec)
Validation (F1 = 0.9352) (Acc 50850/51409 = 0.9891) (5.045 sec)
Training epoch 42.
	(last improvement @ 39)
Train (Loss 0.0090) (89.286 sec)
Validation (F1 = 0.9330) (Acc 50828/51409 = 0.9887) (4.785 sec)
Training epoch 43.
	(last improvement @ 39)
Train (Loss 0.0089) (89.653 sec)
Validation (F1 = 0.9328) (Acc 50811/51409 = 0.9884) (4.982 sec)
Training epoch 44.
	(last improvement @ 39)
Train (Loss 0.0088) (87.980 sec)
Validation (F1 = 0.9355) (Acc 50818/51409 = 0.9885) (4.856 sec)
Training epoch 45.
	(last improvement @ 39)
Train (Loss 0.0088) (88.028 sec)
Validation (F1 = 0.9347) (Acc 50818/51409 = 0.9885) (4.969 sec)
Training epoch 46.
	(last improvement @ 39)
Train (Loss 0.0088) (89.805 sec)
Validation (F1 = 0.9323) (Acc 50832/51409 = 0.9888) (4.606 sec)
Training epoch 47.
	(last improvement @ 39)
Train (Loss 0.0087) (88.364 sec)
Validation (F1 = 0.9349) (Acc 50826/51409 = 0.9887) (4.897 sec)
Training epoch 48.
	(last improvement @ 39)
Train (Loss 0.0087) (90.842 sec)
Validation (F1 = 0.9337) (Acc 50816/51409 = 0.9885) (5.122 sec)
Training epoch 49.
	(last improvement @ 39)
Train (Loss 0.0086) (87.390 sec)
Validation (F1 = 0.9338) (Acc 50818/51409 = 0.9885) (4.515 sec)
Training epoch 50.
	(last improvement @ 39)
Train (Loss 0.0085) (88.643 sec)
Validation (F1 = 0.9333) (Acc 50824/51409 = 0.9886) (4.790 sec)
Training epoch 51.
	(last improvement @ 39)
Train (Loss 0.0085) (88.652 sec)
Validation (F1 = 0.9329) (Acc 50830/51409 = 0.9887) (4.882 sec)
Training epoch 52.
	(last improvement @ 39)
Train (Loss 0.0085) (87.943 sec)
Validation (F1 = 0.9312) (Acc 50816/51409 = 0.9885) (4.955 sec)
Training epoch 53.
	(last improvement @ 39)
Train (Loss 0.0085) (89.119 sec)
Validation (F1 = 0.9327) (Acc 50823/51409 = 0.9886) (4.944 sec)
Training epoch 54.
	(last improvement @ 39)
Train (Loss 0.0084) (88.968 sec)
Validation (F1 = 0.9311) (Acc 50811/51409 = 0.9884) (4.216 sec)
Training epoch 55.
	(last improvement @ 39)
Train (Loss 0.0084) (87.817 sec)
Validation (F1 = 0.9344) (Acc 50840/51409 = 0.9889) (4.915 sec)
Training epoch 56.
	(last improvement @ 39)
Train (Loss 0.0083) (89.729 sec)
Validation (F1 = 0.9354) (Acc 50854/51409 = 0.9892) (5.203 sec)
Training epoch 57.
	(last improvement @ 39)
Train (Loss 0.0084) (88.428 sec)
Validation (F1 = 0.9337) (Acc 50841/51409 = 0.9890) (5.112 sec)
Training epoch 58.
	(last improvement @ 39)
Train (Loss 0.0083) (89.788 sec)
Validation (F1 = 0.9335) (Acc 50833/51409 = 0.9888) (4.663 sec)
Training epoch 59.
	(last improvement @ 39)
Train (Loss 0.0082) (89.238 sec)
Validation (F1 = 0.9362) (Acc 50841/51409 = 0.9890) (5.024 sec)
Training epoch 60.
	(last improvement @ 39)
Train (Loss 0.0081) (89.696 sec)
Validation (F1 = 0.9332) (Acc 50840/51409 = 0.9889) (4.851 sec)
Training epoch 61.
	(last improvement @ 39)
Train (Loss 0.0081) (89.579 sec)
Validation (F1 = 0.9355) (Acc 50862/51409 = 0.9894) (4.955 sec)
Training epoch 62.
	(last improvement @ 39)
Train (Loss 0.0081) (88.635 sec)
Validation (F1 = 0.9347) (Acc 50847/51409 = 0.9891) (5.136 sec)
Training epoch 63.
	(last improvement @ 39)
Train (Loss 0.0081) (89.451 sec)
Validation (F1 = 0.9342) (Acc 50840/51409 = 0.9889) (4.554 sec)
Training epoch 64.
	(last improvement @ 39)
Train (Loss 0.0081) (89.634 sec)
Validation (F1 = 0.9328) (Acc 50834/51409 = 0.9888) (4.564 sec)
Training epoch 65.
	(last improvement @ 39)
Train (Loss 0.0080) (90.765 sec)
Validation (F1 = 0.9335) (Acc 50839/51409 = 0.9889) (4.932 sec)
Training epoch 66.
	(last improvement @ 39)
Train (Loss 0.0080) (89.560 sec)
Validation (F1 = 0.9361) (Acc 50864/51409 = 0.9894) (5.054 sec)
Training epoch 67.
	(last improvement @ 39)
Train (Loss 0.0080) (89.759 sec)
Validation (F1 = 0.9342) (Acc 50842/51409 = 0.9890) (5.075 sec)
Training epoch 68.
	(last improvement @ 39)
Train (Loss 0.0080) (91.389 sec)
Validation (F1 = 0.9350) (Acc 50843/51409 = 0.9890) (4.689 sec)
Training epoch 69.
	(last improvement @ 39)
Train (Loss 0.0080) (90.909 sec)
Validation (F1 = 0.9360) (Acc 50864/51409 = 0.9894) (5.090 sec)
Training epoch 70.
	(last improvement @ 39)
Train (Loss 0.0079) (89.446 sec)
Validation (F1 = 0.9305) (Acc 50822/51409 = 0.9886) (4.726 sec)
Training epoch 71.
	(last improvement @ 39)
Train (Loss 0.0079) (88.760 sec)
Validation (F1 = 0.9316) (Acc 50817/51409 = 0.9885) (4.933 sec)
Training epoch 72.
	(last improvement @ 39)
Train (Loss 0.0079) (89.003 sec)
Validation (F1 = 0.9332) (Acc 50828/51409 = 0.9887) (5.026 sec)
Training epoch 73.
	(last improvement @ 39)
Train (Loss 0.0078) (89.149 sec)
Validation (F1 = 0.9347) (Acc 50838/51409 = 0.9889) (5.039 sec)
Training epoch 74.
	(last improvement @ 39)
Train (Loss 0.0079) (90.693 sec)
Validation (F1 = 0.9346) (Acc 50832/51409 = 0.9888) (4.708 sec)
Training epoch 75.
	(last improvement @ 39)
Train (Loss 0.0078) (90.460 sec)
Validation (F1 = 0.9343) (Acc 50841/51409 = 0.9890) (4.887 sec)
Training epoch 76.
	(last improvement @ 39)
Train (Loss 0.0078) (90.264 sec)
Validation (F1 = 0.9336) (Acc 50835/51409 = 0.9888) (4.778 sec)
Training epoch 77.
	(last improvement @ 39)
Train (Loss 0.0078) (89.794 sec)
Validation (F1 = 0.9348) (Acc 50842/51409 = 0.9890) (5.326 sec)
Training epoch 78.
	(last improvement @ 39)
Train (Loss 0.0078) (89.021 sec)
Validation (F1 = 0.9322) (Acc 50832/51409 = 0.9888) (5.168 sec)
Training epoch 79.
	(last improvement @ 39)
Train (Loss 0.0078) (91.300 sec)
Validation (F1 = 0.9377) (Acc 50868/51409 = 0.9895) (4.164 sec)
Highest dev F1 achieved yet -- writing model
Test (F1 = 0.8964) (Acc 45695/46665 = 0.9792) (355.695 sec)
Training epoch 80.
	(last improvement @ 79)
Train (Loss 0.0077) (90.054 sec)
Validation (F1 = 0.9345) (Acc 50860/51409 = 0.9893) (4.899 sec)
Training epoch 81.
	(last improvement @ 79)
Train (Loss 0.0076) (89.371 sec)
Validation (F1 = 0.9346) (Acc 50832/51409 = 0.9888) (5.144 sec)
Training epoch 82.
	(last improvement @ 79)
Train (Loss 0.0077) (89.390 sec)
Validation (F1 = 0.9327) (Acc 50823/51409 = 0.9886) (4.847 sec)
Training epoch 83.
	(last improvement @ 79)
Train (Loss 0.0076) (90.274 sec)
Validation (F1 = 0.9360) (Acc 50851/51409 = 0.9891) (4.941 sec)
Training epoch 84.
	(last improvement @ 79)
Train (Loss 0.0077) (89.343 sec)
Validation (F1 = 0.9364) (Acc 50852/51409 = 0.9892) (4.831 sec)
Training epoch 85.
	(last improvement @ 79)
Train (Loss 0.0076) (90.208 sec)
Validation (F1 = 0.9332) (Acc 50817/51409 = 0.9885) (4.936 sec)
Training epoch 86.
	(last improvement @ 79)
Train (Loss 0.0076) (89.856 sec)
Validation (F1 = 0.9354) (Acc 50844/51409 = 0.9890) (4.736 sec)
Training epoch 87.
	(last improvement @ 79)
Train (Loss 0.0076) (90.307 sec)
Validation (F1 = 0.9335) (Acc 50828/51409 = 0.9887) (5.003 sec)
Training epoch 88.
	(last improvement @ 79)
Train (Loss 0.0076) (89.910 sec)
Validation (F1 = 0.9328) (Acc 50837/51409 = 0.9889) (4.925 sec)
Training epoch 89.
	(last improvement @ 79)
Train (Loss 0.0076) (90.554 sec)
Validation (F1 = 0.9339) (Acc 50841/51409 = 0.9890) (4.868 sec)
Training epoch 90.
	(last improvement @ 79)
Train (Loss 0.0075) (90.204 sec)
Validation (F1 = 0.9322) (Acc 50834/51409 = 0.9888) (4.819 sec)
Training epoch 91.
	(last improvement @ 79)
Train (Loss 0.0075) (89.428 sec)
Validation (F1 = 0.9339) (Acc 50834/51409 = 0.9888) (5.037 sec)
Training epoch 92.
	(last improvement @ 79)
Train (Loss 0.0076) (89.896 sec)
Validation (F1 = 0.9347) (Acc 50843/51409 = 0.9890) (4.807 sec)
Training epoch 93.
	(last improvement @ 79)
Train (Loss 0.0075) (89.643 sec)
Validation (F1 = 0.9327) (Acc 50832/51409 = 0.9888) (4.949 sec)
Training epoch 94.
	(last improvement @ 79)
Train (Loss 0.0075) (89.018 sec)
Validation (F1 = 0.9339) (Acc 50840/51409 = 0.9889) (5.099 sec)
Training epoch 95.
	(last improvement @ 79)
Train (Loss 0.0075) (90.102 sec)
Validation (F1 = 0.9321) (Acc 50827/51409 = 0.9887) (4.747 sec)
Training epoch 96.
	(last improvement @ 79)
Train (Loss 0.0075) (91.240 sec)
Validation (F1 = 0.9304) (Acc 50811/51409 = 0.9884) (4.973 sec)
Training epoch 97.
	(last improvement @ 79)
Train (Loss 0.0075) (90.058 sec)
Validation (F1 = 0.9328) (Acc 50832/51409 = 0.9888) (5.265 sec)
Training epoch 98.
	(last improvement @ 79)
Train (Loss 0.0075) (90.113 sec)
Validation (F1 = 0.9319) (Acc 50821/51409 = 0.9886) (4.362 sec)
Training epoch 99.
	(last improvement @ 79)
Train (Loss 0.0075) (90.084 sec)
Validation (F1 = 0.9319) (Acc 50819/51409 = 0.9885) (5.031 sec)
Training epoch 100.
	(last improvement @ 79)
Train (Loss 0.0075) (89.217 sec)
Validation (F1 = 0.9304) (Acc 50803/51409 = 0.9882) (5.010 sec)
Training epoch 101.
	(last improvement @ 79)
Train (Loss 0.0075) (90.330 sec)
Validation (F1 = 0.9290) (Acc 50798/51409 = 0.9881) (5.074 sec)
Training epoch 102.
	(last improvement @ 79)
Train (Loss 0.0075) (90.227 sec)
Validation (F1 = 0.9317) (Acc 50819/51409 = 0.9885) (5.236 sec)
Training epoch 103.
	(last improvement @ 79)
Train (Loss 0.0074) (89.721 sec)
Validation (F1 = 0.9312) (Acc 50826/51409 = 0.9887) (5.281 sec)
Training epoch 104.
	(last improvement @ 79)
Train (Loss 0.0074) (91.153 sec)
Validation (F1 = 0.9314) (Acc 50812/51409 = 0.9884) (5.124 sec)
Training epoch 105.
	(last improvement @ 79)
Train (Loss 0.0074) (89.793 sec)
Validation (F1 = 0.9286) (Acc 50802/51409 = 0.9882) (5.077 sec)
Training epoch 106.
	(last improvement @ 79)
Train (Loss 0.0075) (90.762 sec)
Validation (F1 = 0.9287) (Acc 50804/51409 = 0.9882) (4.505 sec)
Training epoch 107.
	(last improvement @ 79)
Train (Loss 0.0074) (89.548 sec)
Validation (F1 = 0.9313) (Acc 50810/51409 = 0.9883) (4.753 sec)
Training epoch 108.
	(last improvement @ 79)
Train (Loss 0.0074) (89.375 sec)
Validation (F1 = 0.9319) (Acc 50827/51409 = 0.9887) (4.869 sec)
Training epoch 109.
	(last improvement @ 79)
Train (Loss 0.0075) (89.851 sec)
Validation (F1 = 0.9352) (Acc 50843/51409 = 0.9890) (5.054 sec)
Training epoch 110.
	(last improvement @ 79)
Train (Loss 0.0074) (88.713 sec)
Validation (F1 = 0.9322) (Acc 50838/51409 = 0.9889) (5.117 sec)
Training epoch 111.
	(last improvement @ 79)
Train (Loss 0.0074) (89.475 sec)
Validation (F1 = 0.9315) (Acc 50825/51409 = 0.9886) (4.469 sec)
Training epoch 112.
	(last improvement @ 79)
Train (Loss 0.0073) (89.173 sec)
Validation (F1 = 0.9332) (Acc 50825/51409 = 0.9886) (5.095 sec)
Training epoch 113.
	(last improvement @ 79)
Train (Loss 0.0073) (89.283 sec)
Validation (F1 = 0.9322) (Acc 50823/51409 = 0.9886) (5.020 sec)
Training epoch 114.
	(last improvement @ 79)
Train (Loss 0.0073) (89.298 sec)
Validation (F1 = 0.9330) (Acc 50828/51409 = 0.9887) (4.670 sec)
Training epoch 115.
	(last improvement @ 79)
Train (Loss 0.0074) (90.270 sec)
Validation (F1 = 0.9340) (Acc 50823/51409 = 0.9886) (5.036 sec)
Training epoch 116.
	(last improvement @ 79)
Train (Loss 0.0073) (88.759 sec)
Validation (F1 = 0.9327) (Acc 50828/51409 = 0.9887) (5.237 sec)
Training epoch 117.
	(last improvement @ 79)
Train (Loss 0.0073) (90.406 sec)
Validation (F1 = 0.9351) (Acc 50849/51409 = 0.9891) (4.715 sec)
Training epoch 118.
	(last improvement @ 79)
Train (Loss 0.0073) (89.943 sec)
Validation (F1 = 0.9313) (Acc 50830/51409 = 0.9887) (4.554 sec)
Training epoch 119.
	(last improvement @ 79)
Train (Loss 0.0073) (89.124 sec)
Validation (F1 = 0.9347) (Acc 50821/51409 = 0.9886) (5.071 sec)
Training epoch 120.
	(last improvement @ 79)
Train (Loss 0.0073) (89.259 sec)
Validation (F1 = 0.9320) (Acc 50816/51409 = 0.9885) (5.157 sec)
Training epoch 121.
	(last improvement @ 79)
Train (Loss 0.0074) (90.557 sec)
Validation (F1 = 0.9321) (Acc 50826/51409 = 0.9887) (4.935 sec)
Training epoch 122.
	(last improvement @ 79)
Train (Loss 0.0073) (90.572 sec)
Validation (F1 = 0.9330) (Acc 50833/51409 = 0.9888) (4.627 sec)
Training epoch 123.
	(last improvement @ 79)
Train (Loss 0.0073) (89.682 sec)
Validation (F1 = 0.9326) (Acc 50820/51409 = 0.9885) (4.992 sec)
Training epoch 124.
	(last improvement @ 79)
Train (Loss 0.0073) (89.880 sec)
Validation (F1 = 0.9321) (Acc 50831/51409 = 0.9888) (4.715 sec)
Training epoch 125.
	(last improvement @ 79)
Train (Loss 0.0073) (88.889 sec)
Validation (F1 = 0.9317) (Acc 50818/51409 = 0.9885) (4.625 sec)
Training epoch 126.
	(last improvement @ 79)
Train (Loss 0.0073) (89.638 sec)
Validation (F1 = 0.9315) (Acc 50819/51409 = 0.9885) (4.788 sec)
Training epoch 127.
	(last improvement @ 79)
Train (Loss 0.0073) (89.205 sec)
Validation (F1 = 0.9305) (Acc 50803/51409 = 0.9882) (4.343 sec)
Training epoch 128.
	(last improvement @ 79)
Train (Loss 0.0073) (90.446 sec)
Validation (F1 = 0.9303) (Acc 50803/51409 = 0.9882) (5.038 sec)
Training epoch 129.
	(last improvement @ 79)
Train (Loss 0.0073) (90.311 sec)
Validation (F1 = 0.9270) (Acc 50779/51409 = 0.9877) (4.814 sec)
Training epoch 130.
	(last improvement @ 79)
Train (Loss 0.0073) (89.781 sec)
Validation (F1 = 0.9280) (Acc 50792/51409 = 0.9880) (4.936 sec)
Stopping due to persistent failures to improve
-----------------------------------------------------
Highest dev F1 93.77
=====================================================
Evaluating best model on test data
=====================================================
Reloading conll-ner-dconv3-9/train/ner-9243
Test (F1 = 0.8964) (Acc 45695/46665 = 0.9792) (346.195 sec)
-----------------------------------------------------
Test acc 97.92
Test F1 89.64
=====================================================
Out[7]:
<src.models.dconv3.DConv at 0x7fcfa43bfb70>