In [1]:
# from NLPutils.models.dilatedconv import DilatedConv
from NLPutils.models.dconv3 import DConv

In [2]:
# from NLPutils.trainers.dconv_trainer import DConvTrainer
from NLPutils.trainers.dconv2 import DConvTrainer

In [3]:
train = '/data/ner/conll/eng.train'
valid = '/data/ner/conll/eng.testa'
test = '/data/ner/conll/eng.testb'
word_embed_loc = '/data/embeddings/GoogleNews-vectors-negative300.bin'
# word_embed_loc = '/data/embeddings/glove.6B/glove.6B.300d.txt'
valsplit = 0.15

In [4]:
from NLPutils.util.conll_util import conllBuildVocab, conllSentsToIndices, validSplit
from NLPutils.embeddings import Word2VecModel, RandomInitVecModel, GloVeModel

maxs, maxw, vocab_ch, vocab_word = conllBuildVocab([train, valid, test])

# Vocab LUTs
word_vocab = None
char_vocab = None

word_vec = Word2VecModel(word_embed_loc, vocab_word, 0.25)
# word_vec = GloVeModel(word_embed_loc, vocab_word, 0.25)
word_vocab = word_vec.vocab

# if FLAGS.charsz != FLAGS.wsz and FLAGS.cbow is True:
#     print('Warning: you have opted for CBOW char embeddings but provided differing sizes for char embedding depth and word depth. This is not possible; forcing char embedding depth to the word depth ' + str(FLAGS.wsz))
#     FLAGS.charsz = FLAGS.wsz

char_vec = RandomInitVecModel(16, vocab_ch, 0.25)
char_vocab = char_vec.vocab

f2i = {"<PAD>": 0}  # label-to-index map; index 0 reserved for padding

ts, f2i, _ = conllSentsToIndices(train, word_vocab, char_vocab, maxs, maxw, f2i, 3)
print(len(ts))
print('Loaded training data')

if valid is not None:
    print('Using provided validation data')
    vs, f2i, _ = conllSentsToIndices(valid, word_vocab, char_vocab, maxs, maxw, f2i, 3)
else:
    ts, vs = validSplit(ts, valsplit)
    print('Created validation split')


es, f2i, txts = conllSentsToIndices(test, word_vocab, char_vocab, maxs, maxw, f2i, 3)
print('Loaded test data')


14986
Loaded training data
Using provided validation data
Loaded test data
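For context, conllSentsToIndices pads every sentence to maxs tokens and every token to maxw characters before mapping words, characters, and labels to integer ids. A minimal sketch of that shape contract, assuming plain-dict vocabularies; the helper below is hypothetical, not the NLPutils implementation:

import numpy as np

def sent_to_indices(tokens, labels, word_vocab, char_vocab, f2i, maxs, maxw):
    xs = np.zeros(maxs, dtype=int)             # word ids, zero-padded
    xs_ch = np.zeros((maxs, maxw), dtype=int)  # char ids per token
    ys = np.zeros(maxs, dtype=int)             # label ids, 0 = <PAD>
    for i, (tok, lab) in enumerate(zip(tokens[:maxs], labels[:maxs])):
        xs[i] = word_vocab.get(tok, 0)
        for j, ch in enumerate(tok[:maxw]):
            xs_ch[i, j] = char_vocab.get(ch, 0)
        ys[i] = f2i.setdefault(lab, len(f2i))
    return xs, xs_ch, ys

Seeding f2i with {"<PAD>": 0} above guarantees that padded positions share label id 0 across the train, validation, and test conversions.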

In [5]:
print('word embed weights shape: ', word_vec.weights.shape)
print('max sentence len', maxs)
print('f2i length', len(f2i))


word embed weights shape:  (30291, 300)
max sentence len 124
f2i length 9
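The 420-wide per-token input seen in the next cell's shape trace is presumably the 300-d word embedding concatenated with character-convolution outputs over the four filter widths in cfiltsz (e.g. 30 feature maps per width, 300 + 4*30 = 420); an initial width-3 convolution (h0 in the trace) then maps it down to num_filt=300 channels. The DConv body iterates a dilated-convolution block in the style of Strubell et al.'s ID-CNN; below is a minimal TensorFlow sketch of one such block with the (1, 2, 1) dilation schedule from note #5 in the next cell. It is an illustration under those assumptions, not the DConv source:

import tensorflow as tf

def dilated_block(x, num_filt, dilations=(1, 2, 1), name='dilated-block'):
    # x: [batch, 1, maxlen, num_filt]. Each layer is a width-3 convolution
    # at the given dilation rate followed by a ReLU, so the receptive field
    # widens without pooling or striding.
    h = x
    with tf.variable_scope(name):
        for i, rate in enumerate(dilations):
            w = tf.get_variable('w%d' % i, [1, 3, num_filt, num_filt])
            b = tf.get_variable('b%d' % i, [num_filt],
                                initializer=tf.constant_initializer(0.0))
            h = tf.nn.atrous_conv2d(h, w, rate, padding='SAME')
            h = tf.nn.relu(tf.nn.bias_add(h, b))
    return h

With num_iterations=3 the block is applied three times, and the trace shows per-token scores emitted after each application (ner/block, ner/block_1, ner/block_2), consistent with the per-iteration loss in the ID-CNN paper.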

In [7]:
import tensorflow as tf

trainer = DConvTrainer(None, DConv, 'conll-ner-dconv-strubell-5')
trainer.train('ner', ts, f2i, vs, es,
              char_vec,
              word_vec,
              'eval',
              batchsz=2**5,
              optim='adam',
              eta=0.0005,
              epochs=500,
              dropout=0.15,
              patience=50,
              cfiltsz='1,3,5,7',
              maxlen=maxs,
              maxw=maxw,
              num_filt=300,
              num_layers=3,
              num_iterations=3,
              fscore=1,
              viz=1,
              crf=True)

#1 -- 86.55 dropout at .45 and 300 filters. stopped at epoch 24.
#2 -- 86.7 dropout at .35, 350 filters.
#3 -- added a dilation-1 conv at the end of the block. 86.28
#4 -- 3 layers, 3 iters. got much worse. peaked at 91.8 in training, then started dropping.
#5 -- per Strubell's code the block should use dilations 1,2,1 (they would otherwise have been 1,1,1). lowered dropout to .15.
#6 -- clipped gradients.
#7 -- 3 layers, 2 iterations.

#strubell-1 -- just trying it. only hits 85.5
#strubell-2 -- glove embeddings -- holy shit. only hits 86 IN VALIDATION
#strubell-3 -- glove embeddings + hwy (highway) embeddings. hits almost 86.7, so a little better?
#strubell-4 -- trying word2vec again. 86.3
#strubell-5 -- num_iterations to 3


input_feats Tensor("ner/ExpandDims:0", shape=(?, 1, 124, 420), dtype=float32)
filter_shape [1, 3, Dimension(420), 300]
h0 Tensor("ner/relu:0", shape=(?, 1, 124, 300), dtype=float32)
last_output Tensor("ner/concat_1:0", shape=(?, 1, 124, 300), dtype=float32)
block output Tensor("ner/block/iterated-block/relu_2:0", shape=(?, ?, ?, 300), dtype=float32)
h_concat_squeeze Tensor("ner/block/Squeeze:0", shape=(?, ?, 300), dtype=float32)
h_concat_flat Tensor("ner/block/Reshape:0", shape=(?, 300), dtype=float32)
input_to_pred Tensor("ner/block/hidden_dropout/dropout/mul:0", shape=(?, 300), dtype=float32)
proj_width 300
scores Tensor("ner/block/output/scores:0", shape=(?, 9), dtype=float32)
unflat_scores Tensor("ner/block/output/Reshape:0", shape=(?, 124, 9), dtype=float32)
block output Tensor("ner/block_1/iterated-block/relu_2:0", shape=(?, ?, ?, 300), dtype=float32)
h_concat_squeeze Tensor("ner/block_1/Squeeze:0", shape=(?, ?, 300), dtype=float32)
h_concat_flat Tensor("ner/block_1/Reshape:0", shape=(?, 300), dtype=float32)
input_to_pred Tensor("ner/block_1/hidden_dropout/dropout/mul:0", shape=(?, 300), dtype=float32)
proj_width 300
scores Tensor("ner/block_1/output/scores:0", shape=(?, 9), dtype=float32)
unflat_scores Tensor("ner/block_1/output/Reshape:0", shape=(?, 124, 9), dtype=float32)
block output Tensor("ner/block_2/iterated-block/relu_2:0", shape=(?, ?, ?, 300), dtype=float32)
h_concat_squeeze Tensor("ner/block_2/Squeeze:0", shape=(?, ?, 300), dtype=float32)
h_concat_flat Tensor("ner/block_2/Reshape:0", shape=(?, 300), dtype=float32)
input_to_pred Tensor("ner/block_2/hidden_dropout/dropout/mul:0", shape=(?, 300), dtype=float32)
proj_width 300
scores Tensor("ner/block_2/output/scores:0", shape=(?, 9), dtype=float32)
unflat_scores Tensor("ner/block_2/output/Reshape:0", shape=(?, 124, 9), dtype=float32)
crf=True, creating SLL
crf=True, creating SLL
crf=True, creating SLL
/opt/conda/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py:91: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Setting up word embedding visualization
Writing metadata
Training epoch 1.
Train (Loss 0.0780) (185.126 sec)
Validation (F1 = 0.8433) (Acc 50067/51409 = 0.9739) (9.690 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 2.
	(last improvement @ 1)
Train (Loss 0.0275) (184.544 sec)
Validation (F1 = 0.8854) (Acc 50366/51409 = 0.9797) (9.479 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 3.
	(last improvement @ 2)
Train (Loss 0.0170) (184.135 sec)
Validation (F1 = 0.8940) (Acc 50455/51409 = 0.9814) (9.470 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 4.
	(last improvement @ 3)
Train (Loss 0.0119) (184.240 sec)
Validation (F1 = 0.8693) (Acc 50256/51409 = 0.9776) (9.480 sec)
Training epoch 5.
	(last improvement @ 3)
Train (Loss 0.0081) (184.122 sec)
Validation (F1 = 0.8978) (Acc 50488/51409 = 0.9821) (9.470 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 6.
	(last improvement @ 5)
Train (Loss 0.0059) (184.112 sec)
Validation (F1 = 0.9000) (Acc 50533/51409 = 0.9830) (9.492 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 7.
	(last improvement @ 6)
Train (Loss 0.0051) (184.330 sec)
Validation (F1 = 0.8969) (Acc 50537/51409 = 0.9830) (9.472 sec)
Training epoch 8.
	(last improvement @ 6)
Train (Loss 0.0046) (184.252 sec)
Validation (F1 = 0.9058) (Acc 50581/51409 = 0.9839) (9.475 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 9.
	(last improvement @ 8)
Train (Loss 0.0040) (184.411 sec)
Validation (F1 = 0.9096) (Acc 50581/51409 = 0.9839) (9.477 sec)
Highest dev F1 achieved yet -- writing model
Training epoch 10.
	(last improvement @ 9)
Train (Loss 0.0038) (184.194 sec)
Validation (F1 = 0.9026) (Acc 50548/51409 = 0.9833) (9.478 sec)
Training epoch 11.
	(last improvement @ 9)
Train (Loss 0.0032) (184.339 sec)
Validation (F1 = 0.8985) (Acc 50504/51409 = 0.9824) (9.472 sec)
Training epoch 12.
	(last improvement @ 9)
Train (Loss 0.0035) (184.376 sec)
Validation (F1 = 0.9031) (Acc 50584/51409 = 0.9840) (9.473 sec)
Training epoch 13.
	(last improvement @ 9)
Train (Loss 0.0029) (184.432 sec)
Validation (F1 = 0.8989) (Acc 50514/51409 = 0.9826) (9.472 sec)
Training epoch 14.
	(last improvement @ 9)
Train (Loss 0.0032) (184.203 sec)
Validation (F1 = 0.8965) (Acc 50472/51409 = 0.9818) (9.491 sec)
Training epoch 15.
	(last improvement @ 9)
Train (Loss 0.0780) (184.150 sec)
Validation (F1 = 0.8992) (Acc 50479/51409 = 0.9819) (9.472 sec)
Training epoch 16.
	(last improvement @ 9)
Train (Loss 0.0025) (184.176 sec)
Validation (F1 = 0.8931) (Acc 50515/51409 = 0.9826) (9.477 sec)
Training epoch 17.
	(last improvement @ 9)
Train (Loss 0.0027) (184.025 sec)
Validation (F1 = 0.9027) (Acc 50527/51409 = 0.9828) (9.472 sec)
Training epoch 18.
	(last improvement @ 9)
Train (Loss 0.0029) (184.216 sec)
Validation (F1 = 0.8594) (Acc 50123/51409 = 0.9750) (9.474 sec)
Training epoch 19.
	(last improvement @ 9)
Train (Loss 0.0028) (184.371 sec)
Validation (F1 = 0.8901) (Acc 50416/51409 = 0.9807) (9.474 sec)
Training epoch 20.
	(last improvement @ 9)
Train (Loss 0.0033) (184.063 sec)
Validation (F1 = 0.8962) (Acc 50505/51409 = 0.9824) (9.475 sec)
Training epoch 21.
	(last improvement @ 9)
Train (Loss 0.0024) (184.082 sec)
Validation (F1 = 0.8653) (Acc 50216/51409 = 0.9768) (9.471 sec)
Training epoch 22.
	(last improvement @ 9)
Train (Loss 0.0022) (184.306 sec)
Validation (F1 = 0.8859) (Acc 50439/51409 = 0.9811) (9.470 sec)
Training epoch 23.
	(last improvement @ 9)
Train (Loss 0.0030) (184.135 sec)
Validation (F1 = 0.8923) (Acc 50440/51409 = 0.9812) (9.469 sec)
Training epoch 24.
	(last improvement @ 9)
Train (Loss 0.0023) (184.273 sec)
Validation (F1 = 0.8917) (Acc 50466/51409 = 0.9817) (9.475 sec)
Training epoch 25.
	(last improvement @ 9)
Train (Loss 0.0025) (184.249 sec)
Validation (F1 = 0.8919) (Acc 50464/51409 = 0.9816) (9.472 sec)
Training epoch 26.
	(last improvement @ 9)
Train (Loss 0.0024) (184.290 sec)
Validation (F1 = 0.8948) (Acc 50416/51409 = 0.9807) (9.473 sec)
Training epoch 27.
	(last improvement @ 9)
Train (Loss 0.0025) (184.221 sec)
Validation (F1 = 0.8929) (Acc 50415/51409 = 0.9807) (9.469 sec)
Training epoch 28.
	(last improvement @ 9)
Train (Loss 0.0019) (184.333 sec)
Validation (F1 = 0.8845) (Acc 50396/51409 = 0.9803) (9.473 sec)
Training epoch 29.
	(last improvement @ 9)
Train (Loss 0.0022) (184.331 sec)
Validation (F1 = 0.8818) (Acc 50313/51409 = 0.9787) (9.475 sec)
Training epoch 30.
	(last improvement @ 9)
Train (Loss 0.0026) (184.331 sec)
Validation (F1 = 0.8872) (Acc 50421/51409 = 0.9808) (9.468 sec)
Training epoch 31.
	(last improvement @ 9)
Train (Loss 0.0019) (184.306 sec)
Validation (F1 = 0.9050) (Acc 50547/51409 = 0.9832) (9.469 sec)
Training epoch 32.
	(last improvement @ 9)
Train (Loss 0.0031) (184.297 sec)
Validation (F1 = 0.8868) (Acc 50423/51409 = 0.9808) (9.475 sec)
Training epoch 33.
	(last improvement @ 9)
Train (Loss 0.0022) (184.284 sec)
Validation (F1 = 0.9008) (Acc 50517/51409 = 0.9826) (9.471 sec)
Training epoch 34.
	(last improvement @ 9)
Train (Loss 0.0025) (184.318 sec)
Validation (F1 = 0.8946) (Acc 50480/51409 = 0.9819) (9.471 sec)
Training epoch 35.
	(last improvement @ 9)
Train (Loss 0.0037) (184.190 sec)
Validation (F1 = 0.9012) (Acc 50536/51409 = 0.9830) (9.487 sec)
Training epoch 36.
	(last improvement @ 9)
Train (Loss 0.0014) (184.390 sec)
Validation (F1 = 0.8946) (Acc 50469/51409 = 0.9817) (9.471 sec)
Training epoch 37.
	(last improvement @ 9)
Train (Loss 0.0052) (184.374 sec)
Validation (F1 = 0.9002) (Acc 50518/51409 = 0.9827) (9.472 sec)
Training epoch 38.
	(last improvement @ 9)
Train (Loss 0.0016) (184.272 sec)
Validation (F1 = 0.9032) (Acc 50506/51409 = 0.9824) (9.469 sec)
Training epoch 39.
	(last improvement @ 9)
Train (Loss 0.0025) (184.458 sec)
Validation (F1 = 0.8953) (Acc 50509/51409 = 0.9825) (9.470 sec)
Training epoch 40.
	(last improvement @ 9)
Train (Loss 0.0023) (184.141 sec)
Validation (F1 = 0.8820) (Acc 50382/51409 = 0.9800) (9.467 sec)
Training epoch 41.
	(last improvement @ 9)
Train (Loss 0.0023) (184.303 sec)
Validation (F1 = 0.8954) (Acc 50442/51409 = 0.9812) (9.475 sec)
Training epoch 42.
	(last improvement @ 9)
Train (Loss 0.0028) (184.345 sec)
Validation (F1 = 0.8928) (Acc 50437/51409 = 0.9811) (9.477 sec)
Training epoch 43.
	(last improvement @ 9)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-7-41b3d28b4db4> in <module>()
     20               fscore=1,
     21               viz=1,
---> 22               crf=True)
     23 
     24 #1 -- 86.55 dropout at .45 and 300 filters. stopped at epoch 24.

/src/NLPutils/trainers/dconv2.py in train(self, name, ts, f2i, vs, es, char_vec, word_vec, eval_out, batchsz, epochs, dropout, test_thresh, patience, rnn, maxlen, maxw, wsz, hsz, cfiltsz, optim, eta, crf, fscore, viz, clip, kernel_size, num_layers, num_iterations, word_keep, num_filt)
    276                             if i > 0:
    277                                 print('\t(last improvement @ %d)' % (last_improved+1))
--> 278                             self._train(ts, dropout, batchsz, model, self.sess, word_keep)
    279                             this_acc, this_f1 = self.test(vs, batchsz, 'Validation')
    280 

/src/NLPutils/trainers/dconv2.py in _train(self, ts, dropout, batchsz, model, sess, word_keep)
    331             feed_dict = model.ex2dict(ts_i, 1.0-dropout, True, word_keep)
    332 
--> 333             _, step, summary_str, lossv = sess.run([self.train_op, self.global_step, self.summary_op, self.loss], feed_dict=feed_dict)
    334             self.train_writer.add_summary(summary_str, step)
    335 

/opt/conda/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/opt/conda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    963     if final_fetches or final_targets:
    964       results = self._do_run(handle, final_targets, final_fetches,
--> 965                              feed_dict_string, options, run_metadata)
    966     else:
    967       results = []

/opt/conda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1013     if handle is None:
   1014       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1015                            target_list, options, run_metadata)
   1016     else:
   1017       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/opt/conda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1020   def _do_call(self, fn, *args):
   1021     try:
-> 1022       return fn(*args)
   1023     except errors.OpError as e:
   1024       message = compat.as_text(e.message)

/opt/conda/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002         return tf_session.TF_Run(session, options,
   1003                                  feed_dict, fetch_list, target_list,
-> 1004                                  status, run_metadata)
   1005 
   1006     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt:
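The three "crf=True, creating SLL" lines in the trace indicate a sentence-level log-likelihood (CRF) objective attached to each iteration's unflattened scores. A minimal sketch of that objective using the tf.contrib.crf API of this TensorFlow generation; the placeholder names are hypothetical and the shapes follow the trace above:

import tensorflow as tf
from tensorflow.contrib import crf

# [batch, maxlen, num_labels] unary scores, as in unflat_scores above
unary = tf.placeholder(tf.float32, [None, 124, 9])
gold = tf.placeholder(tf.int32, [None, 124])   # gold label indices
lengths = tf.placeholder(tf.int32, [None])     # true sentence lengths

# crf_log_likelihood jointly learns a [9, 9] label-transition matrix
log_lik, transitions = crf.crf_log_likelihood(unary, gold, lengths)
loss = tf.reduce_mean(-log_lik)

At test time, crf.viterbi_decode recovers the best-scoring label sequence from the unary scores and the learned transition matrix.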