Importing required packages


In [1]:
from __future__ import print_function

import json
import os
import numpy as np
import sys
import h5py

from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
from keras.engine import Input
from keras.layers import Embedding, merge
from keras.models import Model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.preprocessing import sequence
from embeddings import Embeddings
from keras.callbacks import ModelCheckpoint

from nltk.tokenize import word_tokenize
import random


Using TensorFlow backend.

Instantiate Embeddings


In [2]:
# arguments configure the underlying Word2Vec training (300-d vectors plus,
# presumably, window/min-count/worker settings -- see the Embeddings class)
embeddings = Embeddings(300, 4, 1, 4)


Loading embeddings....
Loading Squad Data
Loading the embeddings from the cache
Starting tokenized, pos squad data.....
Combining Squad Data
Creating Tokenized Squad Corpus
Creating Tokenized Squad Corpus

Getting Data from Preprocessing


In [6]:
word2vec_weights = embeddings.get_weights()
word2index, index2word = embeddings.get_vocabulary()
word2vec_model = embeddings.get_model()
tokenized_indexed_sentences = embeddings.get_tokenized_indexed_sentences()
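
A quick sanity check on what the preprocessing hands back (illustrative; the exact shapes depend on the Embeddings configuration above, and 'the' is assumed to be in the vocabulary).


In [ ]:
print(word2vec_weights.shape)              # (vocab_size, 300): one row per word
print(index2word[word2index['the']])       # word -> index -> word round-trip
print(len(tokenized_indexed_sentences))    # number of indexed sentences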

Generating Training Data


In [7]:
window_size = 5  # predict the next word from the previous 5 word indices
vocab_size = len(word2index)
print(vocab_size)


42049

In [8]:
seq_in = []
seq_out = []
# generating the dataset: each window of `window_size` word indices
# predicts the embedding vector of the word that follows it
for sentence in tokenized_indexed_sentences:
    # note: the extra -1 skips the last window of each sentence
    for i in range(len(sentence) - window_size - 1):
        x = sentence[i:i + window_size]
        y = sentence[i + window_size]
        seq_in.append(x)
        seq_out.append(word2vec_weights[y])

# converting seq_in and seq_out into numpy arrays
seq_in = np.array(seq_in)
seq_out = np.array(seq_out)
n_samples = len(seq_in)
print("Number of samples : ", n_samples)


Number of samples :  610849
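
A toy run of the same windowing (using the full range, i.e. without the extra -1 noted in the loop above), with a window of 3 over a five-element indexed sentence; values are illustrative only.


In [ ]:
toy_sentence = [4, 7, 1, 9, 2]
toy_window = 3
for i in range(len(toy_sentence) - toy_window):
    print(toy_sentence[i:i + toy_window], "->", toy_sentence[i + toy_window])
# prints: [4, 7, 1] -> 9
#         [7, 1, 9] -> 2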

Defining the Model


In [ ]:
# Changes to the model to be done here
model = Sequential()
model.add(Embedding(input_dim=word2vec_weights.shape[0], output_dim=word2vec_weights.shape[1], weights=[word2vec_weights]))
model.add(LSTM(1024, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(512))
#model.add(Dropout(0.2))
# regression head: outputs a 300-d vector in embedding space
# (note: relu clips negative components, which word2vec vectors can have)
model.add(Dense(word2vec_weights.shape[1], activation='relu'))
# resume from the final checkpoint of a previous 25-epoch run
model.load_weights("../weights/lstm-2-1024-512-batchsize-128-epochs-25/weights.24-0.22.hdf5")
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
model.summary()
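
The head above regresses 300-d embedding vectors with MSE and decodes predictions by nearest neighbour in embedding space (see the prediction cell below). For contrast only, a common alternative (not what this notebook does) is a softmax over the vocabulary trained directly on the integer indices:


In [ ]:
# Hypothetical classification head; targets would be the integer `y`
# indices from the dataset cell, not embedding vectors.
alt_model = Sequential()
alt_model.add(Embedding(input_dim=word2vec_weights.shape[0],
                        output_dim=word2vec_weights.shape[1],
                        weights=[word2vec_weights]))
alt_model.add(LSTM(512))
alt_model.add(Dense(word2vec_weights.shape[0], activation='softmax'))
alt_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')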

In [ ]:
model_weights_path = "../weights/lstm-2-1024-512-batchsize-128-epochs-25"
if not os.path.exists(model_weights_path):
    os.makedirs(model_weights_path)
# val_acc is only available when fit() gets validation data
# (the validation_split=0.2 in the training cell below provides it)
checkpoint_path = model_weights_path + '/weights.{epoch:02d}-{val_acc:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath=checkpoint_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max')

Train Model


In [12]:
# Training is commented out here (weights were loaded from a checkpoint above);
# the model_fit_summary cells further down require this line to have run.
# model_fit_summary = model.fit(seq_in, seq_out, epochs=25, verbose=1, validation_split=0.2, batch_size=128, callbacks=[checkpoint])

Accuracy


In [ ]:
accuracy = model.evaluate(seq_in, seq_out)  # returns [mse loss, accuracy]

Model Predict


In [21]:
start = 100
pattern = list(seq_in[start])
print("\"", ' '.join(index2word[index] for index in pattern))
for i in range(10):
    # greedy decoding: predict an embedding vector, map it back to the
    # nearest word in embedding space, then slide the window forward by one
    prediction = model.predict(np.array([pattern]))
    pred_word = word2vec_model.similar_by_vector(prediction[0])[0][0]
    sys.stdout.write(pred_word + " ")
    pattern.append(word2index[pred_word])
    pattern = pattern[1:]


" how many times have the
corrientes billaut handing self-interest profitabl circulated iconodules profitabl profitabl eoka 

Accuracy


In [32]:
def accuracy():
    count = 0
    correct = 0
    for sub_sample_in, sub_sample_out in zip(seq_in, seq_out):
        ypred = model.predict_on_batch(np.expand_dims(sub_sample_in, axis=0))[0]
        ytrue = sub_sample_out
        pred_word = word2vec_model.similar_by_vector(ypred)[0][0]
        true_word = word2vec_model.similar_by_vector(ytrue)[0][0]
        # correct if the nearest word to the prediction is sufficiently
        # similar (cosine >= .85) to the nearest word to the true vector
        similarity = word2vec_model.similarity(pred_word, true_word)
        if similarity >= .85:
            correct += 1
        count += 1
    print("Accuracy {0}".format(correct / count))
    return correct / count  # return the score so the cells below can store it

In [33]:
#seq_out[0]

In [38]:
model_results = model_fit_summary.history


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-fdb260eebd22> in <module>()
----> 1 model_results = model_fit_summary.history

NameError: name 'model_fit_summary' is not defined

In [35]:
model_results.update(model_fit_summary.params)

In [36]:
model_results["train_accuracy"] = accuracy()

In [37]:
accuracy = accuracy()  # note: this rebinds the name, shadowing the function above


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-37-f99444d5bdb2> in <module>()
----> 1 accuracy = accuracy()

<ipython-input-32-2593a2dd52a9> in accuracy()
      3     correct = 0
      4     for sub_sample_in, sub_sample_out in zip(seq_in, seq_out):
----> 5         ypred = model.predict_on_batch(np.expand_dims(sub_sample_in, axis=0))[0]
      6         ytrue = sub_sample_out
      7         pred_word = word2vec_model.similar_by_vector(ypred)[0][0]

/home/user/venvs/autofill/lib/python3.5/site-packages/keras/models.py in predict_on_batch(self, x)
    914         if self.model is None:
    915             self.build()
--> 916         return self.model.predict_on_batch(x)
    917 
    918     def train_on_batch(self, x, y, class_weight=None,

/home/user/venvs/autofill/lib/python3.5/site-packages/keras/engine/training.py in predict_on_batch(self, x)
   1694             ins = x
   1695         self._make_predict_function()
-> 1696         outputs = self.predict_function(ins)
   1697         if len(outputs) == 1:
   1698             return outputs[0]

/home/user/venvs/autofill/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2227         session = get_session()
   2228         updated = session.run(self.outputs + [self.updates_op],
-> 2229                               feed_dict=feed_dict)
   2230         return updated[:len(self.outputs)]
   2231 

/home/user/venvs/autofill/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    776     try:
    777       result = self._run(None, fetches, feed_dict, options_ptr,
--> 778                          run_metadata_ptr)
    779       if run_metadata:
    780         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/user/venvs/autofill/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    980     if final_fetches or final_targets:
    981       results = self._do_run(handle, final_targets, final_fetches,
--> 982                              feed_dict_string, options, run_metadata)
    983     else:
    984       results = []

/home/user/venvs/autofill/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1030     if handle is None:
   1031       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032                            target_list, options, run_metadata)
   1033     else:
   1034       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/home/user/venvs/autofill/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1037   def _do_call(self, fn, *args):
   1038     try:
-> 1039       return fn(*args)
   1040     except errors.OpError as e:
   1041       message = compat.as_text(e.message)

/home/user/venvs/autofill/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1019         return tf_session.TF_Run(session, options,
   1020                                  feed_dict, fetch_list, target_list,
-> 1021                                  status, run_metadata)
   1022 
   1023     def _prun_fn(session, handle, feed_dict, fetch_list):

/usr/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback)
     64         if type is None:
     65             try:
---> 66                 next(self.gen)
     67             except StopIteration:
     68                 return

/home/user/venvs/autofill/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
    460   try:
    461     yield status
--> 462     if pywrap_tensorflow.TF_GetCode(status) != 0:
    463       raise _make_specific_exception(
    464           None, None,

KeyboardInterrupt: 

In [26]:
text_file_path = "../weights/lstm-2-1024-512-batchsize-128-epochs-25/model_results.json"

In [27]:
with open(text_file_path, "w") as f:
    json.dump(model_results, f)
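
To inspect the saved results later, a minimal read-back (assumes the dump above completed):


In [ ]:
with open(text_file_path) as f:
    saved_results = json.load(f)
print(sorted(saved_results.keys()))  # history keys plus the fit parameters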
