In [1]:
from process_data import DataHandler


Using TensorFlow backend.

In [2]:
# Build the data handler from the Hindi NER training file. The path is
# relative, so it resolves against the notebook's working directory.
# `s` is handed to NER(...) in a later cell.
s = DataHandler("../data/training_hindi_NER.utf8")

In [3]:
# Sanity check: shape of the first element returned by get_data().
# The recorded run shows (2023, 191, 50); 191 matches the sequence length in
# the model summary printed further down.
# Presumably (sentences, tokens per padded sentence, embedding size) -- TODO confirm.
s.get_data()[0].shape


Out[3]:
(2023, 191, 50)

In [4]:
from NER_model import NER

In [5]:
# Create the NER wrapper around the data handler built above.
m = NER(s)

In [6]:
# Build the network; the call prints the Keras layer summary recorded below.
# NOTE(review): the summary lists dropout_1 as the LAST layer, fed by the
# 12-unit TimeDistributed layer. Dropout applied to the output layer looks
# unintended and may explain the high, flat training loss -- confirm in NER_model.
# NOTE(review): the trailing "None" in the output suggests the return value of
# summary() is being printed -- verify.
m.make_and_compile()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
bidirectional_1 (Bidirectional)  (None, 191, 300)      241200      bidirectional_input_1[0][0]      
____________________________________________________________________________________________________
timedistributed_1 (TimeDistribut (None, 191, 12)       3612        bidirectional_1[0][0]            
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 191, 12)       0           timedistributed_1[0][0]          
====================================================================================================
Total params: 244,812
Trainable params: 244,812
Non-trainable params: 0
____________________________________________________________________________________________________
None

In [7]:
# Train with an explicit epoch count.
# In the recorded run (1613 samples per epoch) accuracy plateaus at ~0.79 and
# loss at ~3.25 from epoch 2 onward, i.e. extra epochs bring no improvement.
m.train(epochs=10)


Epoch 1/10
1613/1613 [==============================] - 29s - loss: 3.6243 - acc: 0.7633    
Epoch 2/10
1613/1613 [==============================] - 20s - loss: 3.2839 - acc: 0.7908    
Epoch 3/10
1613/1613 [==============================] - 19s - loss: 3.2750 - acc: 0.7908    
Epoch 4/10
1613/1613 [==============================] - 19s - loss: 3.2753 - acc: 0.7906    
Epoch 5/10
1613/1613 [==============================] - 18s - loss: 3.2466 - acc: 0.7922    
Epoch 6/10
1613/1613 [==============================] - 18s - loss: 3.2592 - acc: 0.7913    
Epoch 7/10
1613/1613 [==============================] - 18s - loss: 3.2838 - acc: 0.7898    
Epoch 8/10
1613/1613 [==============================] - 19s - loss: 3.2770 - acc: 0.7901    
Epoch 9/10
1613/1613 [==============================] - 18s - loss: 3.2569 - acc: 0.7917    
Epoch 10/10
1613/1613 [==============================] - 18s - loss: 3.2486 - acc: 0.7922    

In [8]:
# Print the per-tag precision/recall/F1 report and display the confusion matrix.
# In the recorded run almost everything is predicted as "O": several tags
# (NEA, NED, NEM, NEO, NETI, NETO) are never predicted, which is what triggers
# sklearn's UndefinedMetricWarning shown in the output.
m.evaluate()


             precision    recall  f1-score   support

        NEA       0.00      0.00      0.00         7
        NED       0.00      0.00      0.00        48
        NEL       0.56      0.12      0.20       162
        NEM       0.00      0.00      0.00        17
        NEN       0.91      0.40      0.55       246
        NEO       0.00      0.00      0.00        31
        NEP       0.65      0.17      0.28       189
       NETE       0.00      0.00      0.00       160
       NETI       0.00      0.00      0.00        46
       NETO       0.00      0.00      0.00        41
          O       0.90      1.00      0.95      7105

avg / total       0.85      0.90      0.86      8052

/home/divesh_pandey/anaconda2/envs/keras_tensorflow/lib/python2.7/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Out[8]:
NEA NED NEL NEM NEN NEO NEP NETE NETI NETO O
NEA 0 0 0 0 0 0 0 0 0 0 7
NED 0 0 0 0 0 0 1 0 0 0 47
NEL 0 0 20 0 0 0 0 0 0 0 142
NEM 0 0 0 0 1 0 0 0 0 0 16
NEN 0 0 1 0 98 0 0 0 0 0 147
NEO 0 0 0 0 0 0 0 0 0 0 31
NEP 0 0 0 0 0 0 33 0 0 0 156
NETE 0 0 0 0 0 0 3 0 0 0 157
NETI 0 0 1 0 4 0 0 0 0 0 41
NETO 0 0 0 0 1 0 3 0 0 0 37
O 0 0 14 0 4 0 11 1 0 0 7075

In [9]:
# Tag one sample sentence; the output is one "token/tag" pair per line.
# In the recorded run every token, including the language names, was tagged "O".
m.predict_tags("इन्होंने भारतीय आर्य भाषा तथा द्रविड़ भाषाओं का व्याकरण नामक अन्य महत्वपूर्ण ग्रन्थ भी लिखे हैं ।")


इन्होंने/O
भारतीय/O
आर्य/O
भाषा/O
तथा/O
द्रविड़/O
भाषाओं/O
का/O
व्याकरण/O
नामक/O
अन्य/O
महत्वपूर्ण/O
ग्रन्थ/O
भी/O
लिखे/O
हैं/O
।/O

In [10]:
# Persist the trained model held by the wrapper. The path is relative to the
# working directory and has no file extension.
m.model.save("./first_hindi_model")

In [11]:
# Round trip: reload the model from the path it was saved to and swap it into
# the wrapper in place of the in-memory one.
# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell at the top. The path literal is duplicated from the save cell.
from keras.models import load_model
m.model = load_model("./first_hindi_model")

In [12]:
# Look up the word vector for a single Hindi word.
# NOTE(review): this rebinds `s`, which until now held the DataHandler created
# at the top of the notebook. Re-running earlier cells that read `s` after this
# one will see the vector instead. A distinct name would be safer; confirm no
# later cell depends on `s` before renaming.
from get_word_vectors import get_word_vector
s = get_word_vector("द्रविड़")