Training a model using the processed data


In [1]:
import os
import sys
import numpy as np
import pickle
from sklearn.metrics import f1_score
    
# Make the project's `src` directory (sibling of the notebook's working
# directory) importable so the local packages below can be found.
src = os.path.join(os.getcwd(), os.pardir, 'src')
# Guard the append so re-running this cell does not keep adding duplicate
# entries to sys.path.
if src not in sys.path:
    sys.path.append(src)

from features import feature_proc
from model import train_model


Using Theano backend.
Using gpu device 0: GeForce GTX 690 (CNMeM is disabled, cuDNN not available)

In [2]:
# Read the preprocessed dataset bundle from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
dataset_path = '../data/processed/SentiRuEval2016.pickle'
with open(dataset_path, 'rb') as bin_data:
    dataset = pickle.load(bin_data)

# The pickle holds a 4-item sequence: features, targets, per-sample labels,
# and a value later passed to build_model (presumably the vocabulary size —
# TODO confirm against the preprocessing code).
X, y, labels, words_num = dataset

In [3]:
# Tags selecting which samples go into the train and test splits.
# NOTE(review): "bank" appears in both sets; how get_sample_case resolves
# that overlap is defined in feature_proc — confirm there.
train_data = {"bank", "ttk"}
test_data = {"bank"}

# One selection marker per sample label, computed by the project helper.
selected = [
    feature_proc.get_sample_case(label, train_data, test_data)
    for label in labels
]

split = feature_proc.split_data(X, y, selected)
X_train, X_test, y_train, y_test = split

In [4]:
# Build the network via the project's train_model helper, parameterised by
# words_num loaded from the dataset pickle (presumably the vocabulary size —
# TODO confirm in train_model.build_model).
model = train_model.build_model(words_num)

In [5]:
# Training hyper-parameters.
batch_size = 50
nb_epoch = 2

# NOTE(review): the test split doubles as validation data here, so the
# val_* numbers in the training log are not an independent estimate.
fit_options = dict(
    nb_epoch=nb_epoch,
    batch_size=batch_size,
    validation_data=(X_test, [y_test]),
)
model.fit(X_train, [y_train], **fit_options)

# Predictions for the test split, later decoded with argmax.
# NOTE(review): batch_size=1 runs one sample per batch, which is likely slow.
y_pred = model.predict(X_test, batch_size=1)


Train on 18989 samples, validate on 3395 samples
Epoch 1/2
18989/18989 [==============================] - 75s - loss: 0.5376 - acc: 0.7835 - val_loss: 0.6010 - val_acc: 0.7429
Epoch 2/2
18989/18989 [==============================] - 75s - loss: 0.2807 - acc: 0.8983 - val_loss: 0.7133 - val_acc: 0.7320

In [6]:
# Score only class indices 1 and 2, i.e. the positive and negative classes.
# NOTE(review): which index is positive and which is negative is not visible
# in this notebook — confirm against the preprocessing code.
target_labels = [1, 2]

# Decode one-hot / probability rows into class indices once and reuse them.
y_true_cls = y_test.argmax(axis=1)
y_pred_cls = y_pred.argmax(axis=1)

f_macro = f1_score(y_true_cls, y_pred_cls, labels=target_labels, average='macro')
f_micro = f1_score(y_true_cls, y_pred_cls, labels=target_labels, average='micro')

print("Result scores for prediction negative and positive classes")
print("f_macro: {:.4f}".format(f_macro))
print("f_micro: {:.4f}".format(f_micro))


Result scores for prediction negative and positive classes
f_macro: 0.5127
f_micro: 0.5528

In [8]:
# Save the trained model: weights to an HDF5 file, architecture to JSON.

path_to_weights = "../models/example_weights.h5"
path_to_arch = "../models/example_arch.json"

model.save_weights(path_to_weights)
json_string = model.to_json()

# Use a context manager so the file is flushed and closed deterministically
# instead of relying on garbage collection of an anonymous file object.
with open(path_to_arch, 'w') as arch_file:
    chars_written = arch_file.write(json_string)

# Keep the number of characters written as the cell's displayed result.
chars_written


Out[8]:
7450

In [ ]:


In [32]:


In [ ]: