Training a model using processed data
In [1]:
import os
import sys
import numpy as np
import pickle
from sklearn.metrics import f1_score
# Make the project's `src` directory importable. The path is resolved as
# <current working directory>/../src, so it depends on where the kernel was started.
src = os.path.join(os.getcwd(), os.pardir, 'src')
sys.path.append(src)
from features import feature_proc
from model import train_model
In [2]:
# Load the preprocessed dataset. The pickle holds a 4-item sequence that is
# unpacked as (X, y, labels, words_num).
# NOTE(review): pickle.load can execute arbitrary code during deserialization;
# only open files produced by this project's own preprocessing step.
dataset_path = '../data/processed/SentiRuEval2016.pickle'
with open(dataset_path, 'rb') as bin_data:
    X, y, labels, words_num = pickle.load(bin_data)
In [3]:
# Tags selecting which samples go to the training and the test split
# (presumably dataset domains such as banks / telecom — confirm against feature_proc).
train_data = {"bank", "ttk"}
test_data = {"bank"}

# One selection marker per entry of `labels`, produced by the project helper.
selected = [
    feature_proc.get_sample_case(label, train_data, test_data)
    for label in labels
]

# Split features and targets into train/test parts according to the markers.
X_train, X_test, y_train, y_test = feature_proc.split_data(X, y, selected)
In [4]:
# Build the model with the project helper. `words_num` is the value unpacked from
# the dataset pickle (presumably the vocabulary size — confirm in train_model.build_model).
model = train_model.build_model(words_num)
In [5]:
# Training hyper-parameters.
batch_size = 50
nb_epoch = 2

# Fit on the training split.
# NOTE(review): the test split is also passed as validation data, so validation
# metrics reported during fitting are not independent of the final evaluation.
# NOTE(review): `nb_epoch` looks like the Keras 1.x keyword (later versions use
# `epochs`) — confirm against the installed library before changing it.
model.fit(
    X_train,
    [y_train],
    nb_epoch=nb_epoch,
    batch_size=batch_size,
    validation_data=(X_test, [y_test])
)

# Predict on the test split one sample per batch.
y_pred = model.predict(X_test, batch_size=1)
In [6]:
# Labels 1 and 2 correspond to positive and negative samples; only these two
# classes are included in the F1 scores.
scored_labels = [1, 2]

# argmax over axis 1 turns each row into a class index; compute it once and reuse.
true_classes = y_test.argmax(axis=1)
pred_classes = y_pred.argmax(axis=1)

f_macro = f1_score(true_classes, pred_classes, labels=scored_labels, average='macro')
f_micro = f1_score(true_classes, pred_classes, labels=scored_labels, average='micro')

print("Result scores for prediction negative and positive classes")
print("f_macro: {:.4f}".format(f_macro))
print("f_micro: {:.4f}".format(f_micro))
In [8]:
# Save the trained model's weights and architecture to disk.
path_to_weights = "../models/example_weights.h5"
path_to_arch = "../models/example_arch.json"

# Weights go to the .h5 file; the path is already a plain string.
model.save_weights(path_to_weights)

# The architecture is serialized to JSON. The context manager guarantees the
# file is flushed and closed even if the write fails.
json_string = model.to_json()
with open(path_to_arch, 'w') as arch_file:
    arch_file.write(json_string)
Out[8]:
In [ ]:
In [32]:
In [ ]: