In [1]:
import numpy as np
import pickle
from learnable_crf import LearnableCrf
from lib import *
from scipy.special import expit as sigmoid
In [2]:
# NOTE(review): pickle.load can execute arbitrary code stored in the file; only
# load caches that were produced by this project.
with open('cache/df_val_test.pickle', mode='rb') as h:
    df_val, df_test = pickle.load(h)
Y_val = df_val['label']
Y_test = df_test['label']

with open('cache/hex.pickle', mode='rb') as h:
    hex_data = pickle.load(h)

# For every entry keep its longest member, converted to a list so that it can be
# used as an integer index array. Built as a real list (not a lazy `map` object)
# so that `id_fh[label]` also works on Python 3.
id_fh = [list(max(labels, key=len)) for labels in hex_data['id_hierarchical_labels']]

# Keep only the states in which at least one of the first 20 entries is set.
# Built as a real list (not a lazy `filter` object) so that it can be indexed
# and iterated more than once on Python 3.
state_space = [state for state in hex_data['state_space'] if state[:20].any()]
In [3]:
# Identifier interpolated into the cache / result file names used below.
r = 90

with open('cache/df_train.{}.pickle'.format(r), mode='rb') as handle:
    df = pickle.load(handle)

# Positions of the training rows whose label is contained in their own
# pseudo_label entry; only those rows are used for training.
label_in_pseudo = [label in pseudo for label, pseudo in zip(df['label'], df['pseudo_label'])]
leaves = np.nonzero(label_in_pseudo)[0]
Y_train = df['label'][leaves]
In [4]:
# Locations of the stored arrays for the current `r`.
phi_train_file = 'results/svm_distance/kern_Phi_train.{}.npy'.format(r)
phi_val_file = 'results/svm_distance/kern_Phi_val.{}.npy'.format(r)
phi_test_file = 'results/svm_distance/kern_Phi_test.{}.npy'.format(r)

# Only the training array is restricted (along its second axis) to `leaves`.
model_Phi_train = np.load(phi_train_file)[:, leaves]
model_Phi_val = np.load(phi_val_file)
model_Phi_test = np.load(phi_test_file)

# Length of the first axis; the cells below loop over it with range(0, M).
M = model_Phi_train.shape[0]
In [5]:
# Pass all three arrays through the logistic sigmoid (scipy's `expit`).
# NOTE: the names are rebound in place, so re-running this cell without
# re-running the loading cell applies the sigmoid a second time.
model_Phi_train, model_Phi_val, model_Phi_test = map(
    sigmoid, (model_Phi_train, model_Phi_val, model_Phi_test)
)
In [6]:
def confusion_matrix(Y_predict, Y_truth, n_states=27, n_leaves=20, hierarchy=None):
    """Row-normalised confusion matrix of predictions against true labels.

    The prediction format is selected by ``Y_predict.dtype``:

    * boolean: every row of ``Y_predict`` is one state vector of length
      ``n_states``. It is added to the matrix row of each index listed in
      ``hierarchy[label]``, producing an ``(n_states, n_states)`` matrix.
    * any other dtype: every row is a score vector of length ``n_states``; the
      position of its maximum is counted in row ``label``, producing an
      ``(n_leaves, n_states)`` matrix.

    Each row is finally divided by the number of samples counted for it. Rows
    that received no sample are divided by zero and therefore come out as NaN
    (NumPy emits a RuntimeWarning in that case).

    Parameters
    ----------
    Y_predict : numpy.ndarray
        One prediction per row (see above).
    Y_truth : sequence of int
        True label of every prediction. It is read by position, so a pandas
        Series with a non-default index is paired with ``Y_predict`` row by row.
    n_states : int, optional
        Length of a state / score vector (default 27).
    n_leaves : int, optional
        Number of rows of the matrix in the non-boolean case (default 20).
    hierarchy : sequence of index lists, optional
        ``hierarchy[label]`` lists the rows updated for ``label`` in the boolean
        case. Defaults to the module-level ``id_fh``.

    Returns
    -------
    numpy.ndarray of float
        The row-normalised matrix.
    """
    # Positional view of the labels: a Series indexed with `[i]` would perform
    # a label-based lookup instead of taking the i-th element.
    truth = np.asarray(Y_truth)

    if Y_predict.dtype == bool:
        if hierarchy is None:
            hierarchy = id_fh
        cm = np.zeros((n_states, n_states), dtype=int)
        count = np.zeros(n_states, dtype=int)
        for i, y in enumerate(Y_predict):
            rows = hierarchy[truth[i]]
            count[rows] += 1
            cm[rows, :] += y
    else:
        cm = np.zeros((n_leaves, n_states), dtype=int)
        count = np.zeros(n_leaves, dtype=int)
        for i, y in enumerate(Y_predict):
            label = truth[i]
            count[label] += 1
            cm[label, y.argmax()] += 1

    return cm.astype(float) / count[:, None]
In [ ]:
def crf(Phi, k=3, states=None):
    """Return, for every row of ``Phi``, the ``k`` best-scoring states.

    A state is a boolean mask over the entries of a row ``phi``. Its score is
    the sum of ``phi`` over the entries the mask selects plus the sum of
    ``1 - phi`` over the entries it does not select.

    Parameters
    ----------
    Phi : iterable of 1-D numpy.ndarray
        One vector of values per sample.
    k : int, optional
        Number of states returned per sample (default 3). An IndexError is
        raised when fewer than ``k`` states are available.
    states : sequence of boolean arrays, optional
        Candidate states. Defaults to the module-level ``state_space``.

    Returns
    -------
    numpy.ndarray of bool
        For each sample the ``k`` best states stacked row-wise in ascending
        score order, i.e. the best state is the last row.
    """
    if states is None:
        states = state_space
    # Materialise the masks once: on Python 3 `map`/`filter` objects can be
    # neither indexed nor iterated twice. Casting to bool guarantees mask
    # (not integer) indexing below.
    masks = [np.asarray(state, dtype=bool) for state in states]
    inverse_masks = [np.logical_not(mask) for mask in masks]

    def step(phi):
        phi = np.asarray(phi)
        one_minus_phi = 1 - phi  # identical for every state, so computed once
        scores = [phi[on].sum() + one_minus_phi[off].sum()
                  for on, off in zip(masks, inverse_masks)]
        # return masks[np.argmax(scores)]
        order = np.argsort(scores)
        return np.vstack([masks[order[i]] for i in range(-k, 0)])

    return np.array([step(phi) for phi in Phi], dtype=bool)
In [ ]:
# Validation split, one value per model. Alternative metrics kept for reference:
#   [get_accuracy(model_Phi_val[i], Y_val, lim_states=False) for i in range(0, M)]
#   [top_k_accuracy(model_Phi_val[i], Y_val, k=3, lim_states=False) for i in range(0, M)]
#   [get_accuracy(crf(model_Phi_val[i]), Y_val) for i in range(0, M)]
# Active metric: top-3 accuracy of the `crf` predictions.
[top_k_accuracy(crf(model_Phi_val[m]), Y_val, k=3) for m in range(M)]
In [ ]:
# Test split, one value per model. Alternative metrics kept for reference:
#   [get_accuracy(model_Phi_test[i], Y_test, lim_states=False) for i in range(0, M)]
#   [top_k_accuracy(model_Phi_test[i], Y_test, k=3, lim_states=False) for i in range(0, M)]
#   [get_accuracy(crf(model_Phi_test[i]), Y_test) for i in range(0, M)]
# Active metric: top-3 accuracy of the `crf` predictions.
[top_k_accuracy(crf(model_Phi_test[m]), Y_test, k=3) for m in range(M)]
In [ ]:
# np.diagonal(confusion_matrix(model_Phi_test[0], Y_test))
# `crf` returns the three best states of every sample stacked in ascending score
# order, while the boolean branch of `confusion_matrix` adds exactly one state
# vector per sample. Keep only the best state (the last one of each stack).
crf_best_states = crf(model_Phi_test[2])[:, -1]
np.diagonal(confusion_matrix(crf_best_states, Y_test))
In [9]:
# Build one LearnableCrf per model. The construction used to be commented out,
# which made every cell using `lcrf` fail with a NameError on a fresh kernel.
# It is only built when missing so that re-running this cell reuses the existing
# models; `del lcrf` (or restart the kernel) to rebuild them after the training
# inputs change.
if 'lcrf' not in globals():
    lcrf = [LearnableCrf(model_Phi_train[i], Y_train) for i in range(0, M)]
# [get_accuracy(lcrf[i].predict(model_Phi_val[i]), Y_val) for i in range(0, M)]
[top_k_accuracy(lcrf[i].predict_top3(model_Phi_val[i]), Y_val, k=3) for i in range(0, M)]
Out[9]:
In [10]:
# Test split, one value per model. Alternative metric kept for reference:
#   [get_accuracy(lcrf[i].predict(model_Phi_test[i]), Y_test) for i in range(0, M)]
# Active metric: top-3 accuracy of the LearnableCrf predictions.
[top_k_accuracy(lcrf[m].predict_top3(model_Phi_test[m]), Y_test, k=3) for m in range(M)]
Out[10]:
In [ ]:
# Diagonal of the confusion matrix of the first LearnableCrf on the test split.
lcrf_test_predictions = lcrf[0].predict(model_Phi_test[0])
np.diagonal(confusion_matrix(lcrf_test_predictions, Y_test))
In [ ]:
# Display the `opt_theta` attribute of the first LearnableCrf
# (presumably its fitted parameters -- confirm in learnable_crf).
lcrf[0].opt_theta
In [ ]: