In [1]:
%matplotlib inline
In [2]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ['THEANO_FLAGS']='floatX=float32,device=gpu,nvcc.fastmath=True'
import theano
import theano.tensor as T
tf = theano.config.floatX
from math import sqrt
from scipy import io
from sklearn.metrics import roc_auc_score, average_precision_score
In [3]:
DATA_DIR = '/dataeast/dawen/'
In [4]:
K = 1024
In [5]:
X_train = np.load(os.path.join(DATA_DIR, 'X_train_K%d.npy' % K))
X_test = np.load(os.path.join(DATA_DIR, 'X_test_K%d.npy' % K))
y_train = np.load(os.path.join(DATA_DIR, 'y_train.npy'))
y_test = np.load(os.path.join(DATA_DIR, 'y_test.npy'))
y_train[y_train == 0] = -1
y_test[y_test == 0] = -1
In [6]:
#X_train = X_train / np.sum(X_train, axis=1, keepdims=True).astype(tf)
#X_test = X_test / np.sum(X_test, axis=1, keepdims=True).astype(tf)
# for numpy 1.6
X_train = X_train / (np.sum(X_train, axis=1)[:, None]).astype(tf)
X_test = X_test / (np.sum(X_test, axis=1)[:, None]).astype(tf)
In [7]:
rng = np.random.RandomState(seed=98765)
In [8]:
n_samples, n_feats = X_train.shape
n_tags = y_train.shape[1]
batch_size = 100
X_batch = theano.shared(np.zeros((batch_size, n_feats), dtype=tf), 'X_batch', borrow=True)
y_batch = theano.shared(np.zeros((batch_size, n_tags), dtype=np.int32), 'y_batch', borrow=True)
In [9]:
class LogisticRegressionLayer:
def __init__(self, rng, input, n_in, n_out):
# input and params
self.input = input
self.W = theano.shared(0.01 * rng.randn(n_in, n_out).astype(tf), name='W', borrow=True)
self.b = theano.shared(0.01 * rng.randn(n_out).astype(tf), name='b', borrow=True)
self.params = [self.W, self.b]
# AdaGrad
self.hist_grad_W = theano.shared(np.zeros((n_in, n_out), dtype=tf), name='hist_grad_W', borrow=True)
self.hist_grad_b = theano.shared(np.zeros(n_out, dtype=tf), name='hist_grad_b', borrow=True)
self.hist_grads = [self.hist_grad_W, self.hist_grad_b]
# output
self.linout = T.dot(self.input, self.W) + self.b
self.L2_sq = T.sum(T.sqr(self.W)) + T.sum(T.sqr(self.b))
def neg_log_likeli(self, y):
return T.mean(T.nnet.softplus(T.neg(y * self.linout)))
def set_params(self, W, b):
self.W.set_value(W.astype(tf))
self.b.set_value(b.astype(tf))
In [10]:
lam = T.scalar(name='lambda')
logistic_layer = LogisticRegressionLayer(rng, X_batch, n_feats, n_tags)
params = logistic_layer.params
hist_grads = logistic_layer.hist_grads
In [11]:
cost = logistic_layer.neg_log_likeli(y_batch) + lam * logistic_layer.L2_sq
grads = T.grad(cost, params)
In [12]:
eta = T.scalar(name='eta')
eps = 1e-10
updates = []
for param_i, grad_i, hist_grad_i in zip(params, grads, hist_grads):
new_hist_grad_i = hist_grad_i + T.sqr(grad_i)
updates.append((hist_grad_i, new_hist_grad_i))
updates.append((param_i, param_i - eta / (eps + T.sqrt(new_hist_grad_i)) * grad_i))
update_f = theano.function(inputs=[eta, lam], outputs=[logistic_layer.linout, cost], updates=updates)
In [13]:
def predict(X, W, b, y):
h = X.dot(W.get_value()) + b.get_value()
cost = lam * (np.sum(W.get_value()**2) + np.sum(b.get_value()**2))
cost += np.mean(np.log(1 + np.exp(-y * h)))
return cost, h
In [14]:
cost_train = []
cost_test = []
acc_train = []
acc_test = []
logistic_layer.set_params(0.01 * rng.randn(n_feats, n_tags), 0.01 * rng.randn(n_tags))
if np.any(hist_grads[0].get_value()):
hist_grads[0].set_value(np.zeros((n_feats, n_tags)).astype(tf))
hist_grads[1].set_value(np.zeros(n_tags).astype(tf))
In [15]:
eta = 1.
lam = 0
In [16]:
nepochs = 50
print 'epoch\tcost.train\tcost.test\ttrain.acc\ttest.acc'
sys.stdout.flush()
for epoch in xrange(nepochs):
# Execute stochastic gradient updates
for i in xrange(0, n_samples, batch_size):
inds = np.arange(i, min(i + batch_size, n_samples), dtype=int)
X_batch.set_value(X_train[inds].astype(tf))
y_batch.set_value(y_train[inds].astype(np.int32))
update_f(eta, lam)
cost, pred_train = predict(X_train, logistic_layer.W, logistic_layer.b, y_train)
cost_train.append(cost)
acc_train.append(1 - np.mean(np.logical_xor(pred_train > 0, y_train > 0)))
cost, pred_test = predict(X_test, logistic_layer.W, logistic_layer.b, y_test)
cost_test.append(cost)
acc_test.append(1 - np.mean(np.logical_xor(pred_test > 0, y_test > 0)))
pred_test = 1. / (1 + np.exp(-pred_test))
print('%d\t%.3e\t%.3e\t%.3e\t%.3e' % (epoch, cost_train[-1], cost_test[-1], acc_train[-1], acc_test[-1]))
sys.stdout.flush()
In [17]:
plt.plot(acc_train)
plt.plot(acc_test)
Out[17]:
In [18]:
# compute evaluation metrics
def construct_pred_mask(tags_predicted, predictat):
n_samples, n_tags = tags_predicted.shape
rankings = np.argsort(-tags_predicted, axis=1)[:, :predictat]
tags_predicted_binary = np.zeros_like(tags_predicted, dtype=bool)
for i in xrange(n_samples):
tags_predicted_binary[i, rankings[i]] = 1
return tags_predicted_binary
def per_tag_prec_recall(tags_predicted_binary, tags_true_binary):
mask = np.logical_and(tags_predicted_binary, tags_true_binary)
prec = mask.sum(axis=0) / (tags_predicted_binary.sum(axis=0) + np.spacing(1))
tags_true_count = tags_true_binary.sum(axis=0).astype(float)
idx = (tags_true_count > 0)
recall = mask.sum(axis=0)[idx] / tags_true_count[idx]
return prec, recall
def aroc_ap(tags_true_binary, tags_predicted):
n_tags = tags_true_binary.shape[1]
auc = list()
aprec = list()
for i in xrange(n_tags):
if np.sum(tags_true_binary[:, i]) != 0:
auc.append(roc_auc_score(tags_true_binary[:, i], tags_predicted[:, i]))
aprec.append(average_precision_score(tags_true_binary[:, i], tags_predicted[:, i]))
return auc, aprec
def print_out_metrics(tags_true_binary, tags_predicted, predictat):
tags_predicted_binary = construct_pred_mask(tags_predicted, predictat)
prec, recall = per_tag_prec_recall(tags_predicted_binary, tags_true_binary)
mprec, mrecall = np.mean(prec), np.mean(recall)
print 'Precision = %.3f (%.3f)' % (mprec, np.std(prec) / sqrt(prec.size))
print 'Recall = %.3f (%.3f)' % (mrecall, np.std(recall) / sqrt(recall.size))
print 'F-score = %.3f' % (2 * mprec * mrecall / (mprec + mrecall))
auc, aprec = aroc_ap(tags_true_binary, tags_predicted)
print 'AROC = %.3f (%.3f)' % (np.mean(auc), np.std(auc) / sqrt(len(auc)))
print 'AP = %.3f (%.3f)' % (np.mean(aprec), np.std(aprec) / sqrt(len(aprec)))
In [19]:
tags_predicted = pred_test
print tags_predicted.min(), tags_predicted.max()
div_factor = 1.25
tags_predicted = tags_predicted - div_factor * np.mean(tags_predicted, axis=0)
In [20]:
predictat = 20
tags_true_binary = (y_test > 0)
print_out_metrics(tags_true_binary, tags_predicted, predictat)
In [ ]: