In [ ]:
%load_ext line_profiler
%load_ext autoreload
%autoreload 2
import os, sys, time
import numpy as np
from scipy.optimize import check_grad
from scipy.sparse import issparse, csc_matrix, csr_matrix
In [ ]:
sys.path.append('src/')
sys.path.append('src/models')
#from MLC import objective, risk_pclassification, DataHelper
#from NSR import objective_clf, risk_pclassification, DataHelper, obj_clf_loop
#from MTC import objective, risk_pclassification, DataHelper
from MTC_L1 import objective, risk_pclassification, DataHelper
from tools import create_dataset, dataset_names, nLabels_dict
In [ ]:
dataset_names
In [ ]:
data_ix = 0
In [ ]:
dataset_name = dataset_names[data_ix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)
In [ ]:
data_dir = 'data'
SEED = 918273645
Load data.
In [ ]:
X_train, Y_train = create_dataset(dataset_name, train_data=True, shuffle=True, random_state=SEED)
X_test, Y_test = create_dataset(dataset_name, train_data=False)
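The standardisation below subtracts the mean in place, which assumes dense feature matrices. If `create_dataset` were to return sparse features, they should be densified first; a minimal sketch, assuming the dense copies fit in memory:
In [ ]:
# Sketch: the in-place standardisation below needs dense arrays; densify
# sparse features first (assumes the dense copies fit in memory).
if issparse(X_train):
    X_train = X_train.toarray()
if issparse(X_test):
    X_test = X_test.toarray()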
Feature normalisation.
In [ ]:
X_train_mean = np.mean(X_train, axis=0).reshape((1, -1))
X_train_std = np.std(X_train, axis=0).reshape((1, -1)) + 10 ** (-6)
X_train -= X_train_mean
X_train /= X_train_std
X_test -= X_train_mean
X_test /= X_train_std
In [ ]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
    N_train, D = X_train.shape
    K = Y_train.shape[1]
    N_test = X_test.shape[0]
    print('%-45s %s' % ('Number of training examples:', '{:,}'.format(N_train)))
    print('%-45s %s' % ('Number of test examples:', '{:,}'.format(N_test)))
    print('%-45s %s' % ('Number of features:', '{:,}'.format(D)))
    print('%-45s %s' % ('Number of labels:', '{:,}'.format(K)))
    avgK_train = np.mean(np.sum(Y_train, axis=1))
    avgK_test = np.mean(np.sum(Y_test, axis=1))
    print('%-45s %.3f (%.2f%%)' % ('Average number of positive labels (train):', avgK_train, 100 * avgK_train / K))
    print('%-45s %.3f (%.2f%%)' % ('Average number of positive labels (test):', avgK_test, 100 * avgK_test / K))
    #print('%-45s %.4f%%' % ('Average label occurrence (train):', np.mean(np.sum(Y_train, axis=0)) / N_train))
    #print('%-45s %.4f%%' % ('Average label occurrence (test):', np.mean(np.sum(Y_test, axis=0)) / N_test))
    # This is the density of positive entries (100% minus the sparsity).
    print('%-45s %.3f%%' % ('Density of positive entries (train):', 100 * np.sum(Y_train) / np.prod(Y_train.shape)))
    print('%-45s %.3f%%' % ('Density of positive entries (test):', 100 * np.sum(Y_test) / np.prod(Y_test.shape)))
In [ ]:
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)
Check gradient.
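`scipy.optimize.check_grad` returns the 2-norm of the difference between the supplied gradient and a finite-difference approximation, so values around `1e-6` or smaller indicate agreement. A toy example of the expected scale (illustrative only):
In [ ]:
# Toy example: for f(w) = ||w||^2 / 2 the exact gradient is w, so
# check_grad should return a very small value (~1e-7 or less).
w_toy = np.random.randn(5)
print(check_grad(lambda w: 0.5 * w.dot(w), lambda w: w, w_toy))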
In [ ]:
%%script false
# NB: obj_pclassification comes from the MLC module (import commented out
# above); this cell is disabled via %%script false.
PU = np.zeros((Y_train.shape[0], 3), dtype=Y_train.dtype)
PU[[0, 1, 2, 10], [0, 1, 1, 2]] = 1
upl_ix = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13, 14, 15]]
w0 = 0.001 * np.random.randn((Y_train.shape[1] + 3) * X_train.shape[1] + 1)
loss = 'both'
check_grad(
    lambda w: obj_pclassification(w, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss,
                                  PU=PU, user_playlist_indices=upl_ix)[0],
    lambda w: obj_pclassification(w, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss,
                                  PU=PU, user_playlist_indices=upl_ix)[1],
    w0)
In [ ]:
%%script false
cliques = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
#cliques = None
w0 = 0.001 * np.random.randn(Y_train.shape[1] * X_train.shape[1] + 1)
#w0 = np.zeros(Y_train.shape[1] * X_train.shape[1] + 1)
dw = np.zeros_like(w0)
loss = 'example'
bs = 5 if loss == 'label' else 100
Y_train = csr_matrix(Y_train)
data_helper_example = None if loss == 'label' else DataHelper(Y_train, ax=0, batch_size=bs)
data_helper_label = None if loss == 'example' else DataHelper(Y_train, ax=1, batch_size=bs)
# objective() writes its gradient into dw as a side effect, so the gradient
# callback below simply returns dw.
#%lprun -f accumulate_risk \
#%lprun -f objective \
check_grad(lambda w: objective(w, dw, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss, cliques=cliques,
                               data_helper_example=data_helper_example, data_helper_label=data_helper_label),
           lambda w: dw, w0)
In [ ]:
%%script false
cliques = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
#cliques = None
w0 = 0.001 * np.random.randn(Y_train.shape[1] * (X_train.shape[1] + 1))
#w0 = np.zeros(Y_train.shape[1] * (X_train.shape[1] + 1))
dw = np.zeros_like(w0)
bs = 5
Y_train = csr_matrix(Y_train)
data_helper = DataHelper(Y_train, ax=1, batch_size=bs)
#%lprun -f accumulate_risk \
#%lprun -f objective \
check_grad(lambda w: objective(w, dw, X_train, Y_train, C1=10, C3=2, p=3,
                               cliques=cliques, data_helper=data_helper),
           lambda w: dw, w0)
In [ ]:
def check_grad_loop(obj, grad, w0):
    """Coordinate-wise gradient check: estimate each partial derivative with a
    central difference and return the 2-norm of (analytic - numerical)."""
    eps = 1.49e-08
    w = np.zeros_like(w0)
    for i in range(len(w0)):
        if (i + 1) % 10 == 0:
            sys.stdout.write('\r%d / %d' % (i + 1, len(w0)))
        wi1 = w0.copy()
        wi2 = w0.copy()
        wi1[i] = wi1[i] - eps
        wi2[i] = wi2[i] + eps
        J1 = obj(wi1)
        J2 = obj(wi2)
        w[i] = (J2 - J1) / (2 * eps)  # central difference for coordinate i
    w1 = grad(w0)
    diff = w1 - w
    return np.sqrt(np.dot(diff, diff))
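As a sanity check of the loop itself, it can be run on a quadratic with a known gradient (illustrative toy example); the result should be near zero, matching `check_grad`.
In [ ]:
# Toy check: f(w) = ||w||^2 / 2 has gradient w, so the loop should return ~0.
print(check_grad_loop(lambda w: 0.5 * w.dot(w), lambda w: w, np.random.randn(20)))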
In [ ]:
%%script false
# objective(w, dw, X, Y, C, p, cliques, data_helper, UF=None, njobs=1, verbose=0, fnpy=None)
cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
#cliques = [[0], [1, 2], [3, 4, 5]]
Ycsc = csc_matrix(Y_train)
data_helper = DataHelper(Ycsc, cliques)
# User features: per-clique counts of positive labels, standardised below.
UF = np.zeros((X_train.shape[0], len(cliques)))
for u in range(len(cliques)):
    clq = cliques[u]
    UF[:, u] = Ycsc[:, clq].sum(axis=1).A.reshape(-1)
UF_mean = np.mean(UF, axis=0).reshape((1, -1))
UF_std = np.std(UF, axis=0).reshape((1, -1)) + 10 ** (-6)
UF -= UF_mean
UF /= UF_std
w0 = 0.001 * np.random.randn((len(cliques) + Y_train.shape[1] + 1) * (X_train.shape[1] + len(cliques) - 1))
#UF = None
#w0 = 0.001 * np.random.randn((len(cliques) + Y_train.shape[1] + 1) * X_train.shape[1])
dw = np.zeros_like(w0)
#%lprun -f accumulate_risk \
#%lprun -f objective \
check_grad(lambda w: objective(w, dw, X_train, Y_train, C=10, p=3, cliques=cliques,
                               data_helper=data_helper, UF=UF),
           lambda w: dw, w0)
In [ ]:
%%script false
check_grad_loop(lambda w: objective(w, dw, X_train, Y_train, C=10, p=3, cliques=cliques, \
data_helper=data_helper, UF=UF), \
lambda w: dw, w0)
In [ ]:
#%%script false
# objective(w, dw, X, Y, p, cliques, data_helper, verbose=0, fnpy=None)
cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
#cliques = [[0], [1, 2], [3, 4, 5]]
Ycsc = csc_matrix(Y_train)
data_helper = DataHelper(Ycsc, cliques)
w0 = 0.001 * np.random.randn((len(cliques) + Y_train.shape[1] + 1) * X_train.shape[1])
dw = np.zeros_like(w0)
#%lprun -f accumulate_risk \
#%lprun -f objective \
check_grad(lambda w: objective(w, dw, X_train, Y_train, p=6, cliques=cliques, data_helper=data_helper), \
lambda w: dw, w0)
In [ ]:
import gzip
import pickle as pkl
pkldir = 'data/%s/setting1' % dataset_name
os.makedirs(pkldir, exist_ok=True)  # make sure the output directory exists
pkl.dump(X_train, gzip.open(os.path.join(pkldir, 'X_train.pkl.gz'), 'wb'))
pkl.dump(csc_matrix(Y_train), gzip.open(os.path.join(pkldir, 'Y_train.pkl.gz'), 'wb'))
pkl.dump(cliques, gzip.open(os.path.join(pkldir, 'cliques_train.pkl.gz'), 'wb'))
pkl.dump(X_test, gzip.open(os.path.join(pkldir, 'X_dev.pkl.gz'), 'wb'))
pkl.dump(csc_matrix(Y_test), gzip.open(os.path.join(pkldir, 'Y_dev.pkl.gz'), 'wb'))
pkl.dump(cliques, gzip.open(os.path.join(pkldir, 'cliques_trndev.pkl.gz'), 'wb'))
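The dumps can be verified by reloading with the matching calls; a quick round-trip check on one file:
In [ ]:
# Reload one pickle to verify the round trip.
X_chk = pkl.load(gzip.open(os.path.join(pkldir, 'X_train.pkl.gz'), 'rb'))
assert X_chk.shape == X_train.shape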
In [ ]:
dataset_name
In [ ]:
%%script false
# objective_clf / obj_clf_loop come from NSR (import commented out above);
# compare the vectorised objective against the loop implementation.
print('%-20s %-20s' % ('J1-J2', '|G1-G2|'))
#cliques = [[0], [1], [2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
cliques = [np.arange(Y_train.shape[1])]
Y_train = csc_matrix(Y_train)
data_helper = DataHelper(Y_train, cliques)
for i in range(10):
    w0 = 0.001 * np.random.randn((len(cliques) + Y_train.shape[1] + 1) * X_train.shape[1])
    dw1 = np.zeros_like(w0)
    dw2 = np.zeros_like(w0)
    J1 = objective_clf(w0, dw1, X_train, Y_train, C=10, p=3, cliques=cliques, data_helper=data_helper)
    J2 = obj_clf_loop(w0, dw2, X_train, Y_train, C=10, p=3, cliques=cliques, data_helper=data_helper)
    print('%-20s %-20s' % ('%g' % (J1 - J2), '%g' % np.sqrt(np.dot(dw1 - dw2, dw1 - dw2))))
In [ ]:
# Self-contained toy problem (re-imports on purpose so the cell runs standalone).
import numpy as np
import sys
from scipy.optimize import check_grad
#m, n, d = 100, 50, 20
m, n, d = 1500, 14, 103
X = np.random.randn(m, d)
w0 = 0.001 * np.random.randn(n, m).reshape(-1)

def obj(w):
    # f(W) = (1/2) * T' X X' T, where T = column sums of W (shape (m,)).
    assert w.shape == (n * m,)
    W = w.reshape(n, m)
    T = W.sum(axis=0)
    return T.dot(X).dot(X.T).dot(T) / 2

def grad(w):
    # df/dT = X X' T; every row of W contributes identically to T, so the
    # gradient w.r.t. W is this vector tiled over the n rows.
    assert w.shape == (n * m,)
    W = w.reshape(n, m)
    T = W.sum(axis=0)
    Tg = X.dot(X.T).dot(T).reshape(-1)
    return np.tile(Tg, (n, 1)).ravel()

from ad import gh
jac, hessian = gh(obj)
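Here `ad.gh` builds gradient and Hessian functions via automatic differentiation, giving a reference independent of both the analytic `grad` and the finite-difference checks below.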
In [ ]:
check_grad(obj, jac, w0)
In [ ]:
check_grad(obj, grad, w0)
In [ ]:
check_grad_loop(obj, grad, w0)