In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
g_cos = np.cos


def g_sigmoid(t, k=1.0):
    """Sigmoid-shaped curve in ``t`` that is 1 at 0, 0 at pi/2 and -1 at pi.

    Mathematically this is

        ((1 - a) * (1 + b)) / ((1 + a) * (1 - b)),
        a = exp((t - pi/2) / k),  b = exp(-pi / (2 k)),

    evaluated in the equivalent form ``-tanh((t - pi/2) / (2k)) / tanh(pi / (4k))``.
    The exponential form overflows (``inf / inf -> NaN``) once ``(t - pi/2) / k``
    gets large, e.g. for small ``k``; ``tanh`` saturates to +-1 instead.

    Args:
        t: Scalar or NumPy array of inputs.
        k: Steepness parameter; smaller values give a sharper transition.
            Must be non-zero.

    Returns:
        The curve evaluated element-wise at ``t``.
    """
    half_pi = np.pi / 2
    # (1 - e^u) / (1 + e^u) == -tanh(u / 2); the constant factor is coth(pi / (4k)).
    return -np.tanh((t - half_pi) / (2 * k)) / np.tanh(half_pi / (2 * k))
In [3]:
def g_sigmoid_grad(t, k=1.0):
    """Derivative of ``g_sigmoid`` with respect to ``t``.

    Mathematically this is

        -2 * (1 + b) * a / ((1 - b) * (1 + a)**2 * k),
        a = exp((t - pi/2) / k),  b = exp(-pi / (2 k)),

    evaluated as ``-(1 - tanh(u)**2) / (2 k tanh(pi / (4k)))`` with
    ``u = (t - pi/2) / (2k)``. The exponential form overflows to ``inf / inf``
    (NaN) when ``(t - pi/2) / k`` is large; the tanh form goes to 0 there.

    Args:
        t: Scalar or NumPy array of inputs.
        k: Steepness parameter; must be non-zero.

    Returns:
        The derivative evaluated element-wise at ``t``.
    """
    half_pi = np.pi / 2
    th = np.tanh((t - half_pi) / (2 * k))
    # a / (1 + a)**2 == (1 - tanh(u)**2) / 4 ; (1 + b) / (1 - b) == 1 / tanh(pi / (4k)).
    return -(1 - th ** 2) / (2 * k * np.tanh(half_pi / (2 * k)))
In [4]:
import chainer
import chainer.functions as F
import chainer.links as L
In [5]:
class AngleFunction(chainer.Function):
    """Cosine of the angle between every row of ``x`` and every column of ``w``.

    ``forward`` returns ``y[n, m] = <x[n, :], w[:, m]> / (|x[n, :]| * |w[:, m]|)``.
    ``backward`` returns the gradient of exactly that quantity, so the
    analytic gradient agrees with a finite-difference check of ``forward``.
    """

    @staticmethod
    def _cosine(xp, x, w):
        """Return ``(y, nx, nw)``: the cosine matrix, row norms of ``x`` as a
        column vector, and column norms of ``w`` as a row vector."""
        nx = xp.linalg.norm(x, axis=1)[:, None]
        nw = xp.linalg.norm(w, axis=0)[None, :]
        y = xp.dot(x, w) / (nx * nw)
        return y, nx, nw

    def forward(self, inputs):
        """Compute the cosine matrix for ``inputs = (x, w)``.

        Returns:
            A 1-tuple containing the array ``y`` described in the class docstring.
        """
        xp = chainer.cuda.get_array_module(*inputs)
        x, w = inputs
        y, _, _ = self._cosine(xp, x, w)
        return y,

    def backward(self, inputs, grad_outputs):
        """Back-propagate ``grad_outputs = (gy,)`` through the cosine.

        With ``c = y[n, m]``::

            dc/dx[n, :] = w[:, m] / (|x_n| |w_m|) - c * x[n, :] / |x_n|**2
            dc/dw[:, m] = x[n, :] / (|x_n| |w_m|) - c * w[:, m] / |w_m|**2

        Each row of ``gx`` therefore depends only on the matching row of
        ``x``, and each column of ``gw`` only on the matching column of ``w``.

        Returns:
            ``(gx, gw)`` with the same shapes as ``x`` and ``w``.
        """
        xp = chainer.cuda.get_array_module(*inputs)
        x, w = inputs
        gy, = grad_outputs
        y, nx, nw = self._cosine(xp, x, w)
        if gy is None:
            # A missing upstream gradient is treated as all ones.
            gy = xp.ones_like(y)
        scaled = gy / (nx * nw)   # gy[n, m] / (|x_n| |w_m|)
        weighted = gy * y         # gy[n, m] * cos[n, m]
        gx = scaled.dot(w.T) - x * (weighted.sum(axis=1, keepdims=True) / nx ** 2)
        gw = x.T.dot(scaled) - w * (weighted.sum(axis=0, keepdims=True) / nw ** 2)
        return gx, gw
def angle_function(x, w):
    """Apply a freshly constructed ``AngleFunction`` to ``x`` and ``w``.

    Args:
        x: First input passed to ``AngleFunction``.
        w: Second input passed to ``AngleFunction``.

    Returns:
        Whatever calling the ``AngleFunction`` instance on ``(x, w)`` returns.
    """
    func = AngleFunction()
    return func(x, w)
In [6]:
class AngleLink(chainer.Link):
    """Chainer link that owns a weight parameter ``W`` and applies
    ``angle_function`` between its input and ``W``."""

    def __init__(self, dim, n_filter):
        """Create ``W`` with shape ``(dim, n_filter)``.

        The initial values are drawn with ``np.random.random`` and cast to
        float32.
        """
        super(AngleLink, self).__init__()
        with self.init_scope():
            # Assigning a Parameter inside init_scope is how the link registers it.
            self.W = chainer.Parameter(
                np.random.random((dim, n_filter)).astype(np.float32)
            )

    def __call__(self, x):
        """Return ``angle_function(x, self.W)``."""
        weights = self.W
        return angle_function(x, weights)
In [13]:
def projection_onto_hypersphere(update_rule, w):
    """Rescale ``w.data`` in place so that its norm along axis 0 becomes 1.

    The function has the ``(update_rule, param)`` signature used when it is
    registered through ``update_rule.add_hook``; ``update_rule`` itself is
    not used.

    Args:
        update_rule: Ignored.
        w: Object whose ``data`` array is normalised in place.
    """
    xp = chainer.cuda.get_array_module(w)
    column_norms = xp.linalg.norm(w.data, axis=0)
    # In-place division so the array held by ``w`` is the one that changes.
    w.data /= column_norms[None, :]
In [19]:
# Problem size: N rows in the input, d features per row, M columns in the
# weight matrix (passed to AngleLink as n_filter in a later cell).
N, d, M = 5, 1000, 4

# Short aliases for the NumPy helpers.
norm = np.linalg.norm
random = np.random.random

# Random float32 input of shape (N, d), wrapped as a Chainer variable.
x = chainer.Variable(random((N, d)).astype(np.float32))
In [20]:
# Plain SGD with Chainer's default settings.
optimizer = chainer.optimizers.SGD()

# The link being trained: a (d, M) weight matrix behind angle_function.
l1 = AngleLink(d, M)

# Attach the optimizer to the link's parameters.
optimizer.setup(l1)
In [21]:
# Register the column-normalisation function as a hook on W's update rule.
# NOTE(review): whether the hook runs before or after the parameter update
# depends on Chainer's hook-timing rules for this version -- confirm.
l1.W.update_rule.add_hook(projection_onto_hypersphere)
In [22]:
# NOTE(review): `N` (previously the number of input rows) and `L` (the alias
# given to chainer.links at import time) are both rebound here. The names are
# kept because the plotting cell below reads `L`; after this cell `L` no
# longer refers to chainer.links.
N = 10000          # number of optimisation steps
L = np.zeros(N)    # loss recorded at every step
for n in range(N):
    z = l1(x)
    l = F.sum(z**2)
    # Reset the link's parameter gradients before back-propagating. Chainer
    # accumulates into existing parameter gradients, so clearing only the
    # loss variable's gradient would leave W.grad growing across iterations.
    l1.cleargrads()
    l.backward()
    optimizer.update()
    L[n] = l.data
In [23]:
# Plot the per-iteration loss values stored in `L` by the training loop.
plt.plot(L)
plt.grid()
plt.xlabel('Iter')
plt.ylabel('Loss')
Out[23]:
In [50]:
# L2 norm, taken along axis 0, of the gradient currently stored on W
# (one value per column of W).
np.linalg.norm(l1.W.grad, axis=0)
Out[50]: