AdaGrad
Extend SGD with AdaGrad (e.g. via an adaGrad=True parameter). The principles of how AdaGrad works are given in the lecture materials.
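The lecture materials are the reference here; as a reminder, the standard formulation of AdaGrad keeps a running sum of squared gradients and rescales the learning rate per coordinate (a sketch of the usual rule, which is assumed to match the lecture version):

\[
G_t = G_{t-1} + g_t \odot g_t, \qquad
\theta_{t+1} = \theta_t - \frac{\alpha}{\sqrt{G_t} + \epsilon} \odot g_t
\]

where \(g_t\) is the current (mini-batch) gradient, the product, square root and division are element-wise, and \(\epsilon\) is a small constant that prevents division by zero.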
Cross-Validation
Ensemble (for an additional 20 points)
In [1]:
import numpy as np

def runningMeanFast(x, N):
    # Moving average of x with window N (useful for smoothing the cost curve).
    return np.convolve(x, np.ones((N,))/N, mode='valid')

def safeSigmoid(x, eps=0):
    y = 1.0/(1.0 + np.exp(-x))
    # Clip from below and above to avoid log(0) in the cost function.
    if eps > 0:
        y[y < eps] = eps
        y[y > 1 - eps] = 1 - eps
    return y

def h(theta, X, eps=0.0):
    # Logistic-regression hypothesis; X is expected to be an np.matrix.
    return safeSigmoid(X*theta, eps)

def J(h, theta, X, y):
    # Cross-entropy (log-loss) cost.
    m = len(y)
    f = h(theta, X, eps=10**-7)
    return -np.sum(np.multiply(y, np.log(f)) +
                   np.multiply(1 - y, np.log(1 - f)), axis=0)/m

def dJ(h, theta, X, y):
    # Gradient of the cross-entropy cost.
    return 1.0/len(y)*(X.T*(h(theta, X)-y))

def softmax(X):
    return np.exp(X)/np.sum(np.exp(X))
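The helpers above assume X and y are passed as np.matrix objects, so that X*theta and X.T*(...) are matrix products. A minimal smoke test on hypothetical synthetic data (the names and values below are illustrative only):

Xtoy = np.matrix([[1.0, 2.0], [1.0, -1.0], [1.0, 0.5]])   # first column is the bias term
ytoy = np.matrix([[1.0], [0.0], [1.0]])
thetaToy = np.matrix(np.zeros((2, 1)))
print(J(h, thetaToy, Xtoy, ytoy))   # log-loss at theta = 0 is ln 2 ≈ 0.6931
print(dJ(h, thetaToy, Xtoy, ytoy))  # 2x1 gradient vector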
In [2]:
def SGD(h, fJ, fdJ, theta, X, y,
        alpha=0.001, maxEpochs=1.0, batchSize=100):
    m, n = X.shape
    start, end = 0, batchSize
    maxSteps = (m * float(maxEpochs)) / batchSize
    for i in range(int(maxSteps)):
        XBatch, yBatch = X[start:end,:], y[start:end,:]
        # Gradient step on the current mini-batch.
        theta = theta - alpha * fdJ(h, theta, XBatch, yBatch)
        # Advance the batch window, wrapping around at the end of the data.
        if start + batchSize < m:
            start += batchSize
        else:
            start = 0
        end = min(start + batchSize, m)
    return theta
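One possible way to tackle the AdaGrad task is to add a flag to the loop above (here called adaGrad, echoing the adaGrad=True parameter mentioned earlier) that accumulates squared gradients and rescales the step per coordinate. The function name SGDAdaGrad and the eps default are assumptions; this is a sketch, not a reference solution.

def SGDAdaGrad(h, fJ, fdJ, theta, X, y,
               alpha=0.001, maxEpochs=1.0, batchSize=100,
               adaGrad=True, eps=1e-8):
    m, n = X.shape
    start, end = 0, batchSize
    G = np.zeros_like(theta)                      # running sum of squared gradients
    maxSteps = (m * float(maxEpochs)) / batchSize
    for i in range(int(maxSteps)):
        XBatch, yBatch = X[start:end,:], y[start:end,:]
        grad = fdJ(h, theta, XBatch, yBatch)
        if adaGrad:
            G = G + np.multiply(grad, grad)       # accumulate squared gradients element-wise
            theta = theta - alpha * np.multiply(grad, 1.0/(np.sqrt(G) + eps))
        else:
            theta = theta - alpha * grad          # plain SGD step
        if start + batchSize < m:
            start += batchSize
        else:
            start = 0
        end = min(start + batchSize, m)
    return theta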
In [ ]: