In [2033]:
import warnings
warnings.filterwarnings('ignore')
from IPython.core.display import HTML
def css_styling():
styles = """
<style>
.output_png { text-align: center; }
</style>
"""
return HTML(styles)
css_styling()
import matplotlib
In [2067]:
def runningMeanFast(x, N):
return np.convolve(x, np.ones((N,))/N, mode='valid')
def powerme(x1,x2,n):
X = []
for m in range(n+1):
for i in range(m+1):
X.append(np.multiply(np.power(x1,i),np.power(x2,(m-i))))
return np.hstack(X)
def safeSigmoid(x, eps=0):
y = 1.0/(1.0 + np.exp(-x))
# przytnij od dolu i gory
if eps > 0:
y[y < eps] = eps
y[y > 1 - eps] = 1 - eps
return y
def h(theta, X, eps=0.0):
return safeSigmoid(X*theta, eps)
def J(h,theta,X,y, lamb=0):
m = len(y)
f = h(theta, X, eps=10**-7)
j = -np.sum(np.multiply(y, np.log(f)) +
np.multiply(1 - y, np.log(1 - f)), axis=0)/m# \
#+ lamb/(2*m) * np.sum(np.power(theta[1:],2))
return j
def dJ(h,theta,X,y,lamb=0):
g = 1.0/y.shape[0]*(X.T*(h(theta,X)-y))
#g[1:] += lamb/float(y.shape[0]) * theta[1:]
return g
def SGD(h, fJ, fdJ, theta, X, Y,
alpha=0.001, maxEpochs=1.0, batchSize=100,
adaGrad=False, logError=True, validate=0.0, valStep=100, lamb=0):
errorsX, errorsY = [], []
errorsVX, errorsVY = [], []
XT, YT = X, Y
if validate > 0:
mv = int(X.shape[0] * validate)
XV, YV = X[:mv], Y[:mv]
XT, YT = X[mv:], Y[mv:]
m, n = XT.shape
start, end = 0, batchSize
maxSteps = (m * float(maxEpochs)) / batchSize
if adaGrad:
hgrad = np.matrix(np.zeros(n)).reshape(n,1)
for i in range(int(maxSteps)):
XBatch, YBatch = XT[start:end,:], YT[start:end,:]
grad = fdJ(h, theta, XBatch, YBatch, lamb=lamb)
if adaGrad:
hgrad += np.multiply(grad, grad)
Gt = 1.0 / (10**-7 + np.sqrt(hgrad))
theta = theta - np.multiply(alpha * Gt, grad)
else:
theta = theta - alpha * grad
if logError:
errorsX.append(float(i*batchSize)/m)
errorsY.append(fJ(h, theta, XBatch, YBatch).item())
if validate > 0 and i % valStep == 0:
errorsVX.append(float(i*batchSize)/m)
errorsVY.append(fJ(h, theta, XV, YV).item())
if start + batchSize < m:
start += batchSize
else:
start = 0
end = min(start + batchSize, m)
return theta, (errorsX, errorsY, errorsVX, errorsVY)
def classifyBi(theta, X):
prob = h(theta, X)
return prob
In [2068]:
n = 6
sgd = True
In [2069]:
data = np.matrix(np.loadtxt("ex2data2.txt",delimiter=","))
np.random.shuffle(data)
X = powerme(data[:,0], data[:,1],n)
Y = data[:,2]
pyplot.figure(figsize=(16,8))
pyplot.subplot(121)
pyplot.scatter(X[:,2].tolist(),
X[:,1].tolist(),
c=Y.tolist(),
s=100, cmap=pyplot.cm.get_cmap('prism'));
if sgd:
theta = np.matrix(np.zeros(X.shape[1])).reshape(X.shape[1],1)
thetaBest, err = SGD(h, J, dJ, theta, X, Y, alpha=1, adaGrad=True, maxEpochs=2500, batchSize=100,
logError=True, validate=0.25, valStep=1, lamb=0)
xx, yy = np.meshgrid(np.arange(-1.5, 1.5, 0.02),
np.arange(-1.5, 1.5, 0.02))
l = len(xx.ravel())
C = powerme(xx.reshape(l,1),yy.reshape(l,1),n)
z = classifyBi(thetaBest, C).reshape(np.sqrt(l),np.sqrt(l))
pyplot.contour(xx, yy, z, levels=[0.5], lw=3);
pyplot.ylim(-1,1.2);
pyplot.xlim(-1,1.2);
pyplot.legend();
pyplot.subplot(122)
pyplot.plot(err[0],err[1], lw=3, label="Training error")
pyplot.plot(err[2],err[3], lw=3, label="Validation error");
pyplot.legend()
pyplot.ylim(0.2,0.8);
In [2114]:
def J(h,theta,X,y,lamb=0):
m = len(y)
f = h(theta, X, eps=10**-7)
j = -np.sum(np.multiply(y, np.log(f)) +
np.multiply(1 - y, np.log(1 - f)), axis=0)/m \
+ lamb/(2*m) * np.sum(np.power(theta[1:] ,2))
return j
def dJ(h,theta,X,y,lamb=0):
m = float(y.shape[0])
g = 1.0/y.shape[0]*(X.T*(h(theta,X)-y))
g[1:] += lamb/m * theta[1:]
return g
In [2115]:
n = 6
lam = 0.01
In [2116]:
data = np.matrix(np.loadtxt("ex2data2.txt",delimiter=","))
np.random.shuffle(data)
X = powerme(data[:,0], data[:,1],n)
Y = data[:,2]
theta = np.matrix(np.zeros(X.shape[1])).reshape(X.shape[1],1)
thetaBest, err = SGD(h, J, dJ, theta, X, Y, alpha=1, adaGrad=True, maxEpochs=2500, batchSize=100,
logError=True, validate=0.25, valStep=1, lamb=lam)
xx, yy = np.meshgrid(np.arange(-1.5, 1.5, 0.02),
np.arange(-1.5, 1.5, 0.02))
l = len(xx.ravel())
C = powerme(xx.reshape(l,1),yy.reshape(l,1),n)
z = classifyBi(thetaBest, C).reshape(np.sqrt(l),np.sqrt(l))
pyplot.figure(figsize=(16,8))
pyplot.subplot(121)
pyplot.scatter(X[:,2].tolist(),
X[:,1].tolist(),
c=Y.tolist(),
s=100, cmap=pyplot.cm.get_cmap('prism'));
pyplot.contour(xx, yy, z, levels=[0.5], lw=3);
pyplot.ylim(-1,1.2);
pyplot.xlim(-1,1.2);
pyplot.legend();
pyplot.subplot(122)
pyplot.plot(err[0],err[1], lw=3, label="Training error")
pyplot.plot(err[2],err[3], lw=3, label="Validation error");
pyplot.legend()
pyplot.ylim(0.2,0.8);
Dyskusja:
In [2185]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot
def h(theta, X):
return X.dot(theta)
def norm(X,Y):
return np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)
def rho(a,b):
return np.sqrt((a-b)**2)
def kneighbors(k, t, x, y):
d = rho(t, x)
nn = np.argsort(d)[:k]
return y[nn]
def Gauss(x,t,l=.01):
t = rho(x,t)/l
return 1./np.sqrt(2*np.pi)*np.exp(-1./2.*t**2)
def Kernel(K,t,x,y,l=.01):
return np.array([np.sum(K(x,t1,l)*y)/np.sum(K(x,t1,l), axis=1) for t1 in t])
def J(Yp,Y):
return np.sqrt(1.0/(2*len(y))*(Yp-Y).dot(Yp-Y))
def true(x):
return 2*x*np.sin(4*x)
m = 300
x = np.linspace(0, 5, m)
ytrue = true(x)
y = ytrue + 3*np.random.randn(m)
t = np.linspace(0, 5, m)
ttrue = true(t)
In [2186]:
n = 11 #stopień wielomianu
show1 = True
In [2187]:
xtuple = [x**i for i in range(n+1)]
xreg = np.vstack(xtuple).T
theta = norm(xreg,y)
ttuple = [t**i for i in range(n+1)]
treg = np.vstack(ttuple).T
pyplot.figure(figsize=(16,10))
if show1:
pyplot.plot(x, ytrue, label="originał", lw=3)
pyplot.scatter(x,y, s=40)
if n > 0:
predict = h(theta, treg)
pyplot.plot(t, predict, label="reg. lin. n=" + str(n), lw=3, color="red")
print("RMSE: ", J(predict,ttrue))
pyplot.xlim(0,5)
pyplot.legend();
In [2188]:
k = 16
show2 = True
In [2189]:
pyplot.figure(figsize=(16,10))
if show2:
pyplot.plot(x, ytrue, label="originał", lw=3)
pyplot.scatter(x,y, s=40)
if k > 0:
predict = np.array([np.mean(kneighbors(k, i, x, y)) for i in t])
#predict = Kernel(D1,t,x,y,0.1)
pyplot.plot(t, predict, label="k="+str(k), lw=3, color="red")
print("RMSE: ", J(predict,ttrue))
pyplot.xlim(0,5)
pyplot.legend();
In [2174]:
k = 16
In [2175]:
data = np.matrix(np.loadtxt("ex2data2.txt",delimiter=","))
#np.random.shuffle(data)
n = 1
X = powerme(data[:,0], data[:,1],n)
Y = data[:,2]
def rho(a,b):
p = np.sqrt(np.sum(np.power((a-b),2), axis=1))
return p
def kneighbors(k, t, x, y):
d = rho(t, x)
nn = np.argsort(d.ravel()).ravel()[0,:k]
return np.array(y[nn]).reshape(k,).astype(int)
xx, yy = np.meshgrid(np.arange(-1.5, 1.5, 0.02),
np.arange(-1.5, 1.5, 0.02))
l = len(xx.ravel())
C = powerme(xx.reshape(l,1),yy.reshape(l,1),n)
Z = [np.argmax(np.bincount(kneighbors(k,c,X,Y))) for c in C]
pyplot.figure(figsize=(10,10))
pyplot.scatter(X[:,2].tolist(),
X[:,1].tolist(),
c=Y.tolist(),
s=100, cmap=pyplot.cm.get_cmap('prism'));
pyplot.contour(xx, yy, np.array(Z).reshape(np.sqrt(l),np.sqrt(l)), levels=[0.5], lw=3);
pyplot.ylim(-1,1.2);
pyplot.xlim(-1,1.2);
Dyskusja
Dla zbioru $K(x) = \{ (x^{(1)},y^{(1)}), \dots, (x^{(k)},y^{(k)}) \}$ zawierającym $k$ najbliższych sąsiadów $x$ obliczamy:
Przykładowy kernel $D$ (jaka to funkcja?):
$$ D(t) = \frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}t^2} $$
In [2176]:
xg = np.linspace(-3,3,300)
yg = 1/np.sqrt(2*np.pi)*np.exp(-1./2.*xg**2)
pyplot.figure(figsize=(12,6))
pyplot.plot(xg,yg,lw=3);
In [2183]:
n = 6
k = 16
l = 0.1
show3 = False
In [2184]:
pyplot.figure(figsize=(16,10))
if show3:
pyplot.plot(x, ytrue, label="originał", lw=3)
pyplot.scatter(x,y, s=40)
if k > 0:
predict1 = np.array([np.mean(kneighbors(k, i, x, y)) for i in t])
pyplot.plot(t, predict1, label="k="+str(k), lw=3, color="green")
if l > 0:
predict2 = Kernel(D1,t,x,y,l)
pyplot.plot(t, predict2, label="lambda=" + str(l), lw=3, color="red")
print("RMSE: ", J(predict2,ttrue))
pyplot.xlim(0,5)
pyplot.legend();
Zmiana organizacji przykładów:
In [ ]: