In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
In [3]:
def h(z):
    """Logistic (sigmoid) function, evaluated element-wise.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        ``1 / (1 + exp(-z))``, with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^{-z});  z < 0: e^{z} / (1 + e^{z}).  Both are the
    # same function; each form is used where it is numerically safe.
    # The trailing [()] turns a 0-d result back into a scalar.
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))[()]
In [4]:
# Plot the sigmoid h over [-10, 10) and mark the point (0, 0.5).
z = np.arange(-10, 10, 0.01)
y = h(z)
plt.plot(z, y, linewidth=2.5)
plt.ylabel(r'$h_\theta (x)$', fontsize=20)
# `\dotX` is not a mathtext command; the intended label is theta^T . X.
plt.xlabel(r'$\theta^T\cdot X$', fontsize=20)
plt.scatter(0, 0.5)
# The label is placed slightly to the right of the marked point.
plt.annotate(r'$(0,\,0.5)$', xy=(0.2, 0.5), fontsize=20)
plt.show()
LR的目标函数(对数损失函数,或者交叉熵)是:
$$\underset{\theta}{minimize}\;L(\theta) = -\sum_{i=1}^{m}\left[y^{(i)}\log(h_\theta(x^{(i)}))+(1-y^{(i)})\log(1-h_\theta(x^{(i)}))\right],\qquad y \in\{0,1\}$$
因此目标函数的一次导数和二次导数分别为:
$$L^{'}(\theta) = -\sum_{i=1}^{m}(y^{(i)}-h_\theta(x^{(i)}))x^{(i)}$$
$$L^{''}(\theta) = \sum_{i=1}^{m}h_\theta(x^{(i)})(1-h_\theta(x^{(i)}))(x^{(i)})^2 > 0$$
由于目标函数的二次导数恒大于0,因此可知LR的目标函数是一个凸函数。
In [5]:
# Toy 2-D data set. N and P hold the x-coordinates of the two classes;
# Y holds the y-coordinates of the N points followed by those of the P points.
N = [1, 1, 2, 2]
P = [4, 4, 5, 5]
Y = [1, 2, 1, 2, 3, 4, 3, 4]

x = np.arange(2, 4, 0.01)
x2 = np.arange(2, 3.2, 0.01)

fig, ax = plt.subplots()
ax.scatter(N, Y[:4], marker='x', s=80)
ax.scatter(P, Y[4:], marker='+', s=100, color='red')
# Green line y = -2x + 8 drawn between the two groups of points.
ax.plot(x, 8 - 2 * x, color='green')
# Segment y = 0.5x starting at the data point (2, 1); its slope makes it
# perpendicular to the green line.
ax.plot(x2, x2 * 0.5)
# Equal axis scaling so the right angle is not distorted.
ax.axis('equal')
ax.annotate(r'$\frac{\theta^TX}{||\theta||}$', xy=(2.4, 0.6), fontsize=20)
plt.show()
上图中所示的为几何距离。实际上,在LR模型里,如果将y取值设定为-1和1的话,条件概率可以统一写成如下形式:
$$P(y|x) = \frac{1}{1+e^{-y\theta^TX}},\qquad\text{$y \in\{-1,1\}$ }$$
目标函数可以表示成如下形式:
$$l(\theta) = -\log(P(y|x)) = \log(1+e^{-y\theta^TX})$$
而LR里跟距离有关的便是$\theta^TX$,只不过在这里表示成函数距离的形式,即$y\theta^TX$。
SVM的损失函数可以表示如下:
$$\underset{w,b}{minimize}\;L(w,b) = \underset{i=1}{\overset{m}{\sum}}[1-y^{(i)}(w^T\cdot x^{(i)}+b)]_++\lambda||w||^2$$
等式右边第一项便是合页损失。将合页损失与LR的对数损失画在同一坐标系下时如下:
In [6]:
# Logistic loss. The base-2 logarithm is used on purpose so that the curve
# is on the same scale as the SVM loss it is compared with.
loss = -np.log2(h(z))

fig, ax = plt.subplots()
ax.plot(z, loss, linewidth=2.5)

# Red: the line 1 - z plus a flat piece at 0 on [1, 5]. The y-limits set
# below start at 0, so the negative part of the line is not shown.
ax.plot(z, -z + 1, linewidth=2.5, color='red')
ax.plot([1, 5], [0, 0], linewidth=3.5, color='red')

# Green: step from 1 down to 0 at the origin, drawn as three segments.
for xs, ys in (([-5, 0], [1, 1]), ([0, 0], [0, 1]), ([0, 5], [0, 0])):
    ax.plot(xs, ys, linewidth=2.5, color='green')

ax.set_title(r'$hinge\;loss\;and\;logistic\;loss$', fontsize=20)
ax.set_xlim(-5, 5)
ax.set_ylim(0, 6)
ax.grid()
ax.scatter(0, 1)

# Curve labels and the label of the marked point.
for label, pos in (
    (r'$LR$', (-3.5, 5.5)),
    (r'$SVM$', (-4.8, 4)),
    (r'$0-1\;LOSS$', (-4.5, 1.2)),
    (r'$(0,1)$', (0, 1.2)),
):
    ax.annotate(label, xy=pos, fontsize=20)

ax.set_ylabel(r'$E$', fontsize=20)
ax.set_xlabel(r'$y\theta^TX$', fontsize=20)
plt.show()
In [7]:
# Base-2 logistic loss -log2(h(theta * x)) for several values of theta.
x = np.arange(-4, 4, 0.01)
for theta in np.arange(1, 2, 0.2):
    plt.plot(x, -np.log2(h(theta * x)), linewidth=3)
    # Label each curve at its left end (x = -4).
    plt.annotate(r'$\theta=%.1f$' % theta,
                 xy=(-4, -np.log2(h(theta * -4))), fontsize=15)

# One much steeper curve. The multiplier and its label come from the same
# constant so they cannot disagree (the label previously read 1000 while
# the plotted curve used 100).
steep_theta = 100
plt.plot(x, -np.log2(h(steep_theta * x)), linewidth=3)
plt.annotate(r'$\theta=%d$' % steep_theta, xy=(0, 8), fontsize=20)
plt.ylim(0, 11)
plt.grid()
plt.show()
In [8]:
# 3D view: two groups of points and a green line in the z = 0 plane, plus
# the surfaces h(2*x1 + x2 - 8) and 1 - h(2*x1 + x2 - 8).
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib;
# add_subplot is the supported way to request a 3D axes.
ax = fig.add_subplot(111, projection='3d')

N = [-5, -5, -2, -2]
P = [4, 4, 8, 8]
Y = [-5, -2, -5, -2, 4, 8, 4, 8]
ax.scatter(N, Y[:4], s=80)
ax.scatter(P, Y[4:], s=80, color='red')
# Both endpoints satisfy 2*x1 + x2 - 8 = 0, the argument passed to h below.
ax.plot([-1, 9], [10, -10], linewidth=3, color='green')
ax.set_autoscalez_on(False)
ax.set_zlim(0, 4)

# Evaluation grid. Note that Y is rebound here from the point list above
# to the grid coordinates.
X = np.arange(-10, 10, 0.25)
Y = np.arange(-10, 10, 0.25)
X, Y = np.meshgrid(X, Y)
R = 2 * X + Y - 8
Z = h(R)

ax.set_xlabel(r'$x_1$', fontsize=20)
ax.set_ylabel(r'$x_2$', fontsize=20)
ax.set_zlabel(r'$h_\theta (x)$', fontsize=20)
ax.text(7, -5, 0, r'$\theta^TX=0$', fontsize=15)
ax.text(5, 10, 1, r'$h_\theta (x)$', fontsize=20, color='red')
ax.text(-10, 5, 1, r'$1-h_\theta (x)$', fontsize=20, color='blue')
# NOTE(review): alpha=0 asks for fully transparent surfaces; confirm they
# still render as intended on the Matplotlib version in use.
ax.plot_surface(X, Y, Z, alpha=0, rstride=20, cstride=20, color='yellow')
ax.plot_surface(X, Y, 1 - Z, alpha=0, rstride=20, cstride=20, color='yellow')
plt.show()