Although the name says "regression", logistic regression is most often used for classification. Given samples $\{x_i, y_i\},\ i = 1,2,\dots,N$, the model function is defined as
$$z_i = \frac{1}{1+e^{-\theta^\top x_i}}$$
where $\theta$ is the parameter vector and $z_i$ is interpreted as the predicted probability $P(y_i = 1 \mid x_i)$.
For binary classification we use the cross-entropy loss:
$$
L = -\frac{1}{N}\sum_{i=1}^{N}\left[y_i \log z_i + (1-y_i) \log (1-z_i)\right]
$$
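For a single sample this reduces to the familiar log-loss, so a confident wrong prediction is penalized heavily:
$$
L_i = \begin{cases} -\log z_i, & y_i = 1 \\ -\log (1-z_i), & y_i = 0 \end{cases}
$$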
Differentiating the loss with respect to each parameter $\theta_j$:
$$
\frac{\partial L}{\partial \theta_j} = -\frac{1}{N}\sum_{i=1}^{N}[\frac{y_i}{z_i} \frac{\partial z_i}{\partial \theta_j} - \frac{1-y_i}{1-z_i} \frac{\partial z_i}{\partial \theta_j} ]
$$
where
$$
\frac{\partial z_i}{\partial \theta_j} = x_{ij}\, z_i (1 - z_i)
$$
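This follows from the chain rule with $v_i = \theta^\top x_i$, using the sigmoid identity $\sigma'(v) = \sigma(v)(1-\sigma(v))$:
$$
\frac{\partial z_i}{\partial \theta_j} = \frac{\partial z_i}{\partial v_i} \frac{\partial v_i}{\partial \theta_j} = z_i (1 - z_i)\, x_{ij}
$$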
Substituting this back gives
$$
\frac{\partial L}{\partial \theta_j} = \frac{1}{N}\sum_{i=1}^{N} x_{ij}\,(z_i - y_i)
$$
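With samples stacked as columns of $X$ (the layout used in the code below), the gradient also has a compact matrix form,
$$
\nabla_\theta L = \frac{1}{N} X (z - y)
$$
where $z$ and $y$ are the vectors of predictions and labels; this is exactly what `calc_gradient` computes.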
In [1]:
import cv2
import sys,os
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
#from sklearn.decomposition import PCA
sample_size = (64//8,64//8)  # downscale images to 8x8 -> 64-dim feature vectors
smallset_size = 500          # subsample each class to keep debugging fast
labels_in_use = set([1,0])   # binary classification: digits 0 and 1
num_pcs = 30                 # number of principal components (only used if PCA is enabled)
flag_debug = True
In [2]:
def load_mnist(num_per_class, dataset_root="C:/dataset/mnist/", resize=sample_size):
    data_pairs = []
    labeldict = {}
    ds_root = os.path.join(dataset_root, 'train')
    for rdir, pdirs, names in os.walk(ds_root):
        for name in names:
            basename, ext = os.path.splitext(name)
            if ext != ".jpg":
                continue
            fullpath = os.path.join(rdir, name)
            # the parent folder name is the class label (portable across OSes,
            # unlike splitting the path on a hard-coded '\\')
            label = int(os.path.basename(rdir))
            if label not in labels_in_use:
                continue
            if num_per_class > 0 and labeldict.get(label, 0) >= num_per_class:
                continue
            data_pairs.append((label, fullpath))
            labeldict[label] = labeldict.get(label, 0) + 1
    # one sample per column: data is (features, num_samples)
    data = np.zeros((resize[0] * resize[1], len(data_pairs)))
    labels = np.zeros(len(data_pairs))
    for col, (label, path) in enumerate(data_pairs):
        img = cv2.imread(path, 0)        # read as grayscale
        img = cv2.resize(img, resize)
        img = (img / 255.0).flatten()    # normalize pixels to [0, 1]
        data[:, col] = img
        labels[col] = label
    return (data, labels)
X,Y = load_mnist(smallset_size)
#if num_pcs > 0:
# pca = PCA(num_pcs)
# X = pca.fit_transform(X)
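A quick sanity check (illustrative, not part of the original notebook) confirms the column-per-sample layout and the class balance:

print(X.shape)                      # (features, num_samples); (64, 1000) with the settings above
print(np.bincount(Y.astype(int)))   # samples loaded per label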
In [3]:
def update_theta(theta, dtheta, lr):
    # one step of gradient descent
    theta -= lr * dtheta
    return theta

def calc_logistic_regression(X, theta):
    # sigmoid(theta . x) for every column (sample) of X
    theta_tile = np.tile(np.reshape(theta, (-1, 1)), (1, X.shape[1]))
    val = (theta_tile * X).sum(axis=0)
    scores = np.reshape(1 / (1 + np.exp(-val)), X.shape[1])
    return scores

def calc_loss(X, Y, theta, eps=1e-8):
    # cross-entropy loss; a small positive eps guards against log(0)
    Z = calc_logistic_regression(X, theta)
    lgZ, log1sZ = np.log(Z + eps), np.log(1 - Z + eps)
    loss = -(Y * lgZ + (1 - Y) * log1sZ).mean()
    return loss

def calc_gradient(X, Y, theta):
    # gradient (1/N) * X (z - y), as derived above
    Z = calc_logistic_regression(X, theta)
    A = np.reshape(Z - Y, (1, -1))
    grad = (X * np.tile(A, (X.shape[0], 1))).mean(axis=1)
    return grad

def predict(X, theta, th=0.5):
    # threshold the predicted probabilities
    Z = calc_logistic_regression(X, theta)
    Ybar = Z >= th
    return Ybar.astype(int)
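Before training, the analytic gradient can be verified against a finite-difference approximation of the loss (a minimal sketch; `check_gradient` is not part of the original code):

def check_gradient(X, Y, theta, j=0, h=1e-5):
    # compare dL/dtheta_j computed analytically vs. by central differences
    analytic = calc_gradient(X, Y, theta)[j]
    tp, tm = theta.copy(), theta.copy()
    tp[j] += h
    tm[j] -= h
    numeric = (calc_loss(X, Y, tp) - calc_loss(X, Y, tm)) / (2 * h)
    return analytic, numeric

print(check_gradient(X, Y, np.random.normal(0, 0.1, X.shape[0])))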
In [4]:
lr = 0.01
theta = np.random.normal(0, 0.1, X.shape[0])
loss_rec, acc_rec = [], []
for epoch in range(100):
    grad = calc_gradient(X, Y, theta)
    theta = update_theta(theta, grad, lr)
    Ybar = predict(X, theta)
    hit = (Ybar == Y).sum()
    loss = calc_loss(X, Y, theta)
    loss_rec.append(loss)
    acc_rec.append(hit / X.shape[1])   # fraction of correct predictions, i.e. accuracy
    #print('epoch {} accuracy {} loss {}'.format(epoch, hit / X.shape[1], loss))
plt.plot(range(len(loss_rec)), loss_rec, color='red', label='loss')
plt.plot(range(len(acc_rec)), acc_rec, color='blue', label='accuracy')
plt.legend()
plt.xlabel("epoch")
plt.show()
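The blue curve tracks overall accuracy rather than recall; per-class recall and precision can be computed from the final predictions, e.g. for the positive class (a minimal sketch):

# confusion-matrix entries for label 1
tp = ((Ybar == 1) & (Y == 1)).sum()
fn = ((Ybar == 0) & (Y == 1)).sum()
fp = ((Ybar == 1) & (Y == 0)).sum()
print('recall {:.3f} precision {:.3f}'.format(tp / (tp + fn), tp / (tp + fp)))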