This post is based on the tutorial on the official TensorFlow website. The input data is MNIST, short for Modified National Institute of Standards and Technology: a dataset of scanned handwritten-digit images collected by that institute, each paired with its label, and modified to be easy for machine-learning algorithms to consume. The dataset can be downloaded from the website of the legendary Professor Yann LeCun.
We first fit the model with sklearn's LogisticRegression(); the fitted coefficients are plotted below (red marks coefficients estimated to be negative, blue positive, and green zero):
Visually, the outline traced by the blue points is fairly close to the outline of the corresponding digit.
We then train a softmax regression on the same dataset with TensorFlow; its fitted coefficients are plotted below:
Again the blue points trace the corresponding digit fairly well, but comparing the two sets of plots, the TensorFlow coefficients look somewhat smoother. In terms of test-set accuracy, however, both land around 92%, with sklearn slightly ahead. Note that although 92% sounds decent, it is actually quite poor; as the official tutorial puts it, we should be ashamed of it.
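To be concrete, here is the model both implementations fit, written with the same symbols as the TensorFlow code below ($V$ for the logits, $W$ and $b$ for the parameters, and $y'$ standing in for the one-hot label fed as y_):

$$ V = xW + b, \qquad y_i = \mathrm{softmax}(V)_i = \frac{e^{V_i}}{\sum_j e^{V_j}}, \qquad \mathrm{loss} = -\sum_i y'_i \log y_i $$

The loss is the cross-entropy between the predicted distribution $y$ and the one-hot label, which is what gradient descent minimizes in the TensorFlow cell and what LBFGS minimizes (plus regularization) inside sklearn.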
In [1]:
import gzip
import struct
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
def read_image(file_name):
    with gzip.open(file_name, 'rb') as f:
        buf = f.read()
    # The IDX image file starts with four big-endian 32-bit integers:
    # magic number, number of images, rows and columns per image.
    index = 0
    magic, images, rows, columns = struct.unpack_from('>IIII', buf, index)
    index += struct.calcsize('>IIII')
    # The pixels follow as unsigned bytes, one byte per pixel.
    image_size = '>' + str(images * rows * columns) + 'B'
    ims = struct.unpack_from(image_size, buf, index)
    im_array = np.array(ims).reshape(images, rows, columns)
    return im_array
def read_label(file_name):
    with gzip.open(file_name, 'rb') as f:
        buf = f.read()
    # The IDX label file starts with two big-endian 32-bit integers:
    # magic number and number of labels; the labels follow as unsigned bytes.
    index = 0
    magic, labels = struct.unpack_from('>II', buf, index)
    index += struct.calcsize('>II')
    label_size = '>' + str(labels) + 'B'
    labels = struct.unpack_from(label_size, buf, index)
    label_array = np.array(labels)
    return label_array
print("Start processing MNIST handwritten digits data...")
train_x_data = read_image("MNIST_data/train-images-idx3-ubyte.gz")
train_x_data = train_x_data.reshape(train_x_data.shape[0], -1).astype(np.float32)
train_y_data = read_label("MNIST_data/train-labels-idx1-ubyte.gz")
test_x_data = read_image("MNIST_data/t10k-images-idx3-ubyte.gz")
test_x_data = test_x_data.reshape(test_x_data.shape[0], -1).astype(np.float32)
test_y_data = read_label("MNIST_data/t10k-labels-idx1-ubyte.gz")
# Scale pixel values from [0, 255] down to [0, 1]
train_x_minmax = train_x_data / 255
test_x_minmax = test_x_data / 255
# Alternatively, you can use the utility function provided by tensorflow to read in MNIST:
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)
# train_x_minmax = mnist.train.images
# train_y_data = mnist.train.labels
# test_x_minmax = mnist.test.images
# test_y_data = mnist.test.labels
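A quick sanity check of the loaded arrays can save debugging later. This snippet is my own addition and assumes the four gzipped IDX files have been downloaded into MNIST_data/ as above:

# Expect 60000 training and 10000 test examples of 784 (= 28 * 28) pixels each
print(train_x_minmax.shape, train_y_data.shape)  # (60000, 784) (60000,)
print(test_x_minmax.shape, test_y_data.shape)    # (10000, 784) (10000,)
# Pixel values should now lie in [0, 1] and labels in 0..9
print(train_x_minmax.min(), train_x_minmax.max(), np.unique(train_y_data))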
In [2]:
# We first evaluate the softmax regression model with sklearn
eval_sklearn = True
if eval_sklearn:
    print("Start evaluating softmax regression model by sklearn")
    # multi_class="multinomial" fits a true softmax regression
    # rather than the default one-vs-rest scheme
    reg = LogisticRegression(solver="lbfgs", multi_class="multinomial")
    reg.fit(train_x_minmax, train_y_data)
    # Save coefficients to a text file
    np.savetxt("output/coef_softmax_sklearn.txt", reg.coef_, fmt="%.6f")
    test_y_predict = reg.predict(test_x_minmax)
    print("Accuracy of test set: %f" % accuracy_score(test_y_data, test_y_predict))
In [3]:
eval_tensorflow = True
batch_gradient = False
if eval_tensorflow:
    print("Start evaluating softmax regression model by tensorflow...")
    # Reformat y into one-hot encoding style
    lb = preprocessing.LabelBinarizer()
    lb.fit(train_y_data)
    train_y_data_trans = lb.transform(train_y_data)
    test_y_data_trans = lb.transform(test_y_data)

    # The model: y = softmax(xW + b)
    x = tf.placeholder(tf.float32, [None, 784])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    V = tf.matmul(x, W) + b
    y = tf.nn.softmax(V)

    # Cross-entropy loss against the one-hot labels y_
    y_ = tf.placeholder(tf.float32, [None, 10])
    loss = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

    optimizer = tf.train.GradientDescentOptimizer(0.5)
    train = optimizer.minimize(loss)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    if batch_gradient:
        # Full-batch gradient descent: every step uses all 60000 examples
        for step in range(300):
            sess.run(train, feed_dict={x: train_x_minmax, y_: train_y_data_trans})
            if step % 10 == 0:
                print("Batch Gradient Descent processing step %d" % step)
        print("Finally we got the estimated results; that took quite a long time...")
    else:
        # Stochastic gradient descent: each step uses a random mini-batch of 100
        for step in range(1000):
            sample_index = np.random.choice(train_x_minmax.shape[0], 100)
            batch_xs = train_x_minmax[sample_index, :]
            batch_ys = train_y_data_trans[sample_index, :]
            sess.run(train, feed_dict={x: batch_xs, y_: batch_ys})
            if step % 100 == 0:
                print("Stochastic Gradient Descent processing step %d" % step)
    # Save coefficients to a text file; transpose W from (784, 10) to
    # (10, 784) so the layout matches sklearn's coef_
    np.savetxt('output/coef_softmax_tf.txt', np.transpose(sess.run(W)), fmt='%.6f')

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy of test set: %f"
          % sess.run(accuracy, feed_dict={x: test_x_minmax,
                                          y_: test_y_data_trans}))
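One caveat about the loss used above: computing tf.log(y) on an explicit softmax is numerically unstable, because y can underflow to exactly zero and log(0) yields -inf. The TensorFlow tutorial itself recommends the fused, numerically stable op instead; a drop-in replacement that takes the raw logits V rather than softmax(V) would be:

    # Numerically stable alternative: pass the raw logits V, not softmax(V)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=V))

With zero-initialized weights and only 1000 SGD steps the unstable version usually gets away with it, but the fused op is the safer default.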