使用tensorflow重写lr算法


In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

TF简介


In [7]:
# 例1: a+b
a = tf.placeholder(dtype=tf.float32, shape=[2])  # 定义占位符,可以feed满足相应条件的数据
b = tf.placeholder(dtype=tf.float32, shape=[2])
c = a + b
with tf.Session() as sess:  # 创建一个会话
    print sess.run(c, feed_dict={a:[1.,2.], b:[3.,3.]})


[ 4.  5.]

In [28]:
# 例2: 最小值 f(x) = x(1-x)sin(x)
import matplotlib.pylab as plt
%matplotlib inline

x = tf.Variable([1.80], dtype=tf.float32)   # 定义变量
#x = tf.Variable([1.7], dtype=tf.float32)
y = x * (1-x) * tf.sin(6.28*x)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(y)  # 使用GD算法求最小值
init = tf.global_variables_initializer()   # 变量初始化,很重要!!!
with tf.Session() as sess:
    sess.run(init)
    x_init, y_init = sess.run([x,y])
    for i in range(100):
        sess.run(train_op)
    x_min,y_min = sess.run([x,y])

# plot
x = np.linspace(-1,3,100)
y = x * (1-x) * np.sin(6.28*x)
plt.plot(x,y,'b-')
plt.plot(x_init,y_init,'bo')
plt.plot(x_min,y_min,'ro')
plt.title("$min_x f(x)=x(x-1)\sin(x)$")


Out[28]:
<matplotlib.text.Text at 0x7f41605f1750>

LR算法


In [30]:
# 说明: 我们还是使用tinanic数据,见sklearn_titanic.ipynb
import  cPickle
with open("../kaggle_titanic/data/train_data","rb") as f:
    X_train, y_train = cPickle.load(f)

X_train = X_train.astype(np.float32)
y_train = y_train.reshape((-1,1)).astype(np.float32)

X_tra, X_val, y_tra, y_val = train_test_split(X_train,y_train, test_size=0.25)

LR算法简介:
对样本$\{x^{i}, y^{i}\}(i=1,2,...,N),其中y^i \in \{0,1\}$,LR模型为:
$$h(x) = sigmod(\vec{w} \cdot \vec{x}+b)$$ 其中,$sigmod(t) = \frac{1}{1+e^{-t}}$, $\vec{w},b$为参数。其损失函数为 $$L(\vec{w}) = -\sum_{i=0}^{N}(y^i \log h(x^i) + (1-y^i) \log (1-h(x^i)))$$


In [53]:
N_INPUT = 14
MAX_STEP = 1000

def inference(x):  # 一般情况下,把正向传播部分到到一起,称之为infercence,如果要修改模型,很多时候修改这部分就可以了
    w = tf.Variable(np.random.randn(N_INPUT,1),dtype=tf.float32) 
    b = tf.Variable([0.], dtype=tf.float32)
    h = tf.matmul(x,w) + b                     # h = x * w + b
    return h

x = tf.placeholder(tf.float32, shape=[None, N_INPUT])
y = tf.placeholder(tf.float32,shape=[None, 1])

y_ = inference(x)

loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_) 
y_pred = tf.cast(tf.greater(y_, 0.5), tf.float32)
correct = tf.equal(y_pred, y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))   # loss is not 1-accuracy

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

acc1 = []
with tf.Session() as sess:
    init = tf.global_variables_initializer()  # 变量初始化,很重要!!!
    sess.run(init)
    for i in range(MAX_STEP):
        _, acc_tra = sess.run([train_op,accuracy],feed_dict={x:X_tra, y:y_tra})
        if i % 10 == 0 or i+1 == MAX_STEP:
            acc_val = sess.run(accuracy, feed_dict={x:X_val, y:y_val})
            acc1.append([i, acc_tra, acc_val])
        if i % 100 == 0 or i+1 == MAX_STEP:
            print "%d, train accuracy :%.4f, test accuracy: %.4f" % (i, acc_tra, acc_val)


0, train accuracy :0.3383, test accuracy: 0.6906
100, train accuracy :0.7964, test accuracy: 0.7892
200, train accuracy :0.7395, test accuracy: 0.7982
300, train accuracy :0.7859, test accuracy: 0.6592
400, train accuracy :0.7560, test accuracy: 0.7937
500, train accuracy :0.7036, test accuracy: 0.7937
600, train accuracy :0.7784, test accuracy: 0.6637
700, train accuracy :0.7964, test accuracy: 0.7220
800, train accuracy :0.7635, test accuracy: 0.7444
900, train accuracy :0.7754, test accuracy: 0.7623
999, train accuracy :0.7934, test accuracy: 0.7354

添加一个隐藏层

LR算法是单层的NN,前面分析过,我们使用LR模型是欠拟合的,我们可以通过增加隐藏层复杂化,看看效果如何?


In [54]:
N_INPUT = 14
MAX_STEP = 1000
N_HID = 7

def inference(x):
    w1 = tf.Variable(np.random.randn(N_INPUT,N_HID),dtype=tf.float32)
    b1 = tf.Variable([0.], dtype=tf.float32)
    h1 = tf.nn.tanh(tf.matmul(x,w1) + b1)
    w2 = tf.Variable(np.random.randn(N_HID,1),dtype=tf.float32)
    b2 = tf.Variable([0.], dtype=tf.float32)
    h2 = tf.matmul(h1,w2) + b2
    return h2

x = tf.placeholder(tf.float32, shape=[None, N_INPUT])
y = tf.placeholder(tf.float32,shape=[None, 1])

y_ = inference(x)

loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_)
y_pred = tf.cast(tf.greater(y_, 0.5), tf.float32)
correct = tf.equal(y_pred, y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)

acc2 = []
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(MAX_STEP):
        _, acc_tra = sess.run([train_op,accuracy],feed_dict={x:X_tra, y:y_tra})
        if i % 10 == 0 or i+1 == MAX_STEP:
            acc_val = sess.run(accuracy, feed_dict={x:X_val, y:y_val})
            acc2.append([i, acc_tra, acc_val])
        if i % 100 == 0 or i+1 == MAX_STEP:
            print "%d, train accuracy :%.4f, test accuracy: %.4f" % (i, acc_tra, acc_val)


0, train accuracy :0.5734, test accuracy: 0.6592
100, train accuracy :0.8219, test accuracy: 0.7848
200, train accuracy :0.8219, test accuracy: 0.7758
300, train accuracy :0.8308, test accuracy: 0.8161
400, train accuracy :0.8353, test accuracy: 0.7848
500, train accuracy :0.8398, test accuracy: 0.7803
600, train accuracy :0.8428, test accuracy: 0.7803
700, train accuracy :0.8548, test accuracy: 0.7892
800, train accuracy :0.8458, test accuracy: 0.7937
900, train accuracy :0.8413, test accuracy: 0.7803
999, train accuracy :0.8383, test accuracy: 0.7937

比较


In [55]:
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline

In [57]:
acc1  = np.array(acc1)
acc2  = np.array(acc2)

plt.figure(figsize=(12,6))
plt.plot(acc1[:,0],acc1[:,1],'b--')
plt.plot(acc1[:,0],acc1[:,2],'b-')

plt.plot(acc2[:,0],acc2[:,1],'g--')
plt.plot(acc2[:,0],acc2[:,2],'g-')
plt.title("step vs. accuracy")


Out[57]:
<matplotlib.text.Text at 0x7f41583de490>

说明,通过复杂化模型,对效果没有提升,反而引入过过拟合(样本数目过少),所以我们只能做更加合理的特征。


In [ ]: