In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
In [7]:
# Example 1: a + b
a = tf.placeholder(dtype=tf.float32, shape=[2])  # define a placeholder; data matching this shape/dtype can be fed in
b = tf.placeholder(dtype=tf.float32, shape=[2])
c = a + b
with tf.Session() as sess:  # create a session
    print(sess.run(c, feed_dict={a: [1., 2.], b: [3., 3.]}))
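A side note on shapes: with shape=[2], only length-2 vectors can be fed. A minimal sketch (an illustration added here, not part of the original notebook) showing that a None dimension accepts feeds of any length:
# a placeholder with shape=[None] accepts vectors of any length
v = tf.placeholder(dtype=tf.float32, shape=[None])
s = tf.reduce_sum(v)
with tf.Session() as sess:
    print(sess.run(s, feed_dict={v: [1., 2.]}))          # 3.0
    print(sess.run(s, feed_dict={v: [1., 2., 3., 4.]}))  # 10.0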
In [28]:
# Example 2: minimize f(x) = x(1-x)sin(6.28x)
import matplotlib.pyplot as plt
%matplotlib inline
x = tf.Variable([1.80], dtype=tf.float32)  # define a variable: the starting point
#x = tf.Variable([1.7], dtype=tf.float32)  # alternative starting point
y = x * (1 - x) * tf.sin(6.28 * x)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(y)  # minimize with gradient descent
init = tf.global_variables_initializer()  # initialize variables -- essential!
with tf.Session() as sess:
    sess.run(init)
    x_init, y_init = sess.run([x, y])
    for i in range(100):
        sess.run(train_op)
    x_min, y_min = sess.run([x, y])
# plot
x = np.linspace(-1, 3, 100)
y = x * (1 - x) * np.sin(6.28 * x)
plt.plot(x, y, 'b-')
plt.plot(x_init, y_init, 'bo')
plt.plot(x_min, y_min, 'ro')
plt.title(r"$\min_x f(x)=x(1-x)\sin(6.28x)$")
Out[28]:
[figure: f(x) over [-1, 3] with the initial point (blue dot) and the converged minimum (red dot)]
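The commented-out starting point x = 1.7 hints at a caveat worth making explicit: gradient descent only finds a local minimum, so the result depends on the initialization. A minimal sketch comparing two starting points (mirroring the cell above; note tf.reset_default_graph() clears the graph, so tensors from earlier cells become invalid):
# rerun the same minimization from two starting points;
# gradient descent may converge to different local minima of f
for x0 in [1.80, 1.7]:
    tf.reset_default_graph()
    x = tf.Variable([x0], dtype=tf.float32)
    y = x * (1 - x) * tf.sin(6.28 * x)
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(y)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(100):
            sess.run(train_op)
        print(x0, sess.run([x, y]))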
In [30]:
# Note: we again use the Titanic data; see sklearn_titanic.ipynb
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle             # Python 3
with open("../kaggle_titanic/data/train_data", "rb") as f:
    X_train, y_train = pickle.load(f)
X_train = X_train.astype(np.float32)
y_train = y_train.reshape((-1, 1)).astype(np.float32)
X_tra, X_val, y_tra, y_val = train_test_split(X_train, y_train, test_size=0.25)
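Before training, a quick sanity check of shapes and class balance is worthwhile (a small addition; it assumes the split above has run):
print(X_tra.shape, y_tra.shape)  # expect (N, 14) and (N, 1)
print(X_val.shape, y_val.shape)
print(y_tra.mean())              # fraction of positive labels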
A brief introduction to the LR (logistic regression) algorithm:
Given samples $\{x^i, y^i\}\,(i=1,2,\dots,N)$ with $y^i \in \{0,1\}$, the LR model is
$$h(x) = \mathrm{sigmoid}(\vec{w} \cdot \vec{x} + b)$$
where $\mathrm{sigmoid}(t) = \frac{1}{1+e^{-t}}$ and $\vec{w}, b$ are the parameters. Its loss function is
$$L(\vec{w}, b) = -\sum_{i=1}^{N}\left(y^i \log h(x^i) + (1-y^i) \log (1-h(x^i))\right)$$
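To make the formulas concrete, here is a small numpy sketch of the model and the loss (illustrative only; w, b, X, y are hypothetical arrays, with X of shape (N, d) and w of shape (d,)):
def sigmoid(t):
    return 1. / (1. + np.exp(-t))

def lr_loss(w, b, X, y):
    # L = -sum_i ( y_i log h(x_i) + (1 - y_i) log(1 - h(x_i)) )
    h = sigmoid(X.dot(w) + b)
    return -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))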
In [53]:
N_INPUT = 14
MAX_STEP = 1000
def inference(x):  # by convention, the forward pass is grouped into an "inference" function; most model changes only touch this part
    w = tf.Variable(np.random.randn(N_INPUT, 1), dtype=tf.float32)
    b = tf.Variable([0.], dtype=tf.float32)
    h = tf.matmul(x, w) + b  # h = x * w + b
    return h
x = tf.placeholder(tf.float32, shape=[None, N_INPUT])
y = tf.placeholder(tf.float32, shape=[None, 1])
y_ = inference(x)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_))  # reduce per-example losses to a scalar
y_pred = tf.cast(tf.greater(y_, 0.), tf.float32)  # y_ holds logits, so threshold at 0 (sigmoid(0) = 0.5)
correct = tf.equal(y_pred, y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))  # note: loss is not 1 - accuracy
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
acc1 = []
with tf.Session() as sess:
    init = tf.global_variables_initializer()  # initialize variables -- essential!
    sess.run(init)
    for i in range(MAX_STEP):
        _, acc_tra = sess.run([train_op, accuracy], feed_dict={x: X_tra, y: y_tra})
        if i % 10 == 0 or i + 1 == MAX_STEP:
            acc_val = sess.run(accuracy, feed_dict={x: X_val, y: y_val})
            acc1.append([i, acc_tra, acc_val])
        if i % 100 == 0 or i + 1 == MAX_STEP:
            print("%d, train accuracy: %.4f, validation accuracy: %.4f" % (i, acc_tra, acc_val))
In [54]:
N_INPUT = 14
MAX_STEP = 1000
N_HID = 7
def inference(x):  # one hidden layer with tanh activation
    w1 = tf.Variable(np.random.randn(N_INPUT, N_HID), dtype=tf.float32)
    b1 = tf.Variable([0.], dtype=tf.float32)
    h1 = tf.nn.tanh(tf.matmul(x, w1) + b1)
    w2 = tf.Variable(np.random.randn(N_HID, 1), dtype=tf.float32)
    b2 = tf.Variable([0.], dtype=tf.float32)
    h2 = tf.matmul(h1, w2) + b2
    return h2
x = tf.placeholder(tf.float32, shape=[None, N_INPUT])
y = tf.placeholder(tf.float32, shape=[None, 1])
y_ = inference(x)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_))
y_pred = tf.cast(tf.greater(y_, 0.), tf.float32)  # threshold logits at 0, as above
correct = tf.equal(y_pred, y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
acc2 = []
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(MAX_STEP):
        _, acc_tra = sess.run([train_op, accuracy], feed_dict={x: X_tra, y: y_tra})
        if i % 10 == 0 or i + 1 == MAX_STEP:
            acc_val = sess.run(accuracy, feed_dict={x: X_val, y: y_val})
            acc2.append([i, acc_tra, acc_val])
        if i % 100 == 0 or i + 1 == MAX_STEP:
            print("%d, train accuracy: %.4f, validation accuracy: %.4f" % (i, acc_tra, acc_val))
In [55]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [57]:
acc1 = np.array(acc1)
acc2 = np.array(acc2)
plt.figure(figsize=(12, 6))
plt.plot(acc1[:, 0], acc1[:, 1], 'b--', label='LR train')
plt.plot(acc1[:, 0], acc1[:, 2], 'b-', label='LR validation')
plt.plot(acc2[:, 0], acc2[:, 1], 'g--', label='MLP train')
plt.plot(acc2[:, 0], acc2[:, 2], 'g-', label='MLP validation')
plt.legend()
plt.title("step vs. accuracy")
Out[57]:
[figure: training (dashed) and validation (solid) accuracy vs. step for LR (blue) and the MLP (green)]
Conclusion: making the model more complex brings no improvement here; it only introduces overfitting (the sample size is too small). Better-engineered features are the more promising direction.