Linear Regression with Boston Housing Dataset


In [1]:
from __future__ import division, print_function
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
%matplotlib inline

In [2]:
# Relative path to the directory holding housing.data (UCI Boston Housing).
DATA_DIR = "../../data"

Prepare Data


In [3]:
# Load the Boston Housing data: each row is 13 numeric features followed by
# the median home price (the regression target, last column).
# Fixes: open in text mode ("r", not "rb") — in Python 3 a binary file yields
# bytes and float(b"...") raises TypeError; use a context manager so the file
# is closed even if a parse error occurs.
data, prices = [], []
with open(os.path.join(DATA_DIR, "housing.data"), "r") as fdata:
    for line in fdata:
        cols = [float(x) for x in line.strip().split()]
        data.append(np.array(cols[:-1]))  # 13 feature columns
        prices.append(cols[-1])           # target: median price

X = np.array(data)
y = np.array(prices).reshape(-1, 1)  # column vector, shape (n_samples, 1)
print(X.shape, y.shape)


(506, 13) (506, 1)

In [4]:
# Standardize each feature to zero mean / unit variance — gradient-based
# training converges much faster on scaled inputs.
scaler = StandardScaler()
scaler.fit(X)
Xs = scaler.transform(X)

In [5]:
# Hold out 10% of the rows for evaluation.
# Fix: pin random_state so the split — and every downstream result — is
# reproducible under Restart Kernel -> Run All.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    Xs, y, train_size=0.9, random_state=42)
print(Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape)


(455, 13) (455, 1) (51, 13) (51, 1)

Define Network


In [6]:
# Graph inputs: a batch of feature rows and their target prices.
# NOTE(review): these rebind the names X and y, shadowing the numpy arrays
# loaded earlier — from here on X/y refer to graph tensors, not data.
X = tf.placeholder(tf.float32, [None, 13], name="X")  # (batch, 13 features)
y = tf.placeholder(tf.float32, [None, 1], name="y")   # (batch, 1 target)

In [7]:
# Trainable parameters of the linear model, randomly initialized:
# weight vector W (13x1) and scalar bias b (kept 2-D for broadcasting).
W = tf.Variable(tf.random_normal([13, 1]), name="W")
b = tf.Variable(tf.random_normal([1, 1]), name="b")

In [8]:
# Linear model prediction: y_ = X @ W + b ("+" dispatches to the same
# element-wise add op as tf.add).
y_ = tf.matmul(X, W) + b

In [9]:
# Mean squared error between targets and predictions.
loss = tf.reduce_mean(tf.square(y - y_))

In [10]:
# Adam with learning rate 0.001; `optimizer` is the training op returned by
# minimize(), executed once per step in the loop below.
optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

Train Network


In [11]:
# Train with full-batch gradient steps until convergence: stop when the
# per-step decrease in loss (epsilon) falls below 1e-5.
# Fixes: `prev_loss == None` -> `prev_loss is None` (identity check; equality
# against None is unreliable with numpy scalars), removed the unused `preds`
# list, and factored the duplicated progress format string into a constant.
PROGRESS_FMT = "# steps: {:d}, loss: {:.3f}, epsilon: {:.5f}"
losses = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    prev_loss, epsilon, num_steps = None, None, 0
    while True:
        if num_steps % 1000 == 0 and num_steps > 0:
            print(PROGRESS_FMT.format(num_steps, prev_loss, epsilon))
        _, loss_val = sess.run([optimizer, loss], feed_dict={
            X: Xtrain, y: ytrain})
        losses.append(loss_val)
        if prev_loss is None:
            epsilon = loss_val  # first step: no previous loss to diff against
        else:
            epsilon = prev_loss - loss_val
        if epsilon < 1e-5:
            break
        prev_loss = loss_val
        num_steps += 1
    # final report + learned parameters for prediction outside the session
    print(PROGRESS_FMT.format(num_steps, prev_loss, epsilon))
    W_val, b_val = sess.run([W, b])


# steps: 1000, loss: 500.648, epsilon: 0.05789
# steps: 2000, loss: 448.461, epsilon: 0.04736
# steps: 3000, loss: 404.681, epsilon: 0.04068
# steps: 4000, loss: 366.364, epsilon: 0.03635
# steps: 5000, loss: 331.426, epsilon: 0.03372
# steps: 6000, loss: 298.793, epsilon: 0.03162
# steps: 7000, loss: 268.219, epsilon: 0.02957
# steps: 8000, loss: 239.627, epsilon: 0.02756
# steps: 9000, loss: 212.935, epsilon: 0.02574
# steps: 10000, loss: 188.102, epsilon: 0.02391
# steps: 11000, loss: 165.114, epsilon: 0.02206
# steps: 12000, loss: 143.958, epsilon: 0.02026
# steps: 13000, loss: 124.623, epsilon: 0.01845
# steps: 14000, loss: 107.092, epsilon: 0.01665
# steps: 15000, loss: 91.349, epsilon: 0.01489
# steps: 16000, loss: 77.372, epsilon: 0.01312
# steps: 17000, loss: 65.134, epsilon: 0.01138
# steps: 18000, loss: 54.604, epsilon: 0.00969
# steps: 19000, loss: 45.743, epsilon: 0.00803
# steps: 20000, loss: 38.500, epsilon: 0.00645
# steps: 21000, loss: 32.806, epsilon: 0.00495
# steps: 22000, loss: 28.573, epsilon: 0.00355
# steps: 23000, loss: 25.674, epsilon: 0.00229
# steps: 24000, loss: 23.930, epsilon: 0.00124
# steps: 25000, loss: 23.087, epsilon: 0.00050
# steps: 26000, loss: 22.812, epsilon: 0.00011
# steps: 26784, loss: 22.770, epsilon: 0.00001

In [12]:
# Training curve: loss after each gradient step.
step_idx = np.arange(len(losses))
plt.plot(step_idx, losses)
plt.xlabel("steps")
plt.ylabel("loss")


Out[12]:
<matplotlib.text.Text at 0x1155b1f50>

Predictions


In [13]:
# Predict test-set prices with the learned weights and compare to the truth.
ytest_ = np.dot(Xtest, W_val) + b_val
case_idx = np.arange(ytest.shape[0])  # ytest and ytest_ have equal length
plt.plot(case_idx, ytest, color="r", label="test")
plt.plot(case_idx, ytest_, color="b", label="pred")
plt.legend(loc="best")
# Fix: the axis labels were swapped — x is the test-case index, y is price.
plt.xlabel("cases")
plt.ylabel("prices")


Out[13]:
<matplotlib.text.Text at 0x1157ddc10>

In [ ]: