In [1]:
from __future__ import division, print_function
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
%matplotlib inline
In [2]:
# Directory that holds the input data file; being a relative path, it is
# resolved against the notebook's current working directory.
DATA_DIR = "../../data"
In [3]:
# Load the housing data file. Each non-empty line is a whitespace-separated row
# of numbers: the last column is the price (target), the rest are features.
data, prices = [], []
# The context manager closes the file even when a row fails to parse.
with open(os.path.join(DATA_DIR, "housing.data"), "rb") as fdata:
    for line in fdata:
        cols = [float(x) for x in line.strip().split()]
        if not cols:
            # Blank lines (e.g. a trailing newline) carry no record; skip them
            # instead of failing on cols[-1].
            continue
        data.append(np.array(cols[:-1]))
        prices.append(cols[-1])
X = np.array(data)
# Column vector of targets, one row per record.
y = np.array(prices).reshape(-1, 1)
print(X.shape, y.shape)
In [4]:
# Standardize every feature column of X.
# NOTE(review): the scaler is fit on all rows, including those that are later
# held out for testing, so test-set statistics leak into the scaling. Consider
# fitting on the training split only.
scaler = StandardScaler()
scaler.fit(X)
Xs = scaler.transform(X)
In [5]:
# Split the scaled features and targets, keeping 90% of the rows for training.
# A fixed random_state makes the shuffle (and every downstream number)
# reproducible across kernel restarts.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    Xs, y, train_size=0.9, random_state=42)
print(Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape)
In [6]:
# Graph inputs: feature rows (13 columns) and target values (1 column); the
# leading None leaves the number of rows open.
# NOTE(review): these rebind X and y, which previously held the loaded NumPy
# arrays, so the scaling cell cannot be re-run after this one without first
# re-running the data-loading cell.
X = tf.placeholder(dtype=tf.float32, shape=[None, 13], name="X")
y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y")
In [7]:
# Trainable parameters of the linear model, drawn from a normal distribution:
# one weight per feature column and a single bias term.
W = tf.Variable(initial_value=tf.random_normal(shape=[13, 1]), name="W")
b = tf.Variable(initial_value=tf.random_normal(shape=[1, 1]), name="b")
In [8]:
# Model output: linear combination of the inputs plus the bias.
y_ = tf.matmul(X, W) + b
In [9]:
# Mean squared error between the targets and the model output.
squared_error = tf.pow(tf.subtract(y, y_), 2)
loss = tf.reduce_mean(squared_error)
In [10]:
# Despite its name, `optimizer` holds the op returned by minimize(); running it
# applies one Adam update that reduces `loss`.
adam = tf.train.AdamOptimizer(learning_rate=0.001)
optimizer = adam.minimize(loss)
In [11]:
# Full-batch training: run one optimizer step at a time on the whole training
# split and stop as soon as the loss improves by less than 1e-5 between two
# consecutive steps (this also stops when the loss goes up).
# `losses` records the loss of every step; `preds` is not filled in this cell.
losses, preds = [], []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    prev_loss, epsilon, num_steps = None, None, 0
    while True:
        # Progress report every 1000 completed steps.
        if num_steps % 1000 == 0 and num_steps > 0:
            print("# steps: {:d}, loss: {:.3f}, epsilon: {:.5f}"
                  .format(num_steps, prev_loss, epsilon))
        _, loss_val = sess.run([optimizer, loss], feed_dict={
            X: Xtrain, y: ytrain})
        losses.append(loss_val)
        # epsilon is the improvement over the previous step; on the first step
        # there is nothing to compare with, so the loss itself is used.
        if prev_loss is None:
            epsilon = loss_val
        else:
            epsilon = prev_loss - loss_val
        # A NaN loss makes every comparison False, which would otherwise keep
        # this loop spinning forever; treat it as a stop condition too.
        if np.isnan(loss_val) or epsilon < 1e-5:
            break
        prev_loss = loss_val
        num_steps += 1
    # Report the loss of the last counted step. If the loop stopped on its
    # very first step, prev_loss is still None and cannot be formatted, so
    # fall back to the only loss that was measured.
    final_loss = loss_val if prev_loss is None else prev_loss
    print("# steps: {:d}, loss: {:.3f}, epsilon: {:.5f}"
          .format(num_steps, final_loss, epsilon))
    # Fetch the learned parameters while the session is still open.
    W_val, b_val = sess.run([W, b])
In [12]:
# Training curve: recorded loss against the index of the optimizer step.
step_index = np.arange(len(losses))
plt.plot(step_index, losses)
plt.xlabel("steps")
plt.ylabel("loss")
Out[12]:
In [13]:
# Apply the learned linear model to the held-out rows with plain NumPy and
# compare the predictions with the true targets, one point per test case.
ytest_ = np.dot(Xtest, W_val) + b_val
plt.plot(np.arange(ytest.shape[0]), ytest, color="r", label="test")
plt.plot(np.arange(ytest_.shape[0]), ytest_, color="b", label="pred")
plt.legend(loc="best")
# The x-axis is the test-case index and the y-axis is the price; the original
# labels were swapped.
plt.ylabel("prices")
plt.xlabel("cases")
Out[13]:
In [ ]: