I gathered current real estate listing prices for North Bergen from Zillow. Let's see if we can use this data to build a model of housing cost as a function of home size.
In [1]:
%matplotlib inline
#Typical imports
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pandas as pd
# Apply the 'ggplot' style sheet to every figure drawn in this notebook
plt.style.use('ggplot')
In [2]:
# Load the listings from the CSV file into a pandas dataframe.
# `names` supplies the column labels and `dtype` forces every
# column to be parsed as 32-bit floats.
column_names = ['Square Feet', 'Price']
df = pd.read_csv(
    'data/nb home sales.csv',
    names=column_names,
    dtype=np.float32,
)
In [3]:
# Display the dataframe: a bare expression on the last line of a cell
# is rendered as that cell's output.
df
Out[3]:
In [4]:
# Scatter plot of the listings: square footage on the x-axis
# (independent variable) against asking price on the y-axis.
df.plot.scatter(x='Square Feet', y='Price')
Out[4]:
It seems a linear model could be appropriate in this case. How can we build it with TensorFlow?
In [5]:
# Placeholders: inputs fed in at run time.
# Both are float32 columns of shape (n, 1); `None` leaves n unspecified.
x = tf.placeholder(tf.float32, shape=[None, 1])
y_ = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, both starting at zero:
# W is the 1x1 slope matrix and b is the intercept.
W = tf.Variable(tf.zeros([1, 1]))
b = tf.Variable(tf.zeros([1]))

# Linear model: y = xW + b
y = tf.add(tf.matmul(x, W), b)

# Cost function: sum of squared errors (SSE) between targets and predictions
residual = y_ - y
cost = tf.reduce_sum(tf.square(residual))
And here's where all the magic will happen:
In [6]:
# Build the gradient-descent training op that minimizes the SSE cost.
# NOTE(review): the very small learning rate presumably compensates for the
# unscaled inputs and the summed (not averaged) cost -- confirm before changing.
learn_rate = .0000000001
n_steps = 10000
train = tf.train.GradientDescentOptimizer(learn_rate).minimize(cost)

# Prepare the data for the training session. Selecting with a list of column
# names keeps the result 2-D, so each array already has the (n, 1) shape the
# placeholders expect, where n is the number of data points.
xdata = df[['Square Feet']].values
y_data = df[['Price']].values

# The same full batch is fed on every step, so build the feed dict once.
feed = {x: xdata, y_: y_data}

# Create a TensorFlow session, initialize the variables, and run gradient descent
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(n_steps):
        # The actual training step: one gradient-descent update on the full data set
        sess.run(train, feed_dict=feed)
    # Pull the fitted parameters out of the graph as plain Python floats so they
    # can be used outside the session. ndarray.item() replaces np.asscalar, which
    # was deprecated in NumPy 1.16 and removed in NumPy 1.23.
    W_val, b_val = sess.run([W, b])
    price_sqft = W_val.item()
    cost_0 = b_val.item()

print("Model: y = %sx + %s" % (round(price_sqft,2), round(cost_0,2)))
In [7]:
# Create an empty figure with a single set of axes
fig, axes = plt.subplots()

# Draw the scatter plot of the listings on those axes
df.plot(x='Square Feet', y='Price', kind='scatter', ax=axes)

# Range of square-footage values at which to evaluate the fitted model
sqft = np.arange(500, 3000, 1)

# Overlay the fitted line on the same axes explicitly, rather than through
# pyplot's implicit "current axes", and label it so the legend identifies it.
axes.plot(sqft, price_sqft*sqft + cost_0, label='Linear fit')
axes.legend()
plt.show()