In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf


/opt/conda/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)

In [15]:
dataframe = pd.read_csv('data.csv')
dataframe = dataframe.drop(['index', 'price','sq_price'], axis=1)
dataframe = dataframe[0:10]
print(dataframe)


     area  bathrooms
0  2104.0        3.0
1  1600.0        3.0
2  2400.0        3.0
3  1416.0        2.0
4  3000.0        4.0
5  1985.0        4.0
6  1534.0        3.0
7  1427.0        3.0
8  1380.0        3.0
9  1494.0        3.0

In [21]:
#step 2 - add labels
dataframe.loc[:,('y1')]=[1,1,1,0,0,1,1,1,0,0]
#y2 is a negation of y1, opposite
dataframe.loc[:,('y2')] = dataframe['y1']==0
# turn t/f to 1's and 0's
dataframe.loc[:,('y2')] = dataframe['y2'].astype(int)
print(dataframe)


     area  bathrooms  y1  y2
0  2104.0        3.0   1   0
1  1600.0        3.0   1   0
2  2400.0        3.0   1   0
3  1416.0        2.0   0   1
4  3000.0        4.0   0   1
5  1985.0        4.0   1   0
6  1534.0        3.0   1   0
7  1427.0        3.0   1   0
8  1380.0        3.0   0   1
9  1494.0        3.0   0   1

In [27]:
# prepare the data
# tensor is generic version of vertor and matrices
# convert features into input tensors
inputX = dataframe.loc[:,['area','bathrooms']].as_matrix()
inputY = dataframe.loc[:, ['y1','y2']].as_matrix()
print(inputX)


[[  2.10400000e+03   3.00000000e+00]
 [  1.60000000e+03   3.00000000e+00]
 [  2.40000000e+03   3.00000000e+00]
 [  1.41600000e+03   2.00000000e+00]
 [  3.00000000e+03   4.00000000e+00]
 [  1.98500000e+03   4.00000000e+00]
 [  1.53400000e+03   3.00000000e+00]
 [  1.42700000e+03   3.00000000e+00]
 [  1.38000000e+03   3.00000000e+00]
 [  1.49400000e+03   3.00000000e+00]]

In [28]:
inputY


Out[28]:
array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1]])

In [30]:
# step 4 write out hyper params

learning_rate = 0.000001
training_epochs = 2000
display_steps = 50
n_samples = inputY.size

In [41]:
# step 5 - create computation graph
x = tf.placeholder(tf.float32, [None, 2])

# create weights
# 2x2 float matrix
W = tf.Variable(tf.zeros([2,2]))

# add biases
b = tf.Variable(tf.zeros([2]))

# multiply our weights by our inputs, first calculation
# weights are how we govern how data flows in our computation graph
y_values = tf.add(tf.matmul(x, W), b)

# softmax - activation function
y = tf.nn.softmax(y_values)

# Feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None,2])

In [43]:
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [52]:
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

for i in range(training_epochs):
    sess.run(optimizer, feed_dict={x: inputX, y_: inputY})
  
    if (i) % display_steps == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_: inputY})
        print('training step: ', '%04d' % (i), 'cost=',"{:.9f}".format(cc) )
        
print("Optimization finished")
training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b))


training step:  0000 cost= 0.121579930
training step:  0050 cost= 0.119997814
training step:  0100 cost= 0.119997784
training step:  0150 cost= 0.119997784
training step:  0200 cost= 0.119997762
training step:  0250 cost= 0.119997762
training step:  0300 cost= 0.119997755
training step:  0350 cost= 0.119997740
training step:  0400 cost= 0.119997740
training step:  0450 cost= 0.119997717
training step:  0500 cost= 0.119997717
training step:  0550 cost= 0.119997695
training step:  0600 cost= 0.119997695
training step:  0650 cost= 0.119997665
training step:  0700 cost= 0.119997665
training step:  0750 cost= 0.119997643
training step:  0800 cost= 0.119997643
training step:  0850 cost= 0.119997635
training step:  0900 cost= 0.119997621
training step:  0950 cost= 0.119997598
training step:  1000 cost= 0.119997598
training step:  1050 cost= 0.119997598
training step:  1100 cost= 0.119997583
training step:  1150 cost= 0.119997576
training step:  1200 cost= 0.119997546
training step:  1250 cost= 0.119997546
training step:  1300 cost= 0.119997546
training step:  1350 cost= 0.119997524
training step:  1400 cost= 0.119997524
training step:  1450 cost= 0.119997501
training step:  1500 cost= 0.119997501
training step:  1550 cost= 0.119997501
training step:  1600 cost= 0.119997479
training step:  1650 cost= 0.119997479
training step:  1700 cost= 0.119997457
training step:  1750 cost= 0.119997442
training step:  1800 cost= 0.119997427
training step:  1850 cost= 0.119997427
training step:  1900 cost= 0.119997405
training step:  1950 cost= 0.119997405
Optimization finished
Training cost= 0.119997 W= [[  1.09816581e-04  -1.09817403e-04]
 [  2.06348905e-05  -2.06348941e-05]] b= [  1.11517249e-06  -1.11517238e-06]

In [53]:
# step 6 run the prediction

sess.run(y, feed_dict={x:inputX})


Out[53]:
array([[ 0.61354446,  0.38645557],
       [ 0.58699107,  0.4130089 ],
       [ 0.62884092,  0.37115908],
       [ 0.57715046,  0.4228496 ],
       [ 0.6590513 ,  0.34094873],
       [ 0.60733908,  0.39266092],
       [ 0.58347243,  0.41652757],
       [ 0.57775003,  0.42225   ],
       [ 0.5752297 ,  0.42477027],
       [ 0.58133572,  0.41866425]], dtype=float32)