In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from IPython.display import clear_output, Image, display, HTML
###### Do not modify here ######
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>" % size
    return strip_def
def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = graph_def
    # strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph' + str(np.random.rand()))
    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
""".format(code.replace('"', '"'))
display(HTML(iframe))
###### Do not modify here ######
###### Implement Data Preprocess here ######
def preprocess_data(X, y):
    original_mean = y.mean(axis=0)
    original_std = y.std(axis=0)
    upper_bound = original_mean + original_std * 1.5
    lower_bound = original_mean - original_std * 1.5
    filtered_X = []
    filtered_y = []
    # drop outliers: keep only samples whose target lies within mean +- 1.5*std
    for i in range(0, y.shape[0]):
        e = y[i]
        if lower_bound <= e <= upper_bound:
            filtered_X.append(X[i, :])
            filtered_y.append(y[i])
    return np.asarray(filtered_X), np.asarray(filtered_y)
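# Illustrative sketch only (not called anywhere below): the same outlier filter can be
# written without the explicit loop by using a NumPy boolean mask. The helper name
# preprocess_data_vectorized is introduced here purely for comparison.
def preprocess_data_vectorized(X, y):
    mean, std = y.mean(axis=0), y.std(axis=0)
    # keep rows whose target lies within mean +- 1.5*std, matching the inclusive bounds above
    mask = (y >= mean - 1.5 * std) & (y <= mean + 1.5 * std)
    return X[mask], y[mask]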
housing = fetch_california_housing()
num_data = housing.target.shape[0]
print('Num of data:', num_data, '\n')
# split data to training and testing sets
train_num = int(num_data * 0.9)
test_num = num_data - train_num
print('train_num:', train_num)
print('test_num:', test_num)
Train_X_original = housing.data[:train_num]
Train_y_original = housing.target[:train_num]
Test_X = housing.data[train_num:]
Test_y = housing.target[train_num:]
# apply data preprocessing
Train_X, Train_y = preprocess_data(Train_X_original, Train_y_original)
print('Train_X.shape?', Train_X.shape)
print('Train_y.shape?', Train_y.shape)
# add bias
Train_bias = np.full((Train_X.shape[0], 1), 1)
Train_X = np.concatenate((Train_X, Train_bias), axis=1)
Test_bias = np.full((test_num, 1), 1)
Test_X = np.concatenate((Test_X, Test_bias), axis=1)
# reshape Train_y and Test_y from 1-D arrays to column vectors so they can be used in matmul
Train_y = Train_y.reshape(-1, 1)
Test_y = Test_y.reshape(-1, 1)
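# Optional sanity check (illustrative addition, not reused below): with the bias column and
# column-vector targets in place, NumPy's np.linalg.lstsq gives a reference least-squares
# solution to compare against the closed-form Weight computed further down.
Weight_ref, _, _, _ = np.linalg.lstsq(Train_X, Train_y, rcond=None)
print('Weight_ref.shape?', Weight_ref.shape)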
# convert the NumPy arrays to TensorFlow constant tensors
Train_X = tf.constant(Train_X)
Test_X = tf.constant(Test_X)
Train_y = tf.constant(Train_y)
Test_y = tf.constant(Test_y)
# weight formula (5.12): Weight = (X^T X)^(-1) X^T y
Weight = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(Train_X, Train_X, transpose_a=True)), Train_X, transpose_b=True), Train_y)
Predict_y = tf.matmul(Test_X, Weight)
error = tf.reduce_mean(tf.abs(tf.div(tf.subtract(Test_y, Predict_y), Test_y)))
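# Optional extra metric (illustrative addition, not part of the required error rate): a
# root-mean-square-error op on the test set. It is defined but never evaluated in the
# session below, so it only appears as an extra branch in the graph visualization.
rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(Test_y, Predict_y))))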
###### Implement Data Preprocess here ######
###### Start TF session ######
with tf.Session() as sess:
    # evaluating `error` runs the whole graph, so Weight and Predict_y are computed on the way
    print('-' * 40)
    print('Error Rate:', sess.run(error))
    print('-' * 40)
    show_graph(tf.get_default_graph().as_graph_def())
###### Start TF session ######
###### Explain the graph ######
# Formula (5.12): Weight = (X^T X)^(-1) X^T y
# First, Train_X^T * Train_X is computed with tf.matmul, using transpose_a=True to transpose Train_X.
# Second, tf.matrix_inverse inverts the product Train_X^T * Train_X.
# Third, we can expand Matmul_1 to Matmul_3 in the graph:
#   Matmul_1 multiplies the inverse matrix by Train_X^T (via transpose_b=True).
#   Matmul_2 multiplies the result of Matmul_1 by Train_y to obtain Weight.
#   Matmul_3 multiplies Test_X by Weight to obtain Predict_y.
# To calculate the error rate, Predict_y is subtracted from Test_y,
# tf.div divides that difference by Test_y,
# tf.abs takes the absolute value,
# and tf.reduce_mean averages it over the test set (total_sum / test_y_num).
###### Explain the graph ######
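# Compact NumPy restatement of the steps described above, run on small random data purely
# as an illustration; every name here (X_demo, y_demo, ...) is hypothetical and independent
# of the tensors defined earlier.
rng = np.random.RandomState(0)
X_demo = np.concatenate((rng.rand(10, 3), np.ones((10, 1))), axis=1)  # features + bias column
y_demo = rng.rand(10, 1) + 1.0                                        # keep targets away from zero
w_demo = np.linalg.inv(X_demo.T @ X_demo) @ X_demo.T @ y_demo         # formula (5.12)
pred_demo = X_demo @ w_demo
print('demo relative error:', np.mean(np.abs((y_demo - pred_demo) / y_demo)))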