In [1]:
import tensorflow as tf
import numpy as np 
from sklearn.datasets import fetch_california_housing
from IPython.display import clear_output, Image, display, HTML

###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = graph_def
    #strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
    
###### Do not modify here ######

###### Implement Data Preprocess here ######
def preprocess_data(X, y):
    original_mean = y.mean(axis=0)
    original_std = y.std(axis=0)
    
    upper_bound = original_mean + original_std * 1.5
    lower_bound = original_mean - original_std * 1.5
    
    filtered_X = []
    filtered_y = []
    # drop outliers: keep only samples whose target lies within mean +- 1.5*std
    for i in range(0, y.shape[0]):
        e = y[i]
        if lower_bound <= e <= upper_bound:
            filtered_X.append(X[i, :])
            filtered_y.append(y[i])
        
    return np.asarray(filtered_X), np.asarray(filtered_y)
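
# A vectorized sketch of the same filter (not used below; `preprocess_data_vectorized`
# is introduced here only for illustration): the mean +- 1.5*std rule can be expressed
# as a single boolean mask instead of a Python loop, which is usually faster on large arrays.
def preprocess_data_vectorized(X, y):
    mean, std = y.mean(axis=0), y.std(axis=0)
    mask = (y >= mean - 1.5 * std) & (y <= mean + 1.5 * std)
    return X[mask], y[mask]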

housing = fetch_california_housing()
num_data = housing.target.shape[0]
print('Num of data:', num_data, '\n')

# split data to training and testing sets
train_num = int(num_data * 0.9)
test_num = num_data - train_num
print('train_num:', train_num)
print('test_num:', test_num)

Train_X_original = housing.data[:train_num]
Train_y_original = housing.target[:train_num]

Test_X = housing.data[train_num:]
Test_y = housing.target[train_num:]

# apply data preprocessing
Train_X, Train_y = preprocess_data(Train_X_original, Train_y_original)
print('Train_X.shape?', Train_X.shape)
print('Train_y.shape?', Train_y.shape)

# append a bias column of ones to the feature matrices
Train_bias = np.full((Train_X.shape[0], 1), 1)
Train_X = np.concatenate((Train_X, Train_bias), axis=1)

Test_bias = np.full((test_num, 1), 1)
Test_X = np.concatenate((Test_X, Test_bias), axis=1)

# reshape Train_y and Test_y from 1-D arrays to column vectors for matmul
Train_y = Train_y.reshape(-1, 1)
Test_y = Test_y.reshape(-1, 1)

# convert the NumPy arrays to TensorFlow constant tensors
Train_X = tf.constant(Train_X)
Test_X = tf.constant(Test_X)
Train_y = tf.constant(Train_y)
Test_y = tf.constant(Test_y)

# closed-form weight solution, formula (5.12): W = (X^T X)^(-1) X^T y
XtX_inv = tf.matrix_inverse(tf.matmul(Train_X, Train_X, transpose_a=True))
Weight = tf.matmul(tf.matmul(XtX_inv, Train_X, transpose_b=True), Train_y)
Predict_y = tf.matmul(Test_X, Weight)
error = tf.reduce_mean(tf.abs(tf.div(tf.subtract(Test_y, Predict_y), Test_y)))

###### Implement Data Preprocess here ######
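
# Sketch of what the `error` op computes (an illustrative check; the toy numbers below
# are made up, not from the dataset): it is the mean absolute relative error,
# mean(|(y_true - y_pred) / y_true|), mirroring the tf.subtract/tf.div/tf.abs/tf.reduce_mean chain above.
_y_true = np.array([[2.0], [4.0]])
_y_pred = np.array([[1.5], [5.0]])
_toy_error = np.mean(np.abs((_y_true - _y_pred) / _y_true))  # (0.25 + 0.25) / 2 = 0.25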

###### Start TF session ######
with tf.Session() as sess:
    # running `error` below evaluates the whole graph, including Weight and Predict_y,
    # so no separate eval() calls are needed
    
    print('-' * 40)
    print('Error Rate:', sess.run(error))
    print('-' * 40)
    show_graph(tf.get_default_graph().as_graph_def())
    
###### Start TF session ######

###### Explain the graph ######
# Following the weight formula used above, we first multiply the transpose of Train_X by Train_X.
# We use tf.matmul with transpose_a=True so that Train_X is transposed for us.
# Second, we use tf.matrix_inverse to invert the product of Train_X_transpose and Train_X.
# Third, we can expand the MatMul[1-3] nodes in the graph:
# MatMul_1 represents the inverse matrix multiplied by Train_X_transpose.
# MatMul_2 represents the result of MatMul_1 multiplied by Train_y, which gives the Weight.
# MatMul_3 represents Test_X multiplied by Weight, which gives Predict_y.
# To calculate the error rate, we subtract Predict_y from Test_y,
# use tf.div to divide that difference by Test_y,
# use tf.abs to take the absolute value,
# and finally use tf.reduce_mean to average the error rate (total_sum / test_y_num).
###### Explain the graph ######
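
# Sanity-check sketch of the closed-form solution used above, W = (X^T X)^(-1) X^T y,
# done in plain NumPy on small synthetic data (all names and values here are illustrative only):
_X = np.random.rand(100, 3)
_true_w = np.array([[1.0], [2.0], [3.0]])
_y = np.dot(_X, _true_w)  # noiseless targets, so the recovered weights should match exactly
_w_closed = np.dot(np.linalg.inv(np.dot(_X.T, _X)), np.dot(_X.T, _y))
assert np.allclose(_w_closed, _true_w)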


Num of data: 20640 

train_num: 18576
test_num: 2064
Train_X.shape? (16674, 8)
Train_y.shape? (16674,)
----------------------------------------
Error Rate: 0.302088199027
----------------------------------------

In [ ]: