In [57]:

    
import pandas as pd
import numpy as np
import math

Load train and test dataset



In [58]:

    
dtype_dict = {'bathrooms':float, 'waterfront':int, 'sqft_above':int, 'sqft_living15':float, 'grade':int, 'yr_renovated':int, 'price':float, 'bedrooms':float, 'zipcode':str, 'long':float, 'sqft_lot15':float, 'sqft_living':float, 'floors':str, 'condition':int, 'lat':float, 'date':str, 'sqft_basement':int, 'yr_built':int, 'id':str, 'sqft_lot':int, 'view':int}



In [59]:

    
train_data = pd.read_csv('kc_house_train_data.csv', dtype=dtype_dict)
test_data = pd.read_csv('kc_house_test_data.csv', dtype=dtype_dict)

Define functions for finding optimums



In [60]:

    
def get_numpy_data(dataset, features, output_name):
    dataset['constant'] = 1
    features = ['constant'] + features
    return (dataset[features], dataset[output_name])



In [61]:

    
def predict_output(feature_matrix, weights):
    return feature_matrix.dot(weights)



In [62]:

    
def regression_gradient_descent_bulk(feature_matrix, output, initial_weights, step_size, tolerance):
    converged = False
    weights = np.array(initial_weights)
    steps = 0
    while not converged:
        steps += 1
        gradient_sum_squares = 0
        old_weights = weights
        delta = (output.values - feature_matrix.values.dot(old_weights)).T.dot(feature_matrix.values)
        weights = old_weights + step_size * delta
        gradient_sum_squares = delta.T.dot(delta)
        gradient_magnitude = np.sqrt(gradient_sum_squares)
        if gradient_magnitude < tolerance or steps > 1000:
            converged = True
    return weights



In [63]:

    
def feature_derivative(errors, feature):
    derivative = 2 * errors.dot(feature)
    return derivative



In [64]:

    
(example_features, example_output) = get_numpy_data(train_data, ['sqft_living'], 'price') 
my_weights = np.array([0., 0.]) # this makes all the predictions 0
test_predictions = predict_output(example_features, my_weights) 
# just like SFrames 2 numpy arrays can be elementwise subtracted with '-': 
errors = test_predictions - example_output # prediction errors in this case is just the -example_output
feature = example_features.values[:,0] # let's compute the derivative with respect to 'constant', the ":" indicates "all rows"
derivative = feature_derivative(errors, feature)
print(derivative)
print(-np.sum(example_output)*2) # should be the same as derivative









    



-18752698920.0
-18752698920.0



In [103]:

    
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    converged = False
    weights = np.array(initial_weights)
    step = 0
    while not converged:
        step += 1
        old_weights = weights
        # compute the predictions based on feature_matrix and weights:
        predictions = feature_matrix.dot(old_weights)
        # compute the errors as predictions - output:
        errors = predictions - output        
        gradient_sum_squares = 0 # initialize the gradient
        # while not converged, update each weight individually:
        for i in range(len(weights)):
            # Recall that feature_matrix[:, i] is the feature column associated with weights[i]
            # compute the derivative for weight[i]:
            derivative = feature_derivative(errors, feature_matrix.values[:, i])
            #print('derivative:  %s' % derivative)
            # add the squared derivative to the gradient magnitude
            gradient_sum_squares += derivative**2
            #print('gradient_sum_squares: %s' % gradient_sum_squares)
            # update the weight based on step size and derivative:
            weights[i] = old_weights[i] - step_size * derivative
        gradient_magnitude = math.sqrt(gradient_sum_squares)
        #print('gradient_magnitude: %s' % gradient_magnitude)
        if gradient_magnitude < tolerance or step > 100000:
            converged = True
    return weights



In [104]:

    
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7



In [105]:

    
weights = regression_gradient_descent(simple_feature_matrix, output, initial_weights, step_size, tolerance)
weights









    Out[105]:





array([-46999.88716555,    281.91211918])

Computing simple feature predictions for test data



In [106]:

    
(test_simple_feature_matrix, test_output) = get_numpy_data(test_data, simple_features, my_output)
test_weights = regression_gradient_descent(test_simple_feature_matrix, test_output, initial_weights, step_size, tolerance)



In [107]:

    
test_data['prediction'] = test_simple_feature_matrix.dot(test_weights)



In [108]:

    
model1_prediction = test_data.iloc[[0]]['prediction']
model1_prediction









    Out[108]:





0    356774.140275
Name: prediction, dtype: float64

Coputing RSS for test data



In [109]:

    
RSS = sum((test_data['price'] - test_data['prediction'])**2)



In [110]:

    
RSS









    Out[110]:





275395691278133.28

Build models with more then one predictor variable



In [111]:

    
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
(feature_matrix, output) = get_numpy_data(train_data, model_features,my_output)
initial_weights = np.array([-100000., 1., 1.])
step_size = 4e-12
tolerance = 1e9



In [112]:

    
weights = regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance)
weights









    Out[112]:





array([ -9.99999605e+04,   2.45073907e+02,   6.52781074e+01])



In [113]:

    
test_data['prediction_2'] = feature_matrix.dot(weights)



In [114]:

    
model2_prediction = test_data.iloc[[0]]['prediction_2']
model2_prediction









    Out[114]:





0    276659.913727
Name: prediction_2, dtype: float64

Actual price for first house:



In [115]:

    
actual_price = test_data.iloc[[0]]['price']



In [116]:

    
abs(actual_price-model1_prediction) < abs(actual_price - model2_prediction)









    Out[116]:





0    False
dtype: bool

Computing RSS for second model



In [88]:

    
RSS_2 = sum((test_data['price'] - test_data['prediction_2'])**2)



In [89]:

    
RSS_2









    Out[89]:





836745380608789.88



In [90]:

    
RSS < RSS_2









    Out[90]:





True



In [ ]:



In [ ]: