In [42]:
# import
import graphlab as gl
import matplotlib.pyplot as plt
In [43]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Select 'ipynb' as the GraphLab Canvas target (presumably so Canvas views
# render inside the notebook rather than a separate window -- confirm in the GraphLab docs).
gl.canvas.set_target('ipynb')
In [44]:
# Load the house-sales SFrame from 'data/kc_house_data.gl/' (path is relative
# to the notebook's working directory) and preview its first 4 rows.
sales = gl.SFrame('data/kc_house_data.gl/')
sales.head(4)
Out[44]:
In [45]:
# Random train/test split of `sales` with fraction .8; the seed is fixed (0)
# so that re-running the notebook is intended to give the same split.
train_data,test_data = sales.random_split(.8,seed=0)
In [46]:
def simple_linear_regression(input_feature, output):
    """Fit ``output ~ intercept + slope * input_feature`` with the closed-form
    least-squares solution.

    Args:
        input_feature: numeric array-like supporting elementwise ``*`` and ``.sum()``.
        output: numeric array-like supporting ``len()`` and ``.sum()``; multiplied
            elementwise with ``input_feature``.

    Returns:
        A tuple ``(intercept, slope)`` -- intercept first, slope second.
    """
    n = float(len(output))

    # Sums needed by the closed-form solution.
    sum_x = input_feature.sum()
    sum_y = output.sum()
    sum_xy = (input_feature * output).sum()
    sum_xx = (input_feature * input_feature).sum()

    numerator = n * sum_xy - sum_y * sum_x
    denominator = n * sum_xx - sum_x * sum_x
    slope = numerator / denominator

    # Equivalent to mean(y) - slope * mean(x).
    intercept = sum_y / n - slope * sum_x / n
    return (intercept, slope)
In [47]:
def get_regression_predictions(input_feature, intercept, slope):
    """Evaluate the fitted line at ``input_feature``.

    Args:
        input_feature: a scalar or an array-like supporting scalar arithmetic.
        intercept: intercept of the fitted line.
        slope: slope of the fitted line.

    Returns:
        ``intercept + slope * input_feature``.
    """
    return intercept + slope * input_feature
In [48]:
# Single-feature regression inputs, both taken from the TRAINING split:
# the 'sqft_living' column is the feature and 'price' is the target.
input_feature = train_data['sqft_living']
output = train_data['price']
Using your slope and intercept from above, what is the predicted price for a house with 2650 sqft?
In [49]:
# Fit the sqft_living -> price line on the training data. The function returns
# (intercept, slope) in that order; later cells read `intercept` and `slope`.
intercept, slope = simple_linear_regression(input_feature, output)
# Predicted price for an input of 2650 (sqft_living).
print(get_regression_predictions(2650, intercept, slope))
In [50]:
def get_residual_sum_of_squares(input_feature, output, intercept,slope):
    """Residual sum of squares of the line ``intercept + slope * x`` on the given data.

    Args:
        input_feature: array-like of feature values.
        output: array-like of observed targets, subtracted elementwise from the predictions.
        intercept: intercept of the fitted line.
        slope: slope of the fitted line.

    Returns:
        The sum over all rows of ``(output - prediction) ** 2``.
    """
    fitted = get_regression_predictions(input_feature, intercept, slope)
    residuals = output - fitted
    return (residuals ** 2).sum()
According to this function and the slope and intercept computed above, what is the RSS for the simple linear regression using square feet to predict prices on TRAINING data?
In [51]:
# RSS of the fitted sqft_living model on the TRAINING data
# (input_feature and output were both taken from train_data).
print(get_residual_sum_of_squares(input_feature, output, intercept,slope))
In [52]:
def inverse_regression_predictions(output, intercept, slope):
    """Invert the fitted line: find the input whose prediction equals ``output``.

    Args:
        output: target value(s) to invert.
        intercept: intercept of the fitted line.
        slope: slope of the fitted line.

    Returns:
        ``(output - intercept) / slope`` computed in floating point.
    """
    offset = output - intercept
    # The leading 1.0 keeps the division floating-point even when every
    # argument is an integer.
    return 1.0 * offset / slope
According to this function and the regression slope and intercept from (3), what is the estimated square footage for a house costing $800,000?
In [53]:
# Estimated sqft_living for a price of 800000 under the fitted model; left as the
# cell's last expression so the notebook displays the value.
inverse_regression_predictions(800000, intercept, slope)
Out[53]:
In [54]:
# Show the fitted parameters of the sqft_living model, intercept first.
print(intercept, slope)
Which model (square feet or bedrooms) has the lowest RSS on TEST data? Think about why this might be the case.
In [55]:
# Fit price ~ bedrooms on the training data.
# simple_linear_regression returns (intercept, slope) in that order, so the
# unpacking must list the intercept first; otherwise the two values end up
# under each other's names and every downstream use is silently wrong.
bedroom_intercept, bedroom_slope = simple_linear_regression(train_data['bedrooms'], train_data['price'])
In [56]:
# RSS of the bedrooms model evaluated on the TEST split. Arguments follow the
# (input_feature, output, intercept, slope) order of get_residual_sum_of_squares.
bedroom_RSS = get_residual_sum_of_squares(test_data['bedrooms'], test_data['price'], bedroom_intercept,bedroom_slope)
print(bedroom_RSS)
In [58]:
# Fit price ~ sqft_living on the training data. simple_linear_regression
# returns (intercept, slope) in that order, so unpack intercept first.
sqft_intercept, sqft_slope = simple_linear_regression(train_data['sqft_living'], train_data['price'])
# Evaluate on the TEST split with the parameters fitted in this cell, rather
# than relying on `intercept`/`slope` left over from an earlier cell.
sqft_RSS = get_residual_sum_of_squares(test_data['sqft_living'], test_data['price'], sqft_intercept, sqft_slope)
print(sqft_RSS)
In [ ]: