In [1]:
    
import graphlab
    
    
In [2]:
    
# Load the SFrame stored in the 'home_data.gl/' directory; every later cell
# reads from `sales`.
sales = graphlab.SFrame('home_data.gl/')
    
    
    
In [42]:
    
# Bare expression: the notebook renders `sales` as this cell's output.
sales
    
    Out[42]:
In [4]:
    
# Point GraphLab Canvas at the 'ipynb' target before calling .show().
# NOTE(review): presumably this makes Canvas render inside the notebook
# instead of a separate window -- confirm against the GraphLab docs.
graphlab.canvas.set_target('ipynb')
# Scatter plot with sqft_living on the x axis and price on the y axis.
sales.show(view="Scatter Plot", x="sqft_living", y="price")
    
    
In [43]:
    
# Fixed seed handed to random_split below.
seed = 0
# Split `sales` with fraction 0.80; the first returned frame is used for
# training and the second is held out for evaluation.
train_data, test_data = sales.random_split(0.80, seed=seed)
    
In [44]:
    
# Fit a linear regression on train_data that predicts 'price' from the single
# feature 'sqft_living'.
sqft_model = graphlab.linear_regression.create(train_data, target="price", features=['sqft_living'])
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [45]:
    
# Mean of the 'price' column over the held-out rows, for reference when
# reading the evaluation numbers printed by later cells.
print(test_data['price'].mean())
    
    
In [46]:
    
# Evaluate the single-feature model on the held-out rows and print the result.
print(sqft_model.evaluate(test_data))
    
    
In [47]:
    
import matplotlib
# Select the TkAgg backend before pyplot is imported in the next cell.
# NOTE(review): presumably a workaround for a backend problem on the author's
# machine; the `%matplotlib inline` magic that follows likely replaces this
# choice -- confirm whether this line is still needed.
matplotlib.use('TkAgg')
    
    
In [48]:
    
import matplotlib.pyplot as plt
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
    
In [49]:
    
# Plot the held-out prices as points and overlay the single-feature model's
# predictions as a line, both against sqft_living, so the fit can be judged
# visually. Axis labels and a legend let the figure stand on its own.
plt.plot(test_data['sqft_living'], test_data['price'], '.',
         label='actual price')
plt.plot(test_data['sqft_living'], sqft_model.predict(test_data), '-',
         label='sqft_model prediction')
plt.xlabel('sqft_living')
plt.ylabel('price')
# Trailing semicolon keeps the returned object's repr out of the cell output.
plt.legend();
    
    Out[49]:
    
In [50]:
    
# Display the fitted model's 'coefficients' entry as the cell output.
sqft_model.get('coefficients')
    
    Out[50]:
In [51]:
    
# Columns fed to the multi-feature price model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
    
In [52]:
    
# Visualize only the columns listed in my_features.
sales[my_features].show()
    
    
In [53]:
    
# Box-and-whisker view with zipcode on the x axis and price on the y axis.
sales.show(view='BoxWhisker Plot', x="zipcode", y="price")
    
    
In [54]:
    
# Fit a second linear regression on train_data, again targeting 'price', this
# time using every column in my_features.
my_features_model = graphlab.linear_regression.create(train_data, target='price', features=my_features)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [22]:
    
# Echo the feature list the multi-feature model was trained with.
print(my_features)
    
    
In [55]:
    
# Print the held-out evaluation of the single-feature model, then of the
# multi-feature model, so the two can be compared side by side.
for model in (sqft_model, my_features_model):
    print(model.evaluate(test_data))
    
    
In [25]:
    
# Select the rows of `sales` whose 'id' equals the string '5309101200'.
house1 = sales[sales['id']=='5309101200']
    
In [28]:
    
# Bare expression: the notebook renders `house1` as this cell's output.
house1
    
    Out[28]:
In [30]:
    
# Recorded 'price' value(s) for house1, to compare with the predictions below.
print(house1['price'])
    
    
In [31]:
    
# Prediction for house1 from the single-feature (sqft_living) model.
print(sqft_model.predict(house1))
    
    
In [33]:
    
# Prediction for house1 from the multi-feature model.
print(my_features_model.predict(house1))
    
    
In [34]:
    
# Select the rows of `sales` whose 'id' equals the string '1925069082'.
house2 = sales[sales['id'] == '1925069082']
    
In [35]:
    
# Bare expression: the notebook renders `house2` as this cell's output.
house2
    
    Out[35]:
In [36]:
    
# Show house2's recorded price first, then what each model predicts for it
# (single-feature model before the multi-feature one).
print(house2['price'])
for model in (sqft_model, my_features_model):
    print(model.predict(house2))
    
    
In [37]:
    
# Hand-written description of a single hypothetical house. Each value is
# wrapped in a one-element list, so the result is a dict of one-row columns.
bill_gates = {
    column: [value]
    for column, value in (
        ('bedrooms', 8),
        ('bathrooms', 25),
        ('sqft_living', 50000),
        ('sqft_lot', 225000),
        ('floors', 4),
        ('zipcode', '98039'),  # kept as a string, unlike the numeric columns
        ('condition', 10),
        ('grade', 10),
        ('waterfront', 1),
        ('view', 4),
        ('sqft_above', 37500),
        ('sqft_basement', 12500),
        ('yr_built', 1994),
        ('yr_renovated', 2010),
        ('lat', 47.627606),
        ('long', -122.242054),
        ('sqft_living15', 5000),
        ('sqft_lot15', 40000),
    )
}
    
In [40]:
    
# Turn the bill_gates dict into an SFrame once, then print the prediction of
# the single-feature model followed by that of the multi-feature model.
bill_gates_sf = graphlab.SFrame(bill_gates)
print(sqft_model.predict(bill_gates_sf))
print(my_features_model.predict(bill_gates_sf))
    
    
In [61]:
    
# Count the houses whose living area lies between 2000 and 4000 sq.ft.
# (both bounds inclusive) and report what fraction of all sales they are.
in_range = (sales['sqft_living'] >= 2000) & (sales['sqft_living'] <= 4000)
mid_size_sales = sales[in_range]

total_rows = sales.num_rows()
mid_size_rows = mid_size_sales.num_rows()

# print(...) with a single argument behaves the same under Python 2 and 3 and
# matches the call style used by the other cells.
print(total_rows)
print(mid_size_rows)
# float() forces true division under Python 2 as well.
print(float(mid_size_rows) / total_rows)
    
    
In [63]:
    
# Feature list for the larger model: six basic columns followed by the
# additional descriptive columns.
advanced_features = (
    ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode']
    + [
        'condition',       # condition of house
        'grade',           # measure of quality of construction
        'waterfront',      # waterfront property
        'view',            # type of view
        'sqft_above',      # square feet above ground
        'sqft_basement',   # square feet in basement
        'yr_built',        # the year built
        'yr_renovated',    # the year renovated
        'lat',             # the lat-long of the parcel (with 'long')
        'long',
        'sqft_living15',   # average sq.ft. of 15 nearest neighbors
        'sqft_lot15',      # average lot size of 15 nearest neighbors
    ]
)
    
In [65]:
    
# Fit a third linear regression on train_data, targeting 'price' and using
# every column listed in advanced_features.
advanced_features_model = graphlab.linear_regression.create(train_data, target='price', features=advanced_features)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [66]:
    
# Print the held-out evaluation of the multi-feature model, then of the
# advanced-feature model, so the two can be compared side by side.
for model in (my_features_model, advanced_features_model):
    print(model.evaluate(test_data))