In [1]:
import graphlab
In [2]:
# Load the house-sales data from the 'home_data.gl/' directory (path is
# relative to the notebook's working directory) into an SFrame.
sales = graphlab.SFrame('home_data.gl/')
In [42]:
# Bare expression: the notebook renders the SFrame as this cell's output.
sales
Out[42]:
In [4]:
# Point GraphLab Canvas at the 'ipynb' target (presumably so plots render
# inside the notebook rather than a separate window -- confirm against docs).
graphlab.canvas.set_target('ipynb')
# Scatter plot of price (y) against sqft_living (x) over the full data set.
sales.show(view="Scatter Plot", x="sqft_living", y="price")
In [43]:
# Fixed seed so the train/test partition is identical on every run.
seed = 0
# 0.80 is the split fraction handed to random_split; the two resulting
# SFrames are unpacked as (train_data, test_data).
train_data, test_data = sales.random_split(fraction=0.80, seed=seed)
In [44]:
# Single-feature linear regression: predict 'price' from 'sqft_living' only.
#
# validation_set=None is passed explicitly: with the default ('auto')
# GraphLab holds out a random slice of train_data for progress reporting,
# which makes the fitted coefficients differ from run to run even though the
# train/test split itself is seeded.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None
)
In [45]:
# Mean of the 'price' column in the held-out test set, printed as a
# reference point for reading the model error figures.
print(test_data['price'].mean())
In [46]:
# Evaluate the single-feature model on the held-out test set and print
# whatever metrics evaluate() returns.
print(sqft_model.evaluate(test_data))
In [47]:
import matplotlib
# Select the Tk backend before pyplot is imported.
# NOTE(review): the notebook later runs `%matplotlib inline`, which picks the
# inline backend for display; this call looks like a platform workaround for
# importing pyplot -- confirm it is still needed.
matplotlib.use('TkAgg')
In [48]:
import matplotlib.pyplot as plt
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
In [49]:
# Two series on one set of axes, both against living area of the test houses:
#   '.' -> actual sale prices
#   '-' -> prices predicted by sqft_model
test_sqft = test_data['sqft_living']
plt.plot(test_sqft, test_data['price'], '.',
         test_sqft, sqft_model.predict(test_data), '-')
Out[49]:
In [50]:
# Look up the model's 'coefficients' field; as the last expression of the
# cell, the returned value is rendered as the cell output.
sqft_model.get('coefficients')
Out[50]:
In [51]:
# Column names used as inputs for the multi-feature regression model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [52]:
# Restrict sales to the my_features columns and open GraphLab's default
# visualisation for that subset.
sales[my_features].show()
In [53]:
# Box-and-whisker view of price (y) grouped by zipcode (x).
sales.show(view='BoxWhisker Plot', x="zipcode", y="price")
In [54]:
# Multi-feature linear regression: predict 'price' from the columns listed in
# my_features.
#
# validation_set=None is passed explicitly: with the default ('auto')
# GraphLab holds out a random slice of train_data for progress reporting,
# which makes the fitted model differ from run to run even though the
# train/test split itself is seeded.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None
)
In [22]:
# Echo the feature list that my_features_model was trained on.
print(my_features)
In [55]:
# Print held-out evaluation results for the single-feature model first, then
# for the multi-feature model, so the two can be compared side by side.
for fitted_model in (sqft_model, my_features_model):
    print(fitted_model.evaluate(test_data))
In [25]:
# Select the row(s) whose 'id' equals this listing id. The id is compared as
# a string, so this assumes the 'id' column holds strings.
house1_id = '5309101200'
house1 = sales[sales['id'] == house1_id]
In [28]:
# Bare expression: render the selected listing as this cell's output.
house1
Out[28]:
In [30]:
# Actual recorded price for house1.
print(house1['price'])
In [31]:
# Price predicted for house1 by the single-feature (sqft_living) model.
print(sqft_model.predict(house1))
In [33]:
# Price predicted for house1 by the multi-feature model.
print(my_features_model.predict(house1))
In [34]:
# Select the row(s) whose 'id' equals this listing id. The id is compared as
# a string, so this assumes the 'id' column holds strings.
house2_id = '1925069082'
house2 = sales[sales['id'] == house2_id]
In [35]:
# Bare expression: render the selected listing as this cell's output.
house2
Out[35]:
In [36]:
# Actual price of house2, followed by the prediction from the single-feature
# model and then from the multi-feature model.
print(house2['price'])
for fitted_model in (sqft_model, my_features_model):
    print(fitted_model.predict(house2))
In [37]:
# Hand-built single-row record for one hypothetical house. Every value is
# wrapped in a one-element list so the dict can be passed to graphlab.SFrame
# as column-name -> column-values.
bill_gates = {
    # Features used by my_features_model.
    'bedrooms': [8],
    'bathrooms': [25],
    'sqft_living': [50000],
    'sqft_lot': [225000],
    'floors': [4],
    'zipcode': ['98039'],
    # Remaining columns.
    'condition': [10],
    'grade': [10],
    'waterfront': [1],
    'view': [4],
    'sqft_above': [37500],
    'sqft_basement': [12500],
    'yr_built': [1994],
    'yr_renovated': [2010],
    'lat': [47.627606],
    'long': [-122.242054],
    'sqft_living15': [5000],
    'sqft_lot15': [40000],
}
In [40]:
# Build the one-row SFrame once and reuse it for both models, instead of
# constructing an identical SFrame from the same dict for each predict call.
bill_gates_sframe = graphlab.SFrame(bill_gates)

# Prediction from the single-feature model, then from the multi-feature model.
print(sqft_model.predict(bill_gates_sframe))
print(my_features_model.predict(bill_gates_sframe))
In [61]:
# Keep only houses whose living area lies in [2000, 4000] sq.ft.:
# x applies the lower bound, y then applies the upper bound to x.
x = sales[sales['sqft_living'] >= 2000]
y = x[x['sqft_living'] <= 4000]

# print(...) call form matches the rest of the notebook and, with a single
# argument, prints the same text under Python 2 while also being valid
# Python 3 (the bare `print expr` statement is a SyntaxError there).
print(sales.num_rows())
print(y.num_rows())
# float(...) forces true division so the ratio is not truncated to an integer
# under Python 2.
print(float(y.num_rows()) / sales.num_rows())
In [63]:
# Feature list for the larger regression model: the six basic columns first,
# followed by the additional columns.
advanced_features = [
    # basic columns
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    # additional columns
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat',            # latitude of the parcel
    'long',           # longitude of the parcel
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]
In [65]:
# Linear regression on the extended feature list in advanced_features.
#
# validation_set=None is passed explicitly: with the default ('auto')
# GraphLab holds out a random slice of train_data for progress reporting,
# which makes the fitted model differ from run to run even though the
# train/test split itself is seeded.
advanced_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features,
    validation_set=None
)
In [66]:
# Print held-out evaluation results for the multi-feature model first, then
# for the advanced-feature model, so the two can be compared.
for fitted_model in (my_features_model, advanced_features_model):
    print(fitted_model.evaluate(test_data))