In [1]:
import graphlab
import numpy as np
In [2]:
# Build the house-sales SFrame from the 'home_data.gl/' directory (path is relative
# to the notebook's working directory).
sales = graphlab.SFrame('home_data.gl/')
In [3]:
# Bare expression: the notebook renders the SFrame as this cell's output.
sales
Out[3]:
In [4]:
# Send GraphLab Canvas output to the notebook itself, then draw price against
# living area as a scatter plot.
graphlab.canvas.set_target("ipynb")
sales.show(
    view="Scatter Plot",
    x='sqft_living',
    y='price'
)
In [5]:
# Random split with fraction 0.8 going to the first frame; the fixed seed keeps
# the partition identical from run to run.
train_data, test_data = sales.random_split(0.8, seed=0)
In [6]:
# Single-feature linear regression: predict 'price' from 'sqft_living' only,
# fitted on the training split.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=["sqft_living"]
)
In [7]:
# Mean sale price over the test split, as a reference scale for model errors.
mean_test_price_column = test_data['price']
print(mean_test_price_column.mean())
In [ ]:
# Print the evaluation metrics sqft_model reports on the held-out test split.
print(sqft_model.evaluate(test_data))
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# Two series on one set of axes:
#   '.' -> actual test prices against living area
#   '-' -> sqft_model's predictions for the same rows
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], sqft_model.predict(test_data), '-'
)
In [ ]:
# Display the 'coefficients' field of the fitted sqft_model.
sqft_model.get('coefficients')
In [ ]:
# Column names used as inputs for the multi-feature regression.
myfeatures = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode'
]
In [ ]:
# Open the Canvas overview for just the columns listed in myfeatures.
sales[myfeatures].show()
In [ ]:
# Box-and-whisker view of price grouped by zipcode.
sales.show(
    view='BoxWhisker Plot',
    x='zipcode',
    y='price'
)
In [ ]:
# Linear regression on the training split using every column in myfeatures.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=myfeatures
)
In [ ]:
# Echo the feature list the multi-feature model was given.
print(myfeatures)
In [ ]:
# Compare the two models on the same test split: first sqft_model,
# then my_features_model.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))
In [ ]:
# Select the row(s) whose 'id' equals '5309101200' (compared as a string).
house1 = sales[sales['id'] == '5309101200']
In [ ]:
# Bare expression: show the selected row(s) as this cell's output.
house1
In [ ]:
# Recorded sale price for house1.
print(house1['price'])
In [ ]:
# Price predicted for house1 by sqft_model.
print(sqft_model.predict(house1))
In [ ]:
# Price predicted for house1 by my_features_model.
print(my_features_model.predict(house1))
In [ ]:
# Select the row(s) whose 'id' equals '1925069082' (compared as a string).
house2 = sales[sales['id'] == '1925069082']
In [ ]:
# Bare expression: show the selected row(s) as this cell's output.
house2
In [ ]:
# For house2: recorded price, then the sqft_model prediction, then the
# my_features_model prediction.
print(house2['price'])
print(sqft_model.predict(house2))
print(my_features_model.predict(house2))
In [ ]:
# Keep only sales in zipcode '98039' (string comparison) and preview the
# first rows of the result.
highest_price = sales[sales['zipcode'] == '98039']
highest_price.head()
In [ ]:
# Average sale price over the rows selected into highest_price.
np.average(highest_price['price'])
In [ ]:
# Rows with living area strictly between 2000 and 4000 (both bounds exclusive).
# NOTE(review): confirm that exclusive bounds are what the analysis intends.
sqft_filter = sales[
    (sales['sqft_living'] > 2000) & (sales['sqft_living'] < 4000)
]
In [ ]:
# Fraction of all sales that fall inside sqft_filter; both counts are cast to
# float so the division is not truncated to an integer.
float(sqft_filter.num_rows()) / float(sales.num_rows())
In [ ]:
# Extended feature list: the six basic columns followed by additional
# property, location and neighbourhood columns.
advanced_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat',            # the lat-long of the parcel
    'long',
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15'      # average lot size of 15 nearest neighbors
]
In [ ]:
# Print sqft_model's evaluation on the test split again, for reference before
# fitting the larger model.
print(sqft_model.evaluate(test_data))
In [ ]:
# Fit the two multi-feature regressions on the training split.
#
# The model trained on `myfeatures` is bound to `my_features_model`, not to
# `sqft_model`: rebinding `sqft_model` here would make every later use of that
# name refer to a six-feature model while still reading as the
# sqft_living-only regression.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=myfeatures
)
advanced_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features
)
In [ ]:
# Print test-split evaluation for sqft_model, then for advanced_model.
print(sqft_model.evaluate(test_data))
print(advanced_model.evaluate(test_data))
In [ ]:
# Actual test prices and sqft_model's predictions, both against living area.
# Predictions are drawn as points rather than a connected line: the test rows
# are not sorted by sqft_living, so joining them in row order is only readable
# when the prediction depends on sqft_living alone.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], sqft_model.predict(test_data), '.'
)
In [ ]:
# Actual test prices and advanced_model's predictions, both against living area.
# advanced_model uses many features besides sqft_living, so its predictions are
# not a function of the x-axis; joining them with '-' in row order produces a
# scribble. Plot them as points instead.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], advanced_model.predict(test_data), '.'
)
In [ ]:
#Finished Week 2