In [1]:
import graphlab
In [2]:
# Load the house-sales dataset from the SFrame directory that sits one level
# above this notebook; every later cell reads from `sales`.
sales = graphlab.SFrame('../home_data.gl/')
In [ ]:
# Bare last expression: the notebook renders the SFrame as this cell's output.
sales
In [3]:
# Route GraphLab Canvas output into the notebook, then plot price against
# living area as a scatter plot.
graphlab.canvas.set_target('ipynb')
sales.show(
    view="Scatter Plot",
    x="sqft_living",
    y="price"
)
In [4]:
# Split `sales` with fraction 0.8 into train/test; the fixed seed keeps the
# split identical on every run.
train_data, test_data = sales.random_split(0.8, seed=0)
In [ ]:
# Baseline: linear regression of price on living area only.
# validation_set=None: with the default ('auto') GraphLab Create holds out a
# random, unseeded slice of the training rows for progress tracking, so the
# fitted model and every metric derived from it would change between runs.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target="price",
    features=['sqft_living'],
    validation_set=None
)
In [ ]:
# Mean sale price over the test split, for scale when reading the error
# metrics. Function-call form of print gives the same output on Python 2
# and also parses on Python 3.
print(test_data['price'].mean())
In [ ]:
# Evaluate the single-feature model on the held-out test split.
# print(...) form works on both Python 2 and Python 3.
print(sqft_model.evaluate(test_data))
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# Test-set prices as dots, with the single-feature model's predictions drawn
# as a line over the same x values.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], sqft_model.predict(test_data), '-'
)
In [5]:
# Column names used as inputs for the multi-feature models below.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [ ]:
# Visualize only the columns listed in `my_features`.
sales[my_features].show()
In [ ]:
# Box-and-whisker view of price grouped by zipcode.
sales.show(
    view='BoxWhisker Plot',
    x='zipcode',
    y='price'
)
In [6]:
# Linear regression of price on the columns in `my_features`.
# validation_set=None: with the default ('auto') GraphLab Create holds out a
# random, unseeded slice of the training rows for progress tracking, so the
# fitted model and its test metrics would change between runs.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None
)
In [ ]:
# Test-set prices as dots, with the multi-feature model's predictions drawn
# against living area using the '-' line style.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], my_features_model.predict(test_data), '-'
)
In [ ]:
# Show the feature list; print(...) form works on both Python 2 and Python 3.
print(my_features)
In [ ]:
# Same target and features as the multi-feature model, but created through
# the generic regression toolkit entry point rather than linear_regression.
# validation_set=None avoids the default random, unseeded hold-out of
# training rows, which would make the result differ from run to run.
my_model = graphlab.regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None
)
In [ ]:
# Evaluate `my_model` on the held-out test split.
# print(...) form works on both Python 2 and Python 3.
print(my_model.evaluate(test_data))
In [ ]:
# Re-display the SFrame before the per-zipcode exploration that follows.
sales
In [ ]:
# Visualize the whole SFrame with its default view.
sales.show()
In [ ]:
# Column pair used to look at price by zipcode.
cost_zip_code = [
    'price',
    'zipcode',
]
In [ ]:
# Visualize only the columns listed in `cost_zip_code`.
sales[cost_zip_code].show()
In [ ]:
# Same full-SFrame visualization as the earlier `sales.show()` cell.
sales.show()
In [ ]:
# One row per zipcode, with the average of `price` stored in a column that is
# also named 'price_avg'.
price_avg = sales.groupby(
    ['zipcode'],
    {'price_avg': graphlab.aggregate.AVG('price')}
)
In [ ]:
# Show zipcodes ordered from highest to lowest average price. The sorted
# result is only displayed, not assigned back to `price_avg`.
price_avg.sort('price_avg', ascending=False)
In [ ]:
# Re-display the SFrame before counting rows by living area.
sales
In [ ]:
# Number of rows whose living area is strictly between 2000 and 4000.
len(sales[
    (sales['sqft_living'] > 2000) &
    (sales['sqft_living'] < 4000)
])
In [ ]:
# Total row count, the denominator for the filtered count in the cell above.
len(sales)
In [7]:
# Extended feature list: the same six columns as `my_features`, followed by
# additional per-house and neighbourhood columns.
advanced_features = [
    # --- base columns ---
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    # --- additional columns ---
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat',            # latitude of the parcel
    'long',           # longitude of the parcel
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]
In [ ]:
# Show the extended feature list; print(...) form works on both Python 2 and
# Python 3.
print(advanced_features)
In [8]:
# Linear regression of price on the extended `advanced_features` list.
# validation_set=None: with the default ('auto') GraphLab Create holds out a
# random, unseeded slice of the training rows for progress tracking, so the
# fitted model and its test metrics would change between runs.
my_advanced_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features,
    validation_set=None
)
In [9]:
# Test-set evaluation of the two linear models, basic features first and
# extended features second. print(...) form works on both Python 2 and
# Python 3.
print(my_features_model.evaluate(test_data))
print(my_advanced_features_model.evaluate(test_data))
In [10]:
# Difference in test-set error between the basic and the extended model.
# Computed from the models rather than from literals pasted out of an earlier
# output, so the value stays correct when the notebook is re-run.
# NOTE(review): assumes the previously hard-coded numbers were the 'rmse'
# entries of the two evaluate() results — confirm.
(my_features_model.evaluate(test_data)['rmse']
 - my_advanced_features_model.evaluate(test_data)['rmse'])
Out[10]:
In [ ]: