In [1]:
# Import GraphLab
In [2]:
import graphlab
In [3]:
# Read the data stored at 'home_data.gl' into an SFrame; every later cell
# works from this `sales` frame.
sales = graphlab.SFrame(data='home_data.gl')
In [4]:
# Box-and-whisker view with 'zipcode' on the x axis and 'price' on the y axis.
sales.show(
    x='zipcode',
    y='price',
    view='BoxWhisker Plot',
)
In [6]:
# Keep only the rows whose 'zipcode' equals the string '98039'.
in_target_zipcode = sales['zipcode'] == '98039'
filtered_sales = sales[in_target_zipcode]
In [7]:
# Display the filtered subset as the cell's output.
filtered_sales
Out[7]:
In [8]:
# Mean of the 'price' column over whatever `filtered_sales` currently holds.
print(filtered_sales['price'].mean())
In [11]:
# Rows whose 'sqft_living' is strictly greater than 2000 and at most 4000.
# NOTE: this rebinds `filtered_sales`, replacing the zipcode subset built in
# an earlier cell.
living_area = sales['sqft_living']
filtered_sales = sales[(living_area > 2000) & (living_area <= 4000)]
In [12]:
# Number of rows in the current filtered subset.
len(filtered_sales)
Out[12]:
In [13]:
# Number of rows in the full data set, for comparison with the subset count.
len(sales)
Out[13]:
In [14]:
# Baseline column names used as predictors for the simpler price model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [16]:
# Extended predictor list for the richer price model. Column order matters
# only for readability; the groups below are concatenated in sequence.
advanced_features = (
    # the same six columns as the baseline list
    ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode']
    # condition of house / quality of construction
    + ['condition', 'grade']
    # waterfront property flag / type of view
    + ['waterfront', 'view']
    # square feet above ground / in basement
    + ['sqft_above', 'sqft_basement']
    # year built / year renovated
    + ['yr_built', 'yr_renovated']
    # lat-long of the parcel
    + ['lat', 'long']
    # average living sq.ft. / lot size of the 15 nearest neighbors
    + ['sqft_living15', 'sqft_lot15']
)
In [18]:
# Split `sales` into a training part and a test part using fraction 0.8;
# the fixed seed keeps the split identical across re-runs.
features_train_data, features_test_data = sales.random_split(fraction=0.8, seed=0)
In [21]:
# Fit two linear regressions of 'price' on the same training split: one on
# the baseline feature list and one on the extended list.
#
# validation_set=None switches off the automatic validation hold-out that
# create() otherwise samples from the training data, so both models are
# fitted on every training row and the seeded train/test split above is the
# only source of data partitioning.
my_features_model = graphlab.linear_regression.create(
    features_train_data,
    target='price',
    features=my_features,
    validation_set=None,
)
adv_features_model = graphlab.linear_regression.create(
    features_train_data,
    target='price',
    features=advanced_features,
    validation_set=None,
)
In [22]:
# Score both models on the same held-out test split so the two results are
# directly comparable; scoring the baseline model on its own training data
# would measure fit rather than generalization.
print(my_features_model.evaluate(features_test_data))
print(adv_features_model.evaluate(features_test_data))
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: