In [1]:
import graphlab
In [2]:
# Load the house-sales SFrame stored in the relative directory "home_data.gl/".
# The path is resolved against the kernel's working directory.
sales = graphlab.SFrame("home_data.gl/")
In [3]:
# Bare last expression: let the notebook render the SFrame's own repr.
sales
Out[3]:
In [6]:
# Route GraphLab Canvas output into the notebook ('ipynb' target) before plotting.
graphlab.canvas.set_target('ipynb')

# Scatter plot of living area (x) against sale price (y).
sales.show(
    view="Scatter Plot",
    x="sqft_living",
    y="price",
)
In [7]:
# 80/20 split of the sales data; the fixed seed keeps the split repeatable.
train_data, test_data = sales.random_split(0.8, seed=0)
In [8]:
# Fit a one-feature linear regression: price as a function of sqft_living,
# trained on the training split only.
#
# validation_set=None is passed explicitly. Per the GraphLab Create docs the
# default ('auto') automatically samples a validation set out of train_data;
# no seed is supplied for that sampling here, so the fitted coefficients and the
# metrics printed later could differ from one Restart & Run All to the next.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None
)
In [10]:
# Mean sale price in the test split: gives a sense of scale for the
# evaluation metrics printed in later cells.
# print(...) with a single argument prints the same text on Python 2 and also
# parses on Python 3 (the bare print statement is a SyntaxError there).
print(test_data['price'].mean())
In [13]:
# Evaluate the single-feature model on the held-out test split.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(sqft_model.evaluate(test_data))
In [16]:
# Plotting setup for the matplotlib figure below.
# NOTE(review): notebook convention is to keep all imports in the first cell.
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
In [17]:
# Overlay two series on one axes, both against sqft_living of the test split:
#   1. actual prices, drawn with the '.' format string
#   2. sqft_model's predictions, drawn with the '_' format string
# NOTE(review): in matplotlib '_' is the horizontal-line *marker*, not a line
# style; if a connected fit line was intended, '-' is the usual choice. Confirm.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], sqft_model.predict(test_data), '_'
)
Out[17]:
In [21]:
# Show the fitted coefficients stored on the model under the 'coefficients' field.
sqft_model.get('coefficients')
# Tip: sqft_model.list_fields() can be run instead to see which other field
# names are available (presumably the keys accepted by get(); verify in the docs).
Out[21]:
In [26]:
# Column names used as inputs for the multi-feature regression model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [27]:
# Select only the columns named in my_features and open them in GraphLab Canvas.
sales[my_features].show()
In [28]:
# Box-and-whisker view of sale price grouped by zipcode.
sales.show(
    view='BoxWhisker Plot',
    x='zipcode',
    y='price',
)
In [29]:
# Fit a linear regression of price on every column listed in my_features,
# trained on the training split only.
#
# validation_set=None is passed explicitly. Per the GraphLab Create docs the
# default ('auto') automatically samples a validation set out of train_data;
# no seed is supplied for that sampling here, so the fitted model and the
# metrics printed later could differ from one Restart & Run All to the next.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None
)
In [30]:
# Echo the feature list used by my_features_model.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(my_features)
In [34]:
# Compare both models on the same held-out test split:
# first the multi-feature model, then the sqft_living-only model.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(my_features_model.evaluate(test_data))
print(sqft_model.evaluate(test_data))
In [35]:
# Select the row(s) of sales whose id equals '5309101200'.
# The id is compared against a string literal, so the 'id' column is presumably
# stored as str; verify against the dataset schema.
house1 = sales[sales['id'] == '5309101200']
In [40]:
# Show the selected listing.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(house1)
In [42]:
# Inspect the types of the filtered selection and of the full table side by side.
type(house1), type(sales)
Out[42]:
In [43]:
# Recorded sale price of house1, for comparison with the model predictions below.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(house1['price'])
In [44]:
# Price predicted for house1 by the sqft_living-only model.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(sqft_model.predict(house1))
In [45]:
# Price predicted for house1 by the multi-feature model.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(my_features_model.predict(house1))
In [48]:
# Select the row(s) of sales whose id equals '1925069082'.
# The id is compared against a string literal, so the 'id' column is presumably
# stored as str; verify against the dataset schema.
house2 = sales[sales['id'] == '1925069082']
In [49]:
# Show the selected listing.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(house2)
In [50]:
# Price predicted for house2 by the sqft_living-only model.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(sqft_model.predict(house2))
In [51]:
# Price predicted for house2 by the multi-feature model.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(my_features_model.predict(house2))
In [ ]: