In [1]:
import graphlab
In [4]:
# Load the house-sales data from the 'home_data.gl/' directory into an SFrame.
sales = graphlab.SFrame('home_data.gl/')
In [3]:
# Display the loaded SFrame as the cell output.
sales
Out[3]:
In [5]:
# Point GraphLab Canvas at the 'ipynb' target so that later .show() calls
# presumably render inside the notebook rather than elsewhere -- confirm
# against the GraphLab Canvas docs.
graphlab.canvas.set_target('ipynb')
In [6]:
# Scatter plot of sale price against living-area square footage.
sales.show(
    view='Scatter Plot',
    x='sqft_living',
    y='price'
)
In [7]:
# Randomly split the sales data using a 0.8 fraction; the fixed seed keeps
# the split reproducible across runs.
train_data, test_data = sales.random_split(0.8, seed=0)
In [8]:
# Fit a linear regression on the training data that predicts 'price'
# from the single feature 'sqft_living'.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living']
)
In [21]:
# Mean of the 'price' column in the test set, as a reference point for the
# evaluation results printed later.
print(test_data['price'].mean())
In [22]:
# Evaluate the single-feature model on the held-out test data and print the result.
print(sqft_model.evaluate(test_data))
In [23]:
import sys

# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# site.py removes sys.setdefaultencoding at startup, so the module has to be
# reloaded to get the function back.
#
# reload(sys) also resets sys.stdin/stdout/stderr to the interpreter's
# original streams. Under an IPython/Jupyter kernel those attributes are
# replaced with notebook-aware streams, so without restoring them every later
# `print` in this notebook would stop showing its output in the cell.
_kernel_streams = (sys.stdin, sys.stdout, sys.stderr)
reload(sys)
sys.setdefaultencoding('utf8')
sys.stdin, sys.stdout, sys.stderr = _kernel_streams
In [24]:
import matplotlib.pyplot as plt
In [25]:
# IPython magic: select matplotlib's inline backend so figures are embedded
# in the notebook output.
%matplotlib inline
In [26]:
# Plot the actual test prices as points ('.') and overlay the
# single-feature model's predictions as a line ('-'), both against
# living-area square footage.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], sqft_model.predict(test_data), '-'
)
Out[26]:
In [27]:
# Show the 'coefficients' field of the fitted single-feature model.
sqft_model.get('coefficients')
Out[27]:
In [28]:
# Column names used as inputs for the multi-feature regression model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [29]:
# Visualise only the columns listed in my_features.
sales[my_features].show()
In [30]:
# Box-and-whisker plot of sale price grouped by zipcode.
sales.show(
    view='BoxWhisker Plot',
    x='zipcode',
    y='price'
)
# Fit a second linear regression on the training data, this time predicting
# 'price' from every column listed in my_features.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features
)
In [32]:
# Echo the feature list used by the multi-feature model.
print(my_features)
In [33]:
# Test-set evaluation of the single-feature model, printed again here so it
# sits next to the multi-feature result for comparison.
print(sqft_model.evaluate(test_data))
In [34]:
# Test-set evaluation of the multi-feature model.
print(my_features_model.evaluate(test_data))
In [35]:
# Select the row(s) whose 'id' equals '5309101200'. The comparison is against
# a string literal, so this assumes the 'id' column is string-typed -- TODO confirm.
house1 = sales[sales['id'] == '5309101200']
In [36]:
# Display the selected record(s) for the first house.
house1
Out[36]:
In [37]:
# Actual 'price' value recorded for the first house.
print(house1['price'])
In [38]:
# Prediction for the first house from the single-feature model.
print(sqft_model.predict(house1))
In [39]:
# Prediction for the first house from the multi-feature model.
print(my_features_model.predict(house1))
In [40]:
# Select the row(s) whose 'id' equals '1925069082'. The comparison is against
# a string literal, so this assumes the 'id' column is string-typed -- TODO confirm.
house2 = sales[sales['id'] == '1925069082']
In [41]:
# Display the selected record(s) for the second house.
house2
Out[41]:
In [42]:
# Prediction for the second house from the single-feature model.
print(sqft_model.predict(house2))
In [43]:
# Prediction for the second house from the multi-feature model.
print(my_features_model.predict(house2))
In [ ]: