In [1]:
    
import graphlab
    
In [2]:
    
# Load the house-sales data set from the local "home_data.gl/" directory
# into a GraphLab SFrame.
sales = graphlab.SFrame('home_data.gl/')
    
    
In [3]:
    
# Display the loaded SFrame as the cell's output for a quick visual check.
sales
    
    Out[3]:
In [6]:
    
# Send GraphLab Canvas output to the notebook itself ('ipynb' target),
# then draw price against living-area square footage as a scatter plot.
graphlab.canvas.set_target("ipynb")
sales.show(
    view='Scatter Plot',
    x='sqft_living',
    y='price'
)
    
    
In [7]:
    
# Randomly split the sales data with fraction 0.8 into train_data / test_data.
# The fixed seed keeps the split identical on every run.
train_data, test_data = sales.random_split(0.8, seed=0)
    
In [8]:
    
# Fit a linear regression that predicts 'price' from 'sqft_living' only.
# validation_set=None turns off GraphLab's default ('auto') behaviour of
# randomly holding out part of train_data for validation tracking; that
# hold-out is unseeded, so leaving it on makes the fitted model (and every
# number derived from it below) change from run to run.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None
)
    
    
In [10]:
    
# Mean of the 'price' column in the test split, as a reference point for
# the error metrics printed later.
print(test_data['price'].mean())
    
    
In [13]:
    
# Evaluate the sqft-only model on the test split and print the result.
print(sqft_model.evaluate(test_data))
    
    
In [16]:
    
import matplotlib.pyplot as plt
%matplotlib inline
    
In [17]:
    
# Plot actual test-set prices (dots) together with the sqft-only model's
# predictions ('_' markers) against living-area square footage.
# Axis labels and a legend make the figure readable on its own; the trailing
# ';' keeps the list of Line2D objects from being echoed as the cell output.
plt.plot(test_data['sqft_living'], test_data['price'], '.',
         label='actual price')
plt.plot(test_data['sqft_living'], sqft_model.predict(test_data), '_',
         label='predicted price')
plt.xlabel('sqft_living')
plt.ylabel('price')
plt.legend();
    
    Out[17]:
    
In [21]:
    
# Show the 'coefficients' field of the fitted sqft-only model.
sqft_model.get('coefficients')
# Left disabled on purpose; presumably lists the field names that .get()
# accepts (confirm against the GraphLab docs before relying on it):
# sqft_model.list_fields()
    
    Out[21]:
In [26]:
    
# Column names used as inputs for the multi-feature regression model.
my_features = [
    "bedrooms",
    "bathrooms",
    "sqft_living",
    "sqft_lot",
    "floors",
    "zipcode",
]
    
In [27]:
    
# Visualise only the columns listed in my_features.
sales[my_features].show()
    
    
In [28]:
    
# Box-and-whisker view of 'price' grouped by 'zipcode'.
sales.show(
    view="BoxWhisker Plot",
    x="zipcode",
    y="price"
)
    
    
In [29]:
    
# Fit a linear regression that predicts 'price' from the columns listed in
# my_features.
# validation_set=None turns off GraphLab's default ('auto') behaviour of
# randomly holding out part of train_data for validation tracking; that
# hold-out is unseeded, so leaving it on makes the fitted model and its
# evaluation metrics change from run to run.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=my_features,
    validation_set=None
)
    
    
In [30]:
    
# Echo the feature list that the multi-feature model was trained on.
print(my_features)
    
    
In [34]:
    
# Evaluate both models on the same test split for a side-by-side comparison:
# the multi-feature model first, then the sqft-only model.
print(my_features_model.evaluate(test_data))
print(sqft_model.evaluate(test_data))
    
    
In [35]:
    
# Select the row(s) of sales whose 'id' equals '5309101200'.
# NOTE(review): the comparison uses a string literal, so this assumes the
# 'id' column holds strings — confirm against the data set's schema.
house1 = sales[sales['id'] == '5309101200']
    
In [40]:
    
# Print the selected record(s) for the first example house.
print(house1)
    
    
In [42]:
    
# Show the types of the filtered selection and of the full data set
# side by side.
type(house1), type(sales)
    
    Out[42]:
In [43]:
    
# Recorded 'price' value for the first example house.
print(house1['price'])
    
    
In [44]:
    
# Prediction of the sqft-only model for the first example house.
print(sqft_model.predict(house1))
    
    
In [45]:
    
# Prediction of the multi-feature model for the first example house.
print(my_features_model.predict(house1))
    
    
In [48]:
    
# Select the row(s) of sales whose 'id' equals '1925069082'.
# NOTE(review): the comparison uses a string literal, so this assumes the
# 'id' column holds strings — confirm against the data set's schema.
house2 = sales[sales['id'] == '1925069082']
    
In [49]:
    
# Print the selected record(s) for the second example house.
print(house2)
    
    
In [50]:
    
# Prediction of the sqft-only model for the second example house.
print(sqft_model.predict(house2))
    
    
In [51]:
    
# Prediction of the multi-feature model for the second example house.
print(my_features_model.predict(house2))
    
    
In [ ]: