In [1]:
    
import graphlab
    
In [2]:
    
# Build an SFrame from the relative path 'home_data.gl/'.
sf = graphlab.SFrame('home_data.gl/')
    
    
    
In [3]:
    
# `sales` is a second name for the same object as `sf`; no copy is made.
sales = sf
    
In [4]:
    
# Bare expression: the notebook shows `sales` as this cell's output.
sales
    
    Out[4]:
In [5]:
    
# Open the default visualisation of `sales`.
# NOTE(review): this runs before graphlab.canvas.set_target('ipynb') is called
# in a later cell, so it presumably does not render inline — confirm, and
# consider moving the set_target call above this cell.
sales.show()
    
    
In [6]:
    
# Set the GraphLab Canvas target to 'ipynb'.
# NOTE(review): presumably this makes subsequent .show() calls render inside
# the notebook — confirm against the GraphLab Canvas documentation.
graphlab.canvas.set_target('ipynb')
    
In [7]:
    
# 'Scatter Plot' view of `sales` with sqft_living on x and price on y.
sales.show(view='Scatter Plot', x='sqft_living', y='price')
    
    
In [9]:
    
# Split `sales` into two SFrames with fraction 0.8; the fixed seed keeps the
# split repeatable across runs.
train_dataset, test_dataset = sales.random_split(fraction=0.8, seed=0)
    
In [10]:
    
# Fit a linear regression on train_dataset that predicts 'price' from the
# single feature 'sqft_living'.
sqft_model = graphlab.linear_regression.create(
    train_dataset,
    target='price',
    features=['sqft_living']
)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [11]:
    
# Mean of the 'price' column in test_dataset.
print(test_dataset['price'].mean())
    
    
In [12]:
    
# Evaluate the single-feature model on test_dataset and print the result.
print(sqft_model.evaluate(test_dataset))
    
    
In [16]:
    
import matplotlib.pyplot as plt
%matplotlib inline
    
In [17]:
    
# Plot test_dataset prices against sqft_living as points ('.') and overlay the
# sqft_model predictions for the same rows as a line ('-').
# The axis labels are set first so that plt.plot(...) remains the cell's last
# expression and its return value is still the cell output.
plt.xlabel('sqft_living')
plt.ylabel('price')
plt.plot(test_dataset['sqft_living'], test_dataset['price'], '.',
         test_dataset['sqft_living'], sqft_model.predict(test_dataset), '-')
    
    Out[17]:
    
In [19]:
    
# Look up the model's 'coefficients' field; shown as this cell's output.
sqft_model.get('coefficients')
    
    Out[19]:
In [21]:
    
# Column names used as the feature set for the multi-feature model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
    
In [22]:
    
# Visualise only the columns listed in my_features.
sales[my_features].show()
    
    
In [24]:
    
# 'BoxWhisker Plot' view of `sales` with zipcode on x and price on y.
sales.show(view='BoxWhisker Plot', x='zipcode', y='price')
    
    
In [25]:
    
# Fit a second linear regression on train_dataset, again targeting 'price',
# this time using every column named in my_features.
my_features_model = graphlab.linear_regression.create(
    train_dataset,
    target='price',
    features=my_features
)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [26]:
    
# Print the evaluation of both models on the same test_dataset, one after the
# other, so the two results can be compared.
print(sqft_model.evaluate(test_dataset))
print(my_features_model.evaluate(test_dataset))
    
    
In [27]:
    
# Boolean-mask filter: keep the rows of `sales` whose 'id' equals '5309101200'.
house1 = sales[sales['id'] == '5309101200']
    
In [28]:
    
# Bare expression: the notebook shows `house1` as this cell's output.
house1
    
    Out[28]:
In [29]:
    
# 'price' column of house1.
print(house1['price'])
    
    
In [30]:
    
# Prediction of the single-feature model for house1.
print(sqft_model.predict(house1))
    
    
In [32]:
    
# Prediction of the multi-feature model for house1.
print(my_features_model.predict(house1))
    
    
In [33]:
    
# Boolean-mask filter: keep the rows of `sales` whose 'id' equals '1925069082'.
house2 = sales[sales['id'] == '1925069082']
    
In [34]:
    
# Bare expression: the notebook shows `house2` as this cell's output.
house2
    
    Out[34]:
In [35]:
    
# 'price' column of house2.
print(house2['price'])
    
    
In [36]:
    
# Prediction of the single-feature model for house2.
print(sqft_model.predict(house2))
    
    
In [37]:
    
# Prediction of the multi-feature model for house2.
print(my_features_model.predict(house2))
    
    
In [38]:
    
# It was Bill Gates's house.
    
In [39]:
    
# Select the rows of `sales` whose 'id' equals '5309101200' using a boolean
# mask over the 'id' column, the same pattern used for house1 and house2.
# The earlier form, sales[['id'] == '5309101200'], compared the list literal
# ['id'] with a string. That comparison is always False, so `sales` was
# indexed with False and the 'id' column was never consulted.
house_temp = sales[sales['id'] == '5309101200']
    
In [40]:
    
# Bare expression: the notebook shows `house_temp` as this cell's output.
house_temp
    
    Out[40]:
In [ ]: