In [1]:
    
import graphlab
import numpy as np
    
In [2]:
    
# Load the SFrame stored at 'home_data.gl/' (a path relative to the working
# directory) and bind it to `sales`; every later cell reads from this frame.
# NOTE(review): presumably one row per house sale -- confirm against the data.
sales = graphlab.SFrame('home_data.gl/')
    
    
    
In [3]:
    
# Bare last expression: the notebook renders `sales` as this cell's output.
sales
    
    Out[3]:
In [4]:
    
# Point GraphLab Canvas at the 'ipynb' target (presumably inline rendering in
# the notebook -- confirm), then draw price against living area.
graphlab.canvas.set_target('ipynb')
sales.show(
    x='sqft_living',
    y='price',
    view='Scatter Plot'
)
    
    
In [5]:
    
# Split `sales` into two frames with a 0.8 fraction argument and a fixed
# seed (0), so the same split is produced on every run.
(train_data, test_data) = sales.random_split(0.8, seed=0)
    
In [6]:
    
# Single-feature baseline: linear regression of 'price' on 'sqft_living',
# fit on the training split only.
#
# validation_set=None: by default (validation_set='auto') GraphLab Create
# holds out an automatically sampled validation set from the training data
# for progress reporting. Disabling it fits the model on all of train_data.
# NOTE(review): the automatic sample is presumably not tied to the seed used
# for random_split, so results could otherwise differ between runs -- confirm.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None
)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
In [7]:
    
# Print the mean of the 'price' column over the test split.
test_prices = test_data['price']
print(test_prices.mean())
    
    
In [ ]:
    
# Evaluate the single-feature model on the test split and print the result.
sqft_eval = sqft_model.evaluate(test_data)
print(sqft_eval)
    
    
In [ ]:
    
import matplotlib.pyplot as plt
%matplotlib inline
    
In [ ]:
    
# Two series on one axis, both against living area in the test split:
# observed prices as point markers ('.') and sqft_model's predictions as a
# connected line ('-').
living_area = test_data['sqft_living']
observed_price = test_data['price']
predicted_price = sqft_model.predict(test_data)
plt.plot(living_area, observed_price, '.',
         living_area, predicted_price, '-')
    
In [ ]:
    
# Look up the 'coefficients' field of the fitted model; as the last
# expression in the cell, the result is rendered as the cell output.
sqft_model.get('coefficients')
    
In [ ]:
    
# Column names used as inputs for the multi-feature regression model.
myfeatures = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
    
In [ ]:
    
# Restrict `sales` to the `myfeatures` columns and open the default
# visualisation for that sub-frame.
feature_columns = sales[myfeatures]
feature_columns.show()
    
In [ ]:
    
# Box-and-whisker view of 'price' grouped by 'zipcode'.
sales.show(
    x='zipcode',
    y='price',
    view='BoxWhisker Plot'
)
    
In [ ]:
    
# Multi-feature model: linear regression of 'price' on the columns listed in
# `myfeatures`, fit on the training split only.
#
# validation_set=None: by default (validation_set='auto') GraphLab Create
# holds out an automatically sampled validation set from the training data
# for progress reporting. Disabling it fits the model on all of train_data.
# NOTE(review): the automatic sample is presumably not tied to the seed used
# for random_split, so results could otherwise differ between runs -- confirm.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=myfeatures,
    validation_set=None
)
    
In [ ]:
    
# Echo the feature list that my_features_model was given.
print(myfeatures)
    
In [ ]:
    
# Print the test-split evaluation of each model, sqft_model first and
# my_features_model second.
for fitted in (sqft_model, my_features_model):
    print(fitted.evaluate(test_data))
    
In [ ]:
    
# Select the row(s) of `sales` whose 'id' equals the string '5309101200'.
# The comparison is against a string literal, so 'id' is compared as text.
id_matches = sales['id'] == '5309101200'
house1 = sales[id_matches]
    
In [ ]:
    
# Bare last expression: render the selected `house1` row(s) as cell output.
house1
    
In [ ]:
    
# Print the recorded 'price' column for house1.
house1_price = house1['price']
print(house1_price)
    
In [ ]:
    
# Print sqft_model's prediction for house1.
house1_sqft_pred = sqft_model.predict(house1)
print(house1_sqft_pred)
    
In [ ]:
    
# Print my_features_model's prediction for house1.
house1_multi_pred = my_features_model.predict(house1)
print(house1_multi_pred)
    
In [ ]:
    
# Select the row(s) of `sales` whose 'id' equals the string '1925069082'.
# The comparison is against a string literal, so 'id' is compared as text.
id_matches = sales['id'] == '1925069082'
house2 = sales[id_matches]
    
In [ ]:
    
# Bare last expression: render the selected `house2` row(s) as cell output.
house2
    
In [ ]:
    
# Print house2's recorded price, then each model's prediction for it
# (sqft_model first, my_features_model second).
print(house2['price'])
for fitted in (sqft_model, my_features_model):
    print(fitted.predict(house2))
    
In [ ]:
    
# Keep only the rows whose 'zipcode' equals the string '98039', then preview
# the first rows of the result as the cell output.
in_zipcode = sales['zipcode'] == '98039'
highest_price = sales[in_zipcode]
highest_price.head()
    
In [ ]:
    
# Mean sale price within the selected zipcode. Uses the column's own mean(),
# the same call used for the test-split mean price earlier in the notebook,
# instead of passing the column through NumPy.
highest_price['price'].mean()
    
In [ ]:
    
# Rows whose living area is strictly between 2000 and 4000: both bounds are
# exclusive (> 2000 and < 4000).
living_area = sales['sqft_living']
in_range = (living_area > 2000) & (living_area < 4000)
sqft_filter = sales[in_range]
    
In [ ]:
    
# Fraction of all rows that fall inside the living-area filter. Both counts
# are converted to float before dividing so the result is not truncated by
# integer division.
selected_rows = float(sqft_filter.num_rows())
total_rows = float(sales.num_rows())
selected_rows / total_rows
    
In [ ]:
    
# Extended feature list for the larger regression model. The first six names
# are the same columns as `myfeatures`; the rest add further columns.
# Element order is unchanged.
advanced_features = [
    # same six columns as `myfeatures`
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
    # property attributes
    'condition',      # condition of house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    # location of the parcel
    'lat',
    'long',
    # neighbourhood statistics
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]
    
In [ ]:
    
# Print sqft_model's evaluation on the test split.
sqft_eval = sqft_model.evaluate(test_data)
print(sqft_eval)
    
In [ ]:
    
# Fit the two multi-feature models on the training split.
#
# The `myfeatures` fit is bound to my_features_model, not sqft_model.
# Rebinding sqft_model here would silently replace the single-feature model,
# so every later use of that name would depend on whether this cell had been
# run. Keeping the names distinct makes the cell safe to re-run.
#
# validation_set=None: by default (validation_set='auto') GraphLab Create
# holds out an automatically sampled validation set from the training data
# for progress reporting. Disabling it fits each model on all of train_data.
# NOTE(review): the automatic sample is presumably unseeded -- confirm.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=myfeatures,
    validation_set=None
)
advanced_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=advanced_features,
    validation_set=None
)
    
In [ ]:
    
# Compare the `myfeatures` model with the advanced model on the test split.
# The baseline is read from my_features_model (the model fit on `myfeatures`)
# rather than from sqft_model, so the first line printed is the multi-feature
# baseline regardless of what sqft_model currently refers to.
print(my_features_model.evaluate(test_data))
print(advanced_model.evaluate(test_data))
    
In [ ]:
    
# Observed prices and sqft_model's predictions against living area.
# Predictions are drawn as point markers, not a connected line. The '-' style
# joins points in row order, and test_data is not sorted by sqft_living, so a
# line is only readable when the predictions depend on sqft_living alone.
# Markers stay readable whatever features sqft_model was trained on.
living_area = test_data['sqft_living']
plt.plot(living_area, test_data['price'], '.',
         living_area, sqft_model.predict(test_data), '.')
plt.xlabel('sqft_living')
plt.ylabel('price')
    
In [ ]:
    
# Observed prices and advanced_model's predictions against living area.
# advanced_model uses many features besides sqft_living, so its predictions
# are not a single-valued function of the x axis. The rows are also not
# sorted by sqft_living, and '-' would join them in row order and draw a
# zigzag across the figure. Point markers show the prediction cloud instead.
living_area = test_data['sqft_living']
plt.plot(living_area, test_data['price'], '.',
         living_area, advanced_model.predict(test_data), '.')
plt.xlabel('sqft_living')
plt.ylabel('price')
    
In [ ]:
    
#Finished Week 2