In [2]:
#Predicting House Prices
In [11]:
import graphlab
In [6]:
#Load the House Sales Data
In [13]:
# Load the saved SFrame from 'home_data.gl' (path is relative to the working directory).
sales = graphlab.SFrame('home_data.gl')
In [14]:
# Bare expression: the notebook renders the SFrame as this cell's output.
sales
Out[14]:
In [15]:
#Exploring the Data for Housing
In [16]:
# Direct GraphLab Canvas output into the notebook before the first show() call.
# Without this, on a fresh kernel this chart is rendered to whatever Canvas
# target GraphLab defaults to (possibly an external browser tab) instead of inline.
graphlab.canvas.set_target('ipynb')
# NOTE(review): confirm 'Categorical' is a valid view for SFrame.show(); it looks
# like an SArray view name.
sales.show(view="Categorical")
In [18]:
# Keep Canvas output inline in the notebook, then chart price against living area.
graphlab.canvas.set_target("ipynb")
sales.show(
    view="Scatter Plot",
    x="sqft_living",
    y="price",
)
In [19]:
#Create a regression model
In [21]:
# Split the sales data with fraction 0.8; the fixed seed keeps the split repeatable.
train_data, test_data = sales.random_split(0.8, seed=20154)
In [22]:
#Build the regression model
In [23]:
# Fit a single-feature linear regression of price on living area.
# validation_set=None turns off GraphLab's default 'auto' behaviour of holding
# out a random slice of the training rows, so a Restart & Run All trains on the
# same rows (and yields the same coefficients) every time.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
    validation_set=None,
)
In [24]:
# Bare expression: the notebook shows the fitted model's representation as output.
sqft_model
Out[24]:
In [25]:
#Evaluate the model
In [26]:
# Mean price over the test split, as a reference point for the error metrics.
# Parenthesised print behaves the same on Python 2 and also parses on Python 3.
print(test_data['price'].mean())
In [28]:
# Print whatever evaluate() reports for the single-feature model on the test split.
# Parenthesised print behaves the same on Python 2 and also parses on Python 3.
print(sqft_model.evaluate(test_data))
In [29]:
#Let's show what our predictions look like
In [38]:
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
In [41]:
# Actual test-set prices as points, with the single-feature model's predictions
# drawn as a line over the same x values.
plt.plot(test_data['sqft_living'], test_data['price'], '.', label='actual price')
plt.plot(test_data['sqft_living'], sqft_model.predict(test_data), '-', label='predicted price')
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel('sqft_living')
plt.ylabel('price')
# Trailing semicolon keeps the Legend object's repr out of the cell output.
plt.legend();
Out[41]:
In [42]:
# Fetch the 'coefficients' field of the single-feature model and display it.
sqft_model.get('coefficients')
Out[42]:
In [46]:
#Explore features in the data
In [48]:
# Column names used both for exploration and as inputs to the larger model.
features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
In [49]:
# Restrict the data to the columns listed in `features` and open the default view.
sales[features].show()
In [50]:
# Box-and-whisker chart of price grouped by zipcode.
sales.show(
    view="BoxWhisker Plot",
    x="zipcode",
    y="price",
)
In [51]:
#Build a regression model with more features
In [53]:
# Fit a linear regression of price on every column listed in `features`.
# validation_set=None turns off GraphLab's default 'auto' behaviour of holding
# out a random slice of the training rows, so a Restart & Run All trains on the
# same rows (and yields the same coefficients) every time.
my_features_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=features,
    validation_set=None,
)
In [55]:
# Compare both models on the same test split: single-feature first, then multi-feature.
# Parenthesised print behaves the same on Python 2 and also parses on Python 3.
print(sqft_model.evaluate(test_data))
print(my_features_model.evaluate(test_data))
In [57]:
#Apply learned models to predict prices of houses from the dataset
In [59]:
# Select the row(s) whose 'id' equals '5309101200'.
# NOTE(review): the comparison is against a string, so this relies on the 'id'
# column being stored as strings -- confirm against the dataset.
house1 = sales[sales['id'] == '5309101200']
In [63]:
# Display the 'price' value(s) of the selected house, for comparison with the predictions.
house1['price']
Out[63]:
In [ ]:
In [65]:
# Single-feature model's prediction for house1.
# Parenthesised print behaves the same on Python 2 and also parses on Python 3.
print(sqft_model.predict(house1))
In [66]:
# Multi-feature model's prediction for house1.
# Parenthesised print behaves the same on Python 2 and also parses on Python 3.
print(my_features_model.predict(house1))
In [68]:
#Predict House2
In [70]:
# Select the row(s) whose 'id' equals '1925069082'.
# NOTE(review): the comparison is against a string, so this relies on the 'id'
# column being stored as strings -- confirm against the dataset.
house2 = sales[sales['id'] == '1925069082']
In [71]:
# Bare expression: the notebook renders the selected row(s) as this cell's output.
house2
Out[71]:
In [72]:
# Predictions for house2: single-feature model first, then multi-feature model.
# Parenthesised print behaves the same on Python 2 and also parses on Python 3.
print(sqft_model.predict(house2))
print(my_features_model.predict(house2))
In [73]:
# Display the multi-feature model's coefficients via attribute access.
my_features_model.coefficients
Out[73]:
In [ ]: