In [1]:
import graphlab as gl
In [2]:
# Load the Oulu housing sales table; the CSV uses ';' as its field separator.
oulu_sales = gl.SFrame.read_csv(
    'datasets/oulu_housing_postcode.csv',
    delimiter=';'
)
In [3]:
# Preview the first rows of the loaded table to sanity-check the parse.
oulu_sales.head()
Out[3]:
In [4]:
# Nested feature sets: each one prepends a single extra column to the previous
# set, so the three models trained on them can be compared step by step.
feature1 = ['postcode']
feature2 = ['yearBuilt'] + feature1
feature3 = ['size'] + feature2
In [5]:
train_data, test_data = oulu_sales.random_split(0.8, seed=0)
print "all:", len(oulu_sales)
print "train:", len(train_data)
print "test:", len(test_data)
In [6]:
# Baseline: linear regression of salePrice on 'size' alone, fitted on the
# training split with no validation set.
size_model = gl.linear_regression.create(
    train_data,
    target='salePrice',
    features=['size'],
    validation_set=None
)
In [7]:
# Score the size-only model against the held-out test split; the result is the cell output.
size_model.evaluate(test_data)
Out[7]:
In [8]:
# Mean of the 'pricePerSqm' column over the test split
# (presumably a reference point for the model's 'size' coefficient -- confirm).
print test_data['pricePerSqm'].mean()
In [9]:
# Inspect the fitted coefficients of the size-only model.
size_model.get('coefficients')
Out[9]:
In [10]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(test_data['size'], test_data['salePrice'],',',
test_data['size'], size_model.predict(test_data), '-')
plt.title('Size vs Price')
# 1st line = Real prices on test data
# 2nd line = Predicted prices from Model on test data
Out[10]:
In [11]:
# Point GraphLab Canvas at the notebook ('ipynb' target), then show the
# distribution of salePrice per postcode as a box-and-whisker plot.
gl.canvas.set_target('ipynb')
oulu_sales.show(
    view='BoxWhisker Plot',
    x='postcode',
    y='salePrice'
)
In [12]:
# Scatter plot of salePrice against size over the full data set.
oulu_sales.show(
    view='Scatter Plot',
    x='size',
    y='salePrice'
)
In [13]:
# Fit one linear regression per feature set, in order (feature1, feature2,
# feature3), all on the training split with salePrice as the target.
# A generator expression is used so no loop variable is left in the namespace.
f1_model, f2_model, f3_model = (
    gl.linear_regression.create(
        train_data,
        target='salePrice',
        features=feature_set,
        validation_set=None
    )
    for feature_set in (feature1, feature2, feature3)
)
In [14]:
print f1_model.evaluate(test_data)
print f2_model.evaluate(test_data)
print f3_model.evaluate(test_data)
In [15]:
# Fitted coefficients of the model trained on feature1 (postcode only).
f1_model.get('coefficients')
Out[15]:
In [16]:
# Fitted coefficients of the model trained on feature2 (yearBuilt + postcode).
f2_model.get('coefficients')
Out[16]:
In [17]:
# Fitted coefficients of the model trained on feature3 (size + yearBuilt + postcode).
f3_model.get('coefficients')
Out[17]:
In [18]:
# Mean of the 'pricePerSqm' column over the full data set (train and test together).
oulu_sales['pricePerSqm'].mean()
Out[18]:
In [19]:
# Show the last rows of the test split.
test_data.tail()
Out[19]:
In [20]:
# Select five individual listings from the full table by their 'id' value.
# Each result is the filtered frame of rows whose 'id' matches
# (presumably exactly one row per id -- confirm ids are unique).
house1, house2, house3, house4, house5 = (
    oulu_sales[oulu_sales['id'] == listing_id]
    for listing_id in (156, 1123, 2091, 42908, 40870)
)
In [21]:
print house1
print house2
print house3
print house4
print house5
In [22]:
print "Prediction:", f3_model.predict(house1[''])
print "Real Price:", house1['salePrice']
print f3_model.evaluate(house1)
print "--------------------"
print "Prediction:", f3_model.predict(house2)
print "Real Price:",house2['salePrice']
print f3_model.evaluate(house2)
print "--------------------"
print "Prediction:", f3_model.predict(house3)
print "Real Price:",house3['salePrice']
print f3_model.evaluate(house3)
print "--------------------"
print "Prediction:", f3_model.predict(house4)
print "Real Price:",house4['salePrice']
print f3_model.evaluate(house4)
print "--------------------"
print "Prediction:", f3_model.predict(house5)
print "Real Price:",house5['salePrice']
print f3_model.evaluate(house5)
In [23]:
# Linear regression of salePrice on 'yearBuilt' alone, fitted on the training
# split with no validation set.
year_model = gl.linear_regression.create(
    train_data,
    target='salePrice',
    features=['yearBuilt'],
    validation_set=None
)
In [24]:
# Score the yearBuilt-only model against the held-out test split.
year_model.evaluate(test_data)
Out[24]:
In [25]:
# Inspect the fitted coefficients of the yearBuilt-only model.
year_model.get('coefficients')
Out[25]:
In [26]:
# Compare the yearBuilt-only model with reality on the test split. The original
# figure had no title, axis labels or legend; they are added so the plot can be
# read on its own. Each series is plotted separately to get its own legend entry.
plt.plot(test_data['yearBuilt'], test_data['salePrice'], '.',
         label='Real prices (test data)')
plt.plot(test_data['yearBuilt'], year_model.predict(test_data), '-',
         label='Predicted prices (test data)')
# Axis labels use the column names.
plt.xlabel('yearBuilt')
plt.ylabel('salePrice')
plt.title('Year Built vs Price')
plt.legend()
Out[26]: