In [10]:
# import
import graphlab as gl
import matplotlib.pyplot as plt
In [11]:
# Point GraphLab Canvas at the 'ipynb' target so that .show() output is
# rendered in the notebook (NOTE(review): per GraphLab docs — confirm).
gl.canvas.set_target('ipynb')
# IPython magic: draw matplotlib figures inline in the cell output.
%matplotlib inline
In [4]:
# Load the crime-rate CSV into an SFrame, then preview its first four rows.
csv_path = 'data/Philadelphia_Crime_Rate_noNA.csv'
data = gl.SFrame.read_csv(csv_path)
data.head(4)
Out[4]:
In [8]:
# Scatter of HousePrice (y) against CrimeRate (x), drawn by the SFrame viewer.
data.show(
    view='Scatter Plot',
    x='CrimeRate',
    y='HousePrice',
)
In [9]:
# Simple linear regression of HousePrice on CrimeRate over all rows of `data`.
# validation_set=None makes the fit use every row: the default ('auto') may
# hold out a random sample for validation, which would let the coefficients
# change from run to run and blur the comparison with the other models.
crime_model = gl.linear_regression.create(data,
                                          features=['CrimeRate'],
                                          target='HousePrice',
                                          validation_set=None)
In [20]:
# Overlay the fitted regression line on the raw observations from `data`.
# Labels, title and legend let the figure stand on its own and tell it apart
# from the other CrimeRate/HousePrice plots in this notebook.
plt.plot(data['CrimeRate'], data['HousePrice'], '.', label='observed')
plt.plot(data['CrimeRate'], crime_model.predict(data), '-', label='crime_model fit')
plt.xlabel('CrimeRate')
plt.ylabel('HousePrice')
plt.title('HousePrice vs. CrimeRate (all rows)')
plt.legend();  # trailing ';' keeps the Legend repr out of the cell output
Out[20]:
In [30]:
# Keep only the rows whose MilesPhila value is not 0.0.
nonzero_miles = data['MilesPhila'] != 0.0
data2 = data[nonzero_miles]
In [31]:
# Same regression (HousePrice on CrimeRate), fitted on the filtered `data2`.
# validation_set=None makes the fit use every row: the default ('auto') may
# hold out a random sample for validation, which would let the coefficients
# change from run to run and blur the comparison with the other models.
crime_model_noCC = gl.linear_regression.create(data2,
                                               features=['CrimeRate'],
                                               target='HousePrice',
                                               validation_set=None)
In [32]:
# Overlay the fitted regression line on the raw observations from `data2`.
# Labels, title and legend let the figure stand on its own and tell it apart
# from the other CrimeRate/HousePrice plots in this notebook.
plt.plot(data2['CrimeRate'], data2['HousePrice'], '.', label='observed')
plt.plot(data2['CrimeRate'], crime_model_noCC.predict(data2), '-', label='crime_model_noCC fit')
plt.xlabel('CrimeRate')
plt.ylabel('HousePrice')
plt.title('HousePrice vs. CrimeRate (MilesPhila != 0)')
plt.legend();  # trailing ';' keeps the Legend repr out of the cell output
Out[32]:
In [33]:
# Print the coefficients of both models one after the other for comparison.
# Each label goes on its own line: print('label', obj) shows a tuple under
# Python 2, and under Python 3 it puts the label on the same line as the first
# row of the coefficients output (expected to be a multi-line table), which
# shifts that row out of alignment. Single-argument print() behaves the same
# on both interpreters.
print('data:')
print(crime_model.coefficients)
print('data2:')
print(crime_model_noCC.coefficients)
In [34]:
# Display the 'coefficients' field of the model fitted on the full `data`.
crime_model.get('coefficients')
Out[34]:
In [35]:
# Display the coefficients of the model fitted on `data2` (MilesPhila != 0.0).
crime_model_noCC.coefficients
Out[35]:
In [36]:
# From data2, keep only the rows with HousePrice strictly below 350000.
below_price_cap = data2['HousePrice'] < 350000
data_noHighPrice = data2[below_price_cap]
In [41]:
# Same regression (HousePrice on CrimeRate), fitted on `data_noHighPrice`.
# validation_set=None makes the fit use every row: the default ('auto') may
# hold out a random sample for validation, which would let the coefficients
# change from run to run and blur the comparison with the other models.
crime_model_noHighPrice = gl.linear_regression.create(data_noHighPrice,
                                                      features=['CrimeRate'],
                                                      target='HousePrice',
                                                      validation_set=None)
In [42]:
# Display the 'coefficients' field of the model fitted on `data_noHighPrice`.
crime_model_noHighPrice.get('coefficients')
Out[42]:
In [43]:
# Display the 'coefficients' field of the model fitted on `data2`, shown
# right after the `data_noHighPrice` model's coefficients for comparison.
crime_model_noCC.get('coefficients')
Out[43]:
In [ ]: