In [7]:
# Import GraphLab and alias it as gl
import graphlab as gl
In [8]:
# Read the crime-rate CSV into a GraphLab SFrame, then echo the frame as the
# cell's output.
crimeData = gl.SFrame.read_csv(
    "Philadelphia_Crime_Rate_noNA.csv"
)
crimeData
Out[8]:
In [9]:
# Render GraphLab Canvas views inside this IPython notebook.
gl.canvas.set_target('ipynb')
# Scatter plot with CrimeRate on the x axis and HousePrice on the y axis.
crimeData.show(
    y="HousePrice",
    x="CrimeRate",
    view="Scatter Plot",
)
In [10]:
# Fit a linear regression that predicts HousePrice from the single feature
# CrimeRate, using all rows of crimeData (validation_set=None, verbose=False).
crimeData_model = gl.linear_regression.create(
    crimeData,
    features=['CrimeRate'],
    target='HousePrice',
    verbose=False,
    validation_set=None,
)
In [11]:
# Show the fitted coefficients of the model trained on crimeData.
crimeData_model.get('coefficients')
Out[11]:
In [12]:
#importing matplotlib library for plotting
import matplotlib.pyplot as plt
%matplotlib inline
In [15]:
# Overlay the model's predictions (drawn as a line, '-') on the raw
# observations (drawn as points, '.').
# The axis labels and title are set *before* plotting so that plt.plot stays
# the last expression of the cell and the cell's output value is unchanged.
plt.xlabel('CrimeRate')
plt.ylabel('HousePrice')
plt.title('HousePrice vs. CrimeRate (all rows)')
plt.plot(crimeData['CrimeRate'], crimeData['HousePrice'], '.',
         crimeData['CrimeRate'], crimeData_model.predict(crimeData), '-')
Out[15]:
In [17]:
# Try the same linear model without the high-influence point.
# The point is identified by its HousePrice value (96200): every row whose
# HousePrice equals that value is dropped, and the result is a new SFrame, so
# crimeData itself is left untouched.
# NOTE(review): assumes only the intended observation has HousePrice == 96200 — confirm.
crimeData_woHI = crimeData[crimeData['HousePrice'] != 96200]
In [18]:
# Refit the same HousePrice ~ CrimeRate linear regression on the filtered
# data (validation_set=None, verbose=False).
crimeData_woHI_model = gl.linear_regression.create(
    crimeData_woHI,
    features=['CrimeRate'],
    target='HousePrice',
    verbose=False,
    validation_set=None,
)
In [20]:
# Show the fitted coefficients of the model trained on crimeData_woHI.
crimeData_woHI_model.get('coefficients')
Out[20]:
In [22]:
# Overlay the refitted model's predictions (drawn as a line, '-') on the
# filtered observations (drawn as points, '.').
# The axis labels and title are set *before* plotting so that plt.plot stays
# the last expression of the cell and the cell's output value is unchanged.
plt.xlabel('CrimeRate')
plt.ylabel('HousePrice')
plt.title('HousePrice vs. CrimeRate (high-influence point removed)')
plt.plot(crimeData_woHI['CrimeRate'], crimeData_woHI['HousePrice'], '.',
         crimeData_woHI['CrimeRate'], crimeData_woHI_model.predict(crimeData_woHI), '-')
Out[22]:
In [ ]: