In [1]:
#importing the graphlab
import graphlab as gl
In [2]:
# Load the crime-rate CSV into a GraphLab SFrame.
crimeData = gl.SFrame.read_csv(
    "Philadelphia_Crime_Rate_noNA.csv"
)
# Bare last expression: the notebook renders the frame as this cell's output.
crimeData
In [ ]:
# Direct GraphLab Canvas output to this IPython notebook.
gl.canvas.set_target('ipynb')

# Scatter plot: CrimeRate on the x axis, HousePrice on the y axis.
crimeData.show(
    view="Scatter Plot",
    x="CrimeRate",
    y="HousePrice"
)
In [ ]:
# Fit a linear regression on the full data set:
# HousePrice is the target, CrimeRate the only feature.
# No validation set is held out and training output is silenced.
crimeData_model = gl.linear_regression.create(
    crimeData,
    target='HousePrice',
    features=['CrimeRate'],
    validation_set=None,
    verbose=False
)
In [ ]:
# Show the fitted coefficients of the full-data model.
# Read through .get('coefficients'), the same field accessor this notebook
# uses for crimeData_woHI_model, so both models are inspected the same way.
crimeData_model.get('coefficients')
In [ ]:
#importing matplotlib library for plotting
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# Plot the raw observations as dots and the model's predictions as a line,
# both against CrimeRate.
plt.plot(crimeData['CrimeRate'], crimeData['HousePrice'], '.',
         crimeData['CrimeRate'], crimeData_model.predict(crimeData), '-')
# Label the figure so it can be read on its own.
plt.xlabel('CrimeRate')
plt.ylabel('HousePrice')
# Trailing ';' keeps the return value's repr out of the cell output.
plt.title('HousePrice vs. CrimeRate with linear fit (all data)');
In [ ]:
# Drop the high-influence point so the same linear model can be refit without it.
# The point is identified by its HousePrice value.
# NOTE(review): this removes every row whose HousePrice equals this value —
# confirm that only the intended observation matches.
HIGH_INFLUENCE_HOUSE_PRICE = 96200
crimeData_woHI = crimeData[crimeData['HousePrice'] != HIGH_INFLUENCE_HOUSE_PRICE]
In [ ]:
# Refit the same single-feature regression (HousePrice on CrimeRate)
# on the filtered data; no validation set, training output silenced.
crimeData_woHI_model = gl.linear_regression.create(
    crimeData_woHI,
    target='HousePrice',
    features=['CrimeRate'],
    validation_set=None,
    verbose=False
)
In [ ]:
# Show the fitted coefficients of the model trained on the filtered data
# (crimeData_woHI), read via the model's 'coefficients' field.
crimeData_woHI_model.get('coefficients')
In [ ]:
# Plot the filtered observations as dots and the refit model's predictions
# as a line, both against CrimeRate.
plt.plot(crimeData_woHI['CrimeRate'], crimeData_woHI['HousePrice'], '.',
         crimeData_woHI['CrimeRate'], crimeData_woHI_model.predict(crimeData_woHI), '-')
# Label the figure so it can be read on its own.
plt.xlabel('CrimeRate')
plt.ylabel('HousePrice')
# Trailing ';' keeps the return value's repr out of the cell output.
plt.title('HousePrice vs. CrimeRate with linear fit (filtered data)');
In [4]:
# NOTE(review): presumably fetches/installs GraphLab's runtime dependencies —
# confirm against the GraphLab docs. This cell carries execution count In [4]
# although it is the last code cell, so it appears to have been run out of
# order; if it is a setup step, it likely belongs right after the import.
gl.get_dependencies()
In [ ]: