notebook.community

Edit and run



In [1]:

    
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets, linear_model as lm
from sklearn.model_selection import cross_val_predict, cross_val_score



In [2]:

    
boston = datasets.load_boston()

print(boston.DESCR[100:1300])









    



   :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
        - LSTAT    % lower status of the population
        - MEDV     Median value of owner-occupied homes in $1000's

    :Missing Attribute Values: None

    :Creator: Harrison, D. and Rubinfel



In [3]:

    
boston_df = pd.DataFrame(boston.data)
boston_df.columns = boston.feature_names
boston_df.head()



In [5]:

    
model = lm.LinearRegression()
prediction = cross_val_predict(model, boston.data, boston.target, cv=5)
score = cross_val_score(model, boston.data, boston.target, cv=5)

print("Accuracy: {:.2f}%".format(score.mean() * 100))









    



Accuracy: 35.07%



In [6]:

    
figure, plot = plt.subplots()

plot.scatter(boston.target, prediction, alpha=0.3, color='yellow')
plot.plot(
    [boston.target.min(), boston.target.max()],
    [boston.target.min(), boston.target.max()],
    'k--', lw=3, color='green'
)

plot.set_xlabel('Measured')
plot.set_ylabel('Prediction')

plt.show()

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1.0	296.0	15.3	396.90	4.98
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2.0	242.0	17.8	396.90	9.14
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2.0	242.0	17.8	392.83	4.03
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3.0	222.0	18.7	394.63	2.94
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3.0	222.0	18.7	396.90	5.33