w2-practice-02--Boston-housing-regression



In [3]:
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sklearn
%matplotlib inline
from IPython.display import display
plt.rc('font', family='Verdana')

In [15]:
from sklearn.datasets import load_boston
data = load_boston()
df = pd.DataFrame(data.data, columns=data.feature_names)
target = pd.DataFrame(data.target, columns = ["Target"])

In [33]:
from sklearn.preprocessing import scale
df = scale(df)

In [22]:
ps = np.linspace(1.0, 10.0, num=200)

In [34]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score

from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

result = list()
for p in ps:
    knr = KNeighborsRegressor(n_neighbors=5, weights='distance')
    knr.fit(df, target)
    array = cross_val_score(estimator=knr, X=df, y=target, cv=kf, scoring='neg_mean_squared_error')
    result.append((p, array.mean()))

print(max(result, key=lambda t: t[1]))


(1.0, -17.336637884259673)