In [3]:
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import sklearn
%matplotlib inline
from IPython.display import display
# Use Verdana as the default font family for every matplotlib figure in this notebook.
plt.rc('font', family='Verdana')
In [15]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell presumably needs an older scikit-learn -- confirm the pinned version.
data = load_boston()

# Feature matrix, with one named column per entry of data.feature_names.
df = pd.DataFrame(data=data.data, columns=data.feature_names)

# Regression target kept as a single-column frame named "Target".
target = pd.DataFrame({"Target": data.target})
In [33]:
from sklearn.preprocessing import scale
# Standardize the features before the distance-based model is fitted.
# NOTE: this rebinds `df` to the return value of scale() (a NumPy array per the
# scikit-learn docs), so the DataFrame column names are no longer available afterwards.
df = scale(df)
In [22]:
# Candidate values for the Minkowski exponent p: 200 evenly spaced points
# covering the closed interval [1, 10].
ps = np.linspace(start=1.0, stop=10.0, num=200)
In [34]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

# Shuffled 5-fold splitter with a fixed seed, so every candidate p is scored
# on exactly the same folds and the scores are comparable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Collects one (p, mean cross-validated score) pair per candidate exponent in `ps`.
result = list()
for p in ps:
    # Fix: the loop variable was never handed to the estimator, so all 200
    # iterations evaluated the same default-metric model. Pass it as the
    # Minkowski power parameter so each iteration tests a different metric.
    knr = KNeighborsRegressor(n_neighbors=5, weights='distance', metric='minkowski', p=p)
    # cross_val_score fits clones of the estimator, so this fit does not influence
    # the scores below; it is kept only so `knr` is a fitted model after the loop.
    knr.fit(df, target)
    array = cross_val_score(estimator=knr, X=df, y=target, cv=kf, scoring='neg_mean_squared_error')
    result.append((p, array.mean()))

# Scores are negated MSE, so the largest (closest to zero) mean marks the best p.
print(max(result, key=lambda t: t[1]))