In [4]:
# %load ../common_import.py
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn import datasets
In [5]:
from sklearn.model_selection import cross_val_predict
from sklearn import linear_model
In [12]:
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn -- confirm the pinned version.
boston = datasets.load_boston()
# Wrap the raw feature and response arrays in DataFrames so they are easy to inspect.
data = pd.DataFrame(boston.data)
target = pd.DataFrame(boston.target)
# Ordinary least-squares estimator; it is handed to cross_val_predict in a later cell.
lr = linear_model.LinearRegression()
In [14]:
# Out-of-fold predictions from 10-fold cross-validation: every sample is
# predicted by a model fitted on the folds that do not contain it.
# The response is selected by position instead of passing the whole `target`
# frame, because later cells add columns to `target` and rename its first
# column; re-running this cell would otherwise fit on those extra columns too.
# Passing a Series also makes `predicted` a 1-D array.
predicted = cross_val_predict(lr, data, target.iloc[:, 0], cv=10)
In [21]:
# Scatter of measured vs. cross-validated predicted values with a dashed
# y = x reference line; points on the line are predicted exactly.
# The measured values are taken as the first column of `target` so the cell
# keeps working after later cells add columns to that frame.
measured = target.iloc[:, 0]
fig, ax = plt.subplots()
# np.ravel accepts `predicted` whether it is 1-D or a single-column 2-D array.
ax.scatter(measured, np.ravel(predicted), edgecolors=(0, 0, 0))
# Scalar end points for the diagonal (DataFrame.min()/max() would give a Series).
diagonal = [measured.min(), measured.max()]
ax.plot(diagonal, diagonal, 'k--', lw=4)
ax.set_title('Cross-validated predictions')
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
In [25]:
# Attach the cross-validated predictions to the frame as a 'predicted' column.
target = target.assign(predicted=predicted)
In [38]:
# Give the response column (default integer label 0) a readable name.
target = target.rename({0: 'target'}, axis='columns')
In [43]:
# Per-sample error: measured value minus predicted value.
target['error'] = target['target'].sub(target['predicted'])
# Show the first rows as the cell output.
target.head()
Out[43]:
In [44]:
# Histogram of the per-sample errors (measured minus predicted).
fig, ax = plt.subplots()
ax.hist(target['error'])
# Label the figure so it can be read on its own, as the scatter plot cell does.
ax.set_title('Distribution of prediction errors')
ax.set_xlabel('Error (measured - predicted)')
ax.set_ylabel('Count')
# Render explicitly so the cell output is the figure, not the repr of hist()'s return value.
plt.show()
Out[44]:
In [ ]:
# TODO: normalize the data