In [1]:
    
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn import linear_model, datasets, metrics, model_selection, feature_selection, preprocessing
from scipy import stats
    
In [2]:
    
# Load the Boston dataset bundled with scikit-learn and split it into a
# feature frame (X, columns named after boston.feature_names) and the
# target values (y).
# NOTE(review): load_boston appears to be deprecated/removed in recent
# scikit-learn releases -- confirm the installed version still provides it.
boston = datasets.load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = boston.target
    
In [3]:
    
# Dimensions of the feature frame as (rows, columns).
print('shape:', X.shape)
    
    
In [4]:
    
# Summary statistics for each feature column; rendered as the cell output.
X.describe()
    
    Out[4]:
In [5]:
    
# Distribution plot of the target values.
# NOTE(review): distplot looks to be deprecated in newer seaborn releases
# (histplot/displot are the replacements) -- confirm against the installed version.
sns.distplot(y)
    
    Out[5]:
    
In [6]:
    
# Pairwise plots across all feature columns; the trailing semicolon keeps
# the returned object's repr out of the cell output.
sns.pairplot(X);
    
    
In [7]:
    
# One figure with two stacked panels: a horizontal box plot of the target
# on top (ax1) and its distribution plot underneath (ax2).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
sns.distplot(y, ax=ax2)
# Kept as the last expression so the cell output is unchanged.
sns.boxplot(data=y, orient='h', ax=ax1)
    
    Out[7]:
    
In [8]:
    
# Hold out 30% of the rows for evaluation.
# The split is seeded: without a fixed random_state the partition (and every
# coefficient, plot and score derived from it) changes on each run, so the
# notebook's outputs could not be reproduced with Restart & Run All.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, train_size=0.7, random_state=RANDOM_STATE
)
print('train samples:', len(X_train))
print('test samples', len(X_test))
    
    
    
In [9]:
    
# Ignore warnings coming from scipy modules whose message starts with
# "internal gelsd".
# NOTE(review): presumably raised by the LinearRegression fit in the next
# cell -- confirm it is still needed with the installed scipy.
import warnings
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
    
In [10]:
    
# Fit a linear regression on the training split. The fit call is left as
# the last expression so the fitted estimator is shown as the cell output.
lr = linear_model.LinearRegression()
lr.fit(X_train, y_train)
    
    Out[10]:
In [11]:
    
# Number of fitted coefficients, followed by the coefficient values.
print('No coef:', len(lr.coef_))
print('Coefficients: \n', lr.coef_)
    
    
In [12]:
    
# Predictions of the fitted model for the held-out test features.
predicted = lr.predict(X_test)
    
In [13]:
    
# Scatter of model predictions against the true test targets.
fig, ax = plt.subplots()
ax.scatter(y_test, predicted)

# Dashed red identity line across the full target range: a point lying on
# it is a prediction equal to the measured value.
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, linestyle='--', color='red')

ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
    
    Out[13]:
    
In [14]:
    
# Residual = true test target minus the model's prediction.
residual = y_test - predicted
    
In [15]:
    
# Residuals plotted against the true test targets.
fig, ax = plt.subplots()
ax.scatter(y_test, residual)
# Zero-residual reference line, drawn on this Axes explicitly rather than
# through pyplot's implicit "current axes" so the whole cell uses the same
# object-oriented interface.
ax.axhline(0, color='red', ls='--')
ax.set_xlabel('y')
ax.set_ylabel('residual')
    
    Out[15]:
    
In [16]:
    
# Distribution plot of the test residuals; the trailing semicolon suppresses
# the returned object's repr.
# NOTE(review): distplot looks to be deprecated in newer seaborn releases --
# confirm against the installed version.
sns.distplot(residual);
    
    
The training scores
In [17]:
    
# R^2 score on the training split (predictions recomputed from X_train).
metrics.r2_score(y_train, lr.predict(X_train))
    
    Out[17]:
In [18]:
    
# Mean squared error on the training split.
metrics.mean_squared_error(y_train, lr.predict(X_train))
    
    Out[18]:
In [19]:
    
# R^2 score on the held-out test split, using the predictions stored in `predicted`.
metrics.r2_score(y_test, predicted)
    
    Out[19]:
In [20]:
    
# Mean squared error on the held-out test split.
metrics.mean_squared_error(y_test, predicted)
    
    Out[20]:
In [21]:
    
# Fitted intercept, followed by the fitted coefficients.
print(lr.intercept_)
print(lr.coef_)