In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the UCI housing data set: a whitespace-delimited text file with no header row.
# The separator is written as a raw string so the regex "\s+" reaches pandas
# unchanged; in a plain string literal "\s" is an invalid escape sequence, which
# newer Python versions warn about.
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
                 header=None, sep=r"\s+")
# The file carries no column names, so label its 14 columns explicitly.
# MEDV is the last column; later cells use it as the regression target.
df.columns = ["CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS","RAD","TAX","PTRATIO","B","LSTAT","MEDV"]

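Validating the model

Fit a linear regression that predicts MEDV from all of the other columns, then inspect the fit with analyzefit.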


In [3]:
# Target vector: the MEDV column. Feature matrix: every column except the last one.
y = df["MEDV"].values
X = df.iloc[:, :-1].values

# Build the estimator and fit it on the full data set in one step;
# fit() returns the estimator itself, so `slr` is the fitted model.
slr = LinearRegression().fit(X, y)
# Leave the fitted estimator as the cell's last expression so its repr is displayed.
slr


/Users/wileymorgan/.virtualenvs/afit/lib/python2.7/site-packages/scipy/linalg/basic.py:1018: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.
  warnings.warn(mesg, RuntimeWarning)
Out[3]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [6]:
from analyzefit import Analysis

In [7]:
# Bundle the feature matrix, the target vector and the fitted model into an
# analyzefit Analysis object.
an = Analysis(X, y, slr)

# Render the res_vs_fit diagnostic (presumably residuals plotted against fitted
# values; the captured output shows it is drawn with Bokeh).
an.res_vs_fit()


Loading BokehJS ...

In [ ]: