In [1]:
    
import numpy as np
import statsmodels.api as sm
import pandas as pd
from scipy import stats
pd.options.display.float_format = '{:,.4f}'.format
    
In [2]:
    
X = sm.add_constant(np.random.randn(100,3)*10)
w = np.array([0.25, 0.1, 0.3, -1.5])
y = np.dot(X,w) + np.random.randn(X.shape[0])
print X.shape, y.shape, w.shape
    
    
In [3]:
    
model = sm.OLS(y, X)
res = model.fit()
print res.summary2()
    
    
In [4]:
    
def R2(y, X, coeffs):
    y_hat = np.dot(X, coeffs)
    y_mean = np.mean(y)
    SST = np.sum((y-y_mean)**2)
    SSR = np.sum((y_hat - y_mean)**2)
    SSE = np.sum((y_hat - y)**2)
    #R_squared = SSR / SST
    R_squared = SSE / SST
    return 1- R_squared
    
In [5]:
    
R2(y, X, res.params)
    
    Out[5]:
In [6]:
    
def se_coeff(y, X, coeffs):
    # Reference: https://en.wikipedia.org/wiki/Ordinary_least_squares#Finite_sample_properties
    s2 = np.dot(y, y - np.dot(X, coeffs)) / (X.shape[0] - X.shape[1]) # Calculate S-squared
    XX = np.diag(np.linalg.inv(np.dot(X.T, X))) # Calculate 
    return np.sqrt(s2*XX)
    
In [7]:
    
coeffs = res.params
N, K = X.shape
se = se_coeff(y, X, coeffs)
t = coeffs / se
p = stats.t.sf(np.abs(t), N - K)*2
ci = stats.t.ppf((1 + 0.95)/2, N-K)*se
pd.DataFrame(np.vstack((coeffs, se, t, p, coeffs - ci, coeffs + ci)).T, columns=["coeff", "S.E.", "t", "p-value", "ci-", "ci+"])
    
    Out[7]:
In [ ]: