Durbin-Wu-Hausman (DWH) test with demand and supply data

Set up the environment


In [1]:
import numpy as np
import pandas as pd

from numpy.linalg import inv, solve

np.set_printoptions(precision=4, suppress=True)
pd.set_option('float_format', '{:6.4f}'.format)

%matplotlib inline

Import the data


In [2]:
df = pd.read_csv('../data/demand-supply.csv',
                 names=['q', 'p', 'x1', 'x2', 'x3', 'x4'], sep=';')
df['const'] = 1

print(df.head())


       q      p     x1  x2     x3     x4  const
0 5.0203 1.0131 5.1466   0 0.9526 1.9106      1
1 4.5993 1.9126 5.1112   0 0.6296 1.7603      1
2 4.7450 0.9333 5.0010   0 0.7878 1.6907      1
3 4.5549 1.6169 5.2120   1 0.8066 1.4489      1
4 5.1902 0.1995 5.1983   0 1.1973 1.8025      1

OLS estimation


In [3]:
def ols(Y, X):
    beta = solve(X.T * X, X.T * Y)
    Yhat = X * beta
    ehat = Y - Yhat
    Omega = np.diag(np.array(ehat).squeeze()**2)
    var = inv(X.T * X) * (X.T * Omega * X) * inv(X.T * X)
    se = np.diag(var)**.5
    return np.array(beta).squeeze(), np.array(se).squeeze(), Yhat

2SLS estimation


In [4]:
def twosls(Y, X, Z):
    Pz = Z * inv(Z.T * Z) * Z.T
    beta = solve(X.T * Pz * X, X.T * Pz * Y)
    Yhat = X * beta
    ehat = Y - Yhat
    Omega = np.diag(np.array(ehat).squeeze()**2)
    Qxpx = X.T * Pz * X
    var = inv(Qxpx) * (X.T * Pz * Omega * Pz * X) * inv(Qxpx)
    se = np.diag(var)**.5
    return np.array(beta).squeeze(), np.array(se).squeeze()

Solution of the problem


In [5]:
def solution(dep='q'):
    # The list of variable names
    expl = ['p', 'q']
    # Remove dependent variable from the list
    expl.remove(dep)
    # Extract the name from the list
    expl = expl[0]
    
    # Define matrix variables
    Y = np.matrix(df[dep]).T
    X = np.matrix(df[['const', expl, 'x1', 'x2']])
    Z = np.matrix(df[['const', 'x1', 'x2', 'x3', 'x4']])
    
    # Print results
    print('\nOLS:')
    print('Beta =', ols(Y, X)[0])
    print('S.e. =', ols(Y, X)[1])
    print('2SLS:')
    print('Beta =', twosls(Y, X, Z)[0])
    print('S.e. =', twosls(Y, X, Z)[1])
    
    #---------
    # DWH test
    #---------
    Y = np.matrix(df[expl]).T
    X = np.matrix(df[['const', 'x1', 'x2', 'x3', 'x4']])
    
    # Predicted price
    Phat = ols(Y, X)[2]

    Y = np.matrix(df[dep]).T
    X = np.hstack([np.matrix(df[['const', expl, 'x1', 'x2']]), Phat])
    beta_hat, se = ols(Y, X)[:2]
    DWH = beta_hat[-1] / se[-1]
    print('\nDWH statistic = %.2f' % DWH)

In [6]:
solution('q')
solution('p')


OLS:
Beta = [ 3.4532 -0.3951  0.383  -0.1975]
S.e. = [ 0.1128  0.0235  0.0195  0.0373]
2SLS:
Beta = [ 3.0616 -0.5936  0.4913 -0.2252]
S.e. = [ 0.1529  0.039   0.0301  0.0448]

DWH statistic = -29.71

OLS:
Beta = [ 5.3087 -1.7203  0.8337 -0.3845]
S.e. = [ 0.5045  0.1084  0.0397  0.0838]
2SLS:
Beta = [ 5.1281 -1.6777  0.8266 -0.3785]
S.e. = [ 0.5275  0.1116  0.0389  0.0825]

DWH statistic = 1.91