In [1]:
import numpy as np
import pandas as pd
from numpy.linalg import inv, solve
np.set_printoptions(precision=4, suppress=True)
pd.set_option('float_format', '{:6.4f}'.format)
%matplotlib inline
In [2]:
df = pd.read_csv('../data/demand-supply.csv',
names=['q', 'p', 'x1', 'x2', 'x3', 'x4'], sep=';')
df['const'] = 1
print(df.head())
In [3]:
def ols(Y, X):
beta = solve(X.T * X, X.T * Y)
Yhat = X * beta
ehat = Y - Yhat
Omega = np.diag(np.array(ehat).squeeze()**2)
var = inv(X.T * X) * (X.T * Omega * X) * inv(X.T * X)
se = np.diag(var)**.5
return np.array(beta).squeeze(), np.array(se).squeeze(), Yhat
In [4]:
def twosls(Y, X, Z):
Pz = Z * inv(Z.T * Z) * Z.T
beta = solve(X.T * Pz * X, X.T * Pz * Y)
Yhat = X * beta
ehat = Y - Yhat
Omega = np.diag(np.array(ehat).squeeze()**2)
Qxpx = X.T * Pz * X
var = inv(Qxpx) * (X.T * Pz * Omega * Pz * X) * inv(Qxpx)
se = np.diag(var)**.5
return np.array(beta).squeeze(), np.array(se).squeeze()
In [5]:
def solution(dep='q'):
# The list of variable names
expl = ['p', 'q']
# Remove dependent variable from the list
expl.remove(dep)
# Extract the name from the list
expl = expl[0]
# Define matrix variables
Y = np.matrix(df[dep]).T
X = np.matrix(df[['const', expl, 'x1', 'x2']])
Z = np.matrix(df[['const', 'x1', 'x2', 'x3', 'x4']])
# Print results
print('\nOLS:')
print('Beta =', ols(Y, X)[0])
print('S.e. =', ols(Y, X)[1])
print('2SLS:')
print('Beta =', twosls(Y, X, Z)[0])
print('S.e. =', twosls(Y, X, Z)[1])
#---------
# DWH test
#---------
Y = np.matrix(df[expl]).T
X = np.matrix(df[['const', 'x1', 'x2', 'x3', 'x4']])
# Predicted price
Phat = ols(Y, X)[2]
Y = np.matrix(df[dep]).T
X = np.hstack([np.matrix(df[['const', expl, 'x1', 'x2']]), Phat])
beta_hat, se = ols(Y, X)[:2]
DWH = beta_hat[-1] / se[-1]
print('\nDWH statistic = %.2f' % DWH)
In [6]:
solution('q')
solution('p')