by Milan Van den Heuvel, Ken Bastiaensen, Gonzalo Villa (Advanced Econometrics, 2016-2017)
Strict set of GM assumptions:
Small sample properties:
Asymptotic properties:
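As a brief recap of what these headings refer to (standard textbook formulation, stated here rather than reproduced from the original slides): the model is $y = X\beta + \mu$ with, under the strict Gauss-Markov assumptions, deterministic regressors $X$, $E[\mu] = 0$, $Var[\mu] = \sigma^2 I$ and, for exact inference, $\mu \sim N(0, \sigma^2 I)$. In small samples $\hat{\beta} = (X'X)^{-1}X'y$ is then unbiased with $Var[\hat{\beta}] = \sigma^2 (X'X)^{-1}$ and exactly normally distributed; asymptotically it is consistent and $\sqrt{T}(\hat{\beta} - \beta)$ converges to a normal distribution.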
In [1]:
using Distributions: Normal, TDist, ccdf, fit
using Plots
gc()
In [2]:
# include functions from file
include("functions_lib.jl");
In [3]:
# testing for normality
function JB_test(X)
    E_X = mean(X, 1)[1]
    σ = std(X, 1)[1]
    n = length(X)
    S = sum((X - E_X).^3/σ^3)/n
    K = sum((X - E_X).^4/σ^4)/n
    return n*(S^2/6 + (K-3)^2/24)
end
Out[3]:
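This is the standard Jarque-Bera statistic: with sample skewness $S$ and sample kurtosis $K$ computed from $n$ observations,
$$JB = n\left(\frac{S^2}{6} + \frac{(K - 3)^2}{24}\right),$$
which is asymptotically $\chi^2(2)$ distributed under the null hypothesis of normality.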
In [4]:
# Note that we can use unicode for identifiers by using latex and tab completion (e.g. \beta+<TAB>)
β₀ = 10
β₁ = 1
β = [β₀, β₁]
σ² = 1
T = 25 # sample size
runs = 10_000 # underscore for readability, doesn't affect number
Out[4]:
In [5]:
# simple implementation
function mc_simple(β, σ², T, runs)
    K = length(β)
    # simulate X once and keep it fixed across runs (treated as deterministic regressors)
    X = hcat(ones(T), rand(Normal(5, 2), T, K-1)) # column of ones for the constant term plus randomly drawn x's for the slope coefficients
    # variables with mc results
    β_mc = zeros(runs, K)
    β_var_mc = zeros(runs, K)
    # compute loop-invariant quantities once (avoids re-allocating inside the loop)
    Xβ = X * β
    μ_dist = Normal(0, √σ²)
    for run = 1:runs
        y = Xβ + rand(μ_dist, T)
        result = ols(y, X)
        β_mc[run, :] = result.coefs
        β_var_mc[run, :] = diag(result.vcv)
    end
    return β_mc, β, mean(β_mc,1), sqrt(mean(β_var_mc,1)), std(β_mc,1)
end
Out[5]:
In [6]:
function fill_jarque_bera_simple(T)
    jb = zeros(1000)
    for i = 1:1000
        β_mcs, True_β, Est_β, True_σ, Est_σ = mc_simple(β, σ², T, runs)
        jb[i] = JB_test(β_mcs[:,1])
    end
    return jb
end
Out[6]:
In [7]:
jb = fill_jarque_bera_simple(25)
In [8]:
function JB_prob_mass(jb)
    pass = 0
    for i = 1:length(jb)
        if jb[i] <= 5.99   # 95% critical value of χ²(2)
            pass += 1
        end
    end
    return pass/length(jb)
end
Out[8]:
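The cut-off 5.99 used above is the 95% quantile of a $\chi^2(2)$ distribution; rather than hardcoding it, it could be computed, for example:

using Distributions: Chisq, quantile
jb_crit = quantile(Chisq(2), 0.95)   # ≈ 5.991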
In [9]:
JB_prob_mass(jb)
In [10]:
histogram(jb)
In [11]:
for T = [25, 50, 100, 500]
    β_mcs, True_β, Est_β, True_σ, Est_σ = mc_simple(β, σ², T, runs)
    println("For sample size: ", T, " True_β: ", True_β," Est_β: ", Est_β, " True_σ: ", True_σ, " Est_σ: ", Est_σ)
end
We now perform t-tests of several null hypotheses about $\beta_1$ ($\beta_1 = 1, 0.9, 0.7, 0.5$), each for several sample sizes, and report the p-values. Because the errors are drawn from a normal distribution (and X is held fixed across runs), the t-statistics are exactly Student-t distributed, so we can immediately do a standard t-test rather than rely on simulated critical values.
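Concretely, for each hypothesized value $\beta_1^0$ the test statistic is
$$t = \frac{\hat{\beta}_1 - \beta_1^0}{\widehat{se}(\hat{\beta}_1)},$$
which under the null follows a $t_{T-K}$ distribution, with $K$ the number of estimated parameters.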
In [12]:
runs = 10_000
for T = [25, 50, 100, 500, 10000]
    println("## T = ",T," ##")
    for β₁_hyp = [1, 0.9, 0.7, 0.5]
        β_mc, _, β_mean, _, β_se = mc_simple(β, σ², T, runs)
        K = length(β_mean) # number of estimated parameters = degrees of freedom lost
        ttest = (β_mean[2] - β₁_hyp) / β_se[2]
        pval = 2 * ccdf(TDist(T-K), abs(ttest)) # two-sided p-value: chance of a |t| at least this large if the null is correct
        println("β₁=", β₁_hyp, "; T-test: ", ttest)
        println("β₁=", β₁_hyp, "; P-val: ", pval)
    end
end
In [13]:
runs = 10_000
t_values = zeros(runs)
for T = [25, 50, 100, 500]
    println("## T = ",T," ##")
    for β₁_hyp = [1]
        β_mc, _, β_mean, _, β_se = mc_simple(β, σ², T, runs)
        t_values = (β_mc[:,2] - β₁_hyp)/β_se[2]
        sort!(t_values)
        println("For β_hyp = ",β₁_hyp,", critical values: 2.5%: ",t_values[250], " 97.5%: ", t_values[9750])
    end
end
In [14]:
histogram(t_values)
Out[14]:
In [15]:
function tstat_prob_mass(t_values)
    pass = 0
    for i = 1:length(t_values)
        if abs(t_values[i]) <= 1.96   # 97.5% quantile of the standard normal
            pass += 1
        end
    end
    return pass/length(t_values)
end
Out[15]:
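As with the JB cut-off, 1.96 is approximately the 97.5% quantile of the standard normal; for small T the exact $t_{T-K}$ quantile could be used instead, for example:

using Distributions: Normal, TDist, quantile
quantile(Normal(), 0.975)       # ≈ 1.96
quantile(TDist(25 - 2), 0.975)  # ≈ 2.07 for T = 25, K = 2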
In [16]:
tstat_prob_mass(t_values)
Out[16]:
From these results we clearly see that under the strict assumptions of Gauss-Markov the OLS estimator for $\beta$ ($\hat{\beta}$) and the estimator for the standard deviation ($\hat{\sigma}$) of this estimator are unbiased and, even for small samples, very close to the true values. The estimated standard deviations also decline with increasing sample size, so the distribution of the estimated parameters grows more peaked around the true values. Since the error terms are drawn from a normal distribution and $\hat{\beta}$ is a weighted sum of these, where the weighting is deterministic, it is itself also normally distributed. This is confirmed by the Jarque-Bera test.
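The "weighted sum" argument can be made explicit:
$$\hat{\beta} = (X'X)^{-1}X'y = \beta + (X'X)^{-1}X'\mu,$$
so with X fixed, $\hat{\beta}$ is a deterministic linear combination of the normal errors $\mu$ and hence exactly normally distributed.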
X is now redrawn in every run, which changes some of the estimator's properties.
X and $\mu$ are still independent.
The OLS estimator is therefore still unbiased and consistent, but in small samples it is no longer necessarily normally distributed, and the standard covariance matrix should be interpreted as being conditional on X. In large samples, the estimator will still be (asymptotically) normally distributed.
In [17]:
function mc_stoch(β, σ², T, runs)
    K = length(β)
    # variables with mc results
    β_mc = zeros(runs, K)
    β_var_mc = zeros(runs, K)
    # pre-allocate
    μ_dist = Normal(0, √σ²)
    X_dist = Normal(5, 2)
    X = ones(T, K)
    for run = 1:runs
        # simulate X inside the loop: the regressors are now stochastic
        X[:, 2:end] = rand(X_dist, T, K-1)
        y = X*β + rand(μ_dist, T)
        result = ols(y, X)
        β_mc[run, :] = result.coefs
        β_var_mc[run,:] = diag(result.vcv)
    end
    return β_mc, β, mean(β_mc,1), sqrt(mean(β_var_mc,1)), std(β_mc,1)
end
Out[17]:
In [18]:
β_mcs2, True_β, Est_β, True_σ, Est_σ = mc_stoch(β, σ², 25, 5000)
histogram(β_mcs2[:,1])
Out[18]:
In [19]:
function fill_jarque_bera_stoch(β, σ², T, runs)
    jb = zeros(1000)
    for i = 1:1000
        β_mcs, True_β, Est_β, True_σ, Est_σ = mc_stoch(β, σ², T, runs)
        jb[i] = JB_test(β_mcs[:,1])
    end
    return jb
end
Out[19]:
In [20]:
jb = fill_jarque_bera_stoch(β, σ², 25, runs)
In [21]:
JB_prob_mass(jb)
In [22]:
histogram(jb)
In [23]:
runs = 10_000
for T = [25, 50, 100, 500]
    println("## T = ",T," ##")
    for β₁_hyp = [1, 0.9, 0.7, 0.5]
        β_mc, _, β_mean, _, β_se = mc_stoch(β, σ², T, runs)
        K = length(β_mean) # number of estimated parameters = degrees of freedom lost
        ttest = (β_mean[2] - β₁_hyp) / β_se[2]
        pval = 2 * ccdf(TDist(T-K), abs(ttest)) # two-sided p-value: chance of a |t| at least this large if the null is correct
        println("β₁=", β₁_hyp, "; T-test: ", ttest)
        println("β₁=", β₁_hyp, "; P-val: ", pval)
    end
end
In [24]:
runs = 10_000
t_values = zeros(runs)
for T = [25, 50, 100, 500]
    println("## T = ",T," ##")
    for β₁_hyp = [1]
        β_mc, _, β_mean, _, β_se = mc_stoch(β, σ², T, runs)
        t_values = (β_mc[:,2] - β₁_hyp)/β_se[2]
        sort!(t_values)
        println("For β_hyp = ",β₁_hyp,", critical values: 2.5%: ",t_values[250], " 97.5%: ", t_values[9750])
    end
end
In [25]:
histogram(t_values)
Out[25]:
In [26]:
tstat_prob_mass(t_values)
Out[26]:
The Jarque-Bera test now rejects normality far more often: there is clearly much more probability mass beyond the critical value of 5.99 than in the deterministic case, reflecting that $\hat{\beta}$ is no longer exactly normally distributed in small samples when X is stochastic. The estimators are still unbiased and consistent.
Introducing lagged dependent variables means that the assumption "X and $\mu$ are independent" has to be relaxed to $E[\mu_t|x_t] = 0$, i.e. the errors are only contemporaneously independent of the explanatory variables (which now include $y_{t-1}$).
The OLS estimator keeps the same decomposition as before, but because X now contains $y_{t-1}$, which depends on past errors, $E[(X'X)^{-1}X'\mu] \neq 0$ in general: the estimator is biased in small samples, though it remains consistent since the errors are contemporaneously uncorrelated with the regressors.
In [27]:
# AR1 MC simulation
function mc_ar1(β, σ², T, runs)
    K = length(β)
    β₀, β₁ = β
    σ = √σ² # = sqrt(σ²)
    # variables with mc averages
    β_mc = zeros(runs, K)
    β_var_mc = zeros(runs, K)
    # pre-allocate
    y = zeros(T)
    X = ones(T, K) # fill second column with y_{t-1}
    y₀_dist = Normal(β₀/(1-β₁), sqrt(σ²/(1-β₁^2))) # stationary distribution of y
    for run = 1:runs
        # simulate y
        y₀ = rand(y₀_dist)
        y[1] = β₀ + β₁*y₀ + σ*randn()
        for t = 2:T
            y[t] = β₀ + β₁*y[t-1] + σ*randn()
        end
        # copy into X
        X[1,2] = y₀
        X[2:end, 2] = y[1:end-1]
        # ols
        result = ols(y, X)
        β_mc[run,:] = result.coefs
        β_var_mc[run,:] = diag(result.vcv)
    end
    return β, mean(β_mc,1), sqrt(mean(β_var_mc,1)), std(β_mc,1)
end
Out[27]:
In [28]:
β₀, β₁ = 10, 0.1
σ² = 1
T = 1000
runs = 10_000
@time mc_ar1([β₀, β₁], σ², T, runs)
Out[28]:
Let's plot the bias
In [29]:
using Plots
gr();
In [30]:
Ts = vcat(collect(10:10:90), collect(100:25:500))
β₁s = [0, 0.5, 0.9]
β̂ = [mc_ar1([β₀, β₁], σ², T, runs)[2][1] for T in Ts, β₁ in β₁s]
Out[30]:
In [31]:
plot(Ts, β̂, label=string.(β₁s'))
Out[31]:
Given a certain sample size and estimated AR(1) coefficient, you can use the matrix of $\hat\beta$ values (or the graph) to gauge the small-sample bias (note that the reported values are the estimated intercepts, so they should be compared to the true value of 10).
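For the slope coefficient itself, a useful benchmark (not computed in this notebook) is the classical Kendall approximation for the OLS bias in an AR(1) with an estimated intercept,
$$E[\hat{\beta}_1] - \beta_1 \approx -\frac{1 + 3\beta_1}{T},$$
so, for example, with $\beta_1 = 0.9$ and $T = 50$ the expected bias is roughly $-0.074$.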
In [32]:
# around 4s on my laptop
@time mc_simple(β, σ², T, 100_000);
In [33]:
using ProfileView #run `Pkg.add("ProfileView")` if not yet installed
In [34]:
Profile.clear()
@profile mc_simple(β, σ², T, 1_000)
ProfileView.view() #interactive graph with mouse over, scroll and drag
In [35]:
# implementation that factorizes X only once
function mc_fact(β, σ², T, runs)
    # simulate X once and keep it fixed across runs
    X = hcat(ones(T), rand(Normal(5, 2), T))
    # variables with mc results
    β_mc = zeros(runs, length(β))
    β_var_mc = zeros(runs) # only keep σ̂²T = dot(μ̂, μ̂) = σ̂²*(T-K) per run
    # pre-allocate loop-invariant quantities
    Xβ = X * β
    μ_dist = Normal(0, √σ²)
    x_fact = factorize(X)
    XtXinvd = diag(inv(X'*X))
    for run = 1:runs
        y = Xβ + rand(μ_dist, T)
        β̂ = x_fact \ y # factorization already done now
        μ̂ = y - X * β̂
        σ̂²T = dot(μ̂, μ̂) # put factor /(T-K) outside of loop
        β_mc[run, :] = β̂
        β_var_mc[run] = σ̂²T
    end
    se_true = std(β_mc, 1) # Monte Carlo standard deviation of β̂
    se_mc = sqrt(mean(β_var_mc) / (T - length(β)) * XtXinvd) # average estimated standard error
    return β, mean(β_mc, 1), se_true, se_mc
end
Out[35]:
In [36]:
# runs in about 2s on my laptop
mc_fact(β, σ², 25, 1) # first run includes JIT compilation
@time mc_fact(β, σ², T, 100_000)
Out[36]:
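Compared to mc_simple, the speed-up comes from doing the expensive linear algebra only once: factorize(X) and the diagonal of $(X'X)^{-1}$ are computed outside the loop, so each run only has to draw new errors and solve against the already-factorized X.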