We are interested in modeling unemployment duration. For that purpose, consider the following hazard function: $$ \lambda\left(t,X_i\right)=\frac{\gamma_i\alpha t^{\alpha-1}}{1+\gamma_it^{\alpha}}, \quad\gamma_i=\exp\left\{ X_i\beta\right\} ,\quad i=\overline{1,N}, $$ where $\alpha$ and $\beta$ are unknown parameters and $X_{i}$ are explanatory variables. If $T_{i}$ denotes the unemployment duration of individual $i$, the hazard function has the usual interpretation as the instantaneous exit rate from unemployment: $$ \lambda\left(t,X_i\right)=\lim_{h\downarrow 0}\frac{P\left[\left.t\leq T_{i}<t+h\right|T_{i}\geq t,X_{i}\right]}{h}. $$
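Before deriving anything, it helps to see the hazard numerically. A minimal sketch, assuming illustrative values $\alpha=1.5$ and $X_i\beta=0.5$ (placeholders, not estimates):
In [ ]:
import numpy as np

def hazard(t, alpha, xb):
    """Hazard lambda(t, X) with gamma = exp(X beta)."""
    gamma = np.exp(xb)
    return gamma * alpha * t**(alpha - 1) / (1 + gamma * t**alpha)

# For alpha > 1 the hazard first rises, then falls with duration
print(hazard(np.linspace(0.1, 10, 5), alpha=1.5, xb=0.5))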
Given that, we can find the distribution of $\log\left(T_{i}\right)$ (suppressing the index $i$ on $\gamma$ and $\lambda$): $$\begin{aligned} F\left(\exp\left(t\right)\right)= & 1-\exp\left\{ -\int_{0}^{\exp\left(t\right)}\lambda\left(s\right)ds\right\} \\ = & 1-\exp\left\{ -\int_{0}^{\exp\left(t\right)}\frac{\gamma\alpha s^{\alpha-1}}{1+\gamma s^{\alpha}}ds\right\} \\ = & 1-\exp\left\{ -\int_{0}^{\exp\left(t\right)}\frac{d\left(\gamma s^{\alpha}\right)}{1+\gamma s^{\alpha}}\right\} \\ = & 1-\exp\left\{ -\log\left(1+\gamma\exp\left(\alpha t\right)\right)\right\} \\ = & 1-\frac{1}{1+\gamma\exp\left(\alpha t\right)}\\ = & \frac{\gamma\exp\left(\alpha t\right)}{1+\gamma\exp\left(\alpha t\right)}. \end{aligned}$$ This is the CDF of a logistic distribution, so $\log T_{i}$ is conditionally logistic.
Alternatively, $$\begin{aligned}F\left(\exp\left(t\right)\right)= & \left[1+\exp\left(-\alpha t\right)/\gamma\right]^{-1}\\ = & \left[1+\exp\left\{ -\log\left(\gamma\right)-\alpha t\right\} \right]^{-1}\\ = & \left[1+\exp\left\{ -\frac{t+\log\left(\gamma\right)/\alpha}{1/\alpha}\right\} \right]^{-1}, \end{aligned}$$ which is the logistic CDF with location $-\log\left(\gamma\right)/\alpha$ and scale $1/\alpha$. Hence, $$ E\left[\log T\right]=-\frac{\log\left(\gamma\right)}{\alpha} =-\frac{X_{i}\beta}{\alpha},\quad V\left[\log T\right]=\frac{\pi^{2}}{3\alpha^{2}}. $$
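The derivation is easy to verify by simulation: draw $T$ by inverting $F(t)=\gamma t^{\alpha}/(1+\gamma t^{\alpha})$ and compare the sample moments of $\log T$ with the formulas above. A sketch with arbitrary parameter values:
In [ ]:
import numpy as np

alpha, gamma = 1.5, np.exp(0.5)
rng = np.random.default_rng(0)
u = rng.uniform(size=100000)
# Inverse CDF: u = gamma * t**alpha / (1 + gamma * t**alpha)
T = (u / (1 - u) / gamma) ** (1 / alpha)
print(np.log(T).mean(), -np.log(gamma) / alpha)
print(np.log(T).var(), np.pi**2 / 3 / alpha**2)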
The conditional density of $\log T_{i}$ is therefore the logistic density $$ f\left(Y_{i}\left|X_{i},\theta\right.\right) =s_{i}^{-1}\exp\left\{ -\frac{Y_{i}-\mu_{i}}{s_{i}}\right\} \left(1+\exp\left\{ -\frac{Y_{i}-\mu_{i}}{s_{i}}\right\} \right)^{-2}, $$ with location and scale $$ \mu_{i}=-\frac{X_{i}\beta}{\alpha},\quad s_{i}=\frac{1}{\alpha}. $$ The conditional log-likelihood function is $$ L\left(\theta\right)=\frac{1}{n}\sum_{i=1}^{n}l_{i}\left(\theta\right), $$ where, since $\left(Y_{i}-\mu_{i}\right)/s_{i}=\alpha Y_{i}+X_{i}\beta$, $$\begin{aligned}l_{i}\left(\theta\right) = & \log f\left(Y_{i}\left|X_{i},\theta\right.\right)\\ = & -\log\left(s_{i}\right)-\frac{Y_{i}-\mu_{i}}{s_{i}}-2\log\left(1+\exp\left\{ -\frac{Y_{i}-\mu_{i}}{s_{i}}\right\} \right)\\ = & \log\left(\alpha\right)-\left(\alpha Y_{i}+X_{i}\beta\right)-2\log\left(1+\exp\left\{ -\left(\alpha Y_{i}+X_{i}\beta\right)\right\} \right). \end{aligned}$$
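The closed form can be cross-checked against scipy.stats.logistic, which implements exactly this density for a given location and scale. A sketch at one illustrative point:
In [ ]:
import numpy as np
from scipy.stats import logistic

alpha, xb, y = 1.5, 0.5, 0.3  # illustrative values for alpha, X_i beta, Y_i
# Closed-form l_i versus scipy's logistic log-density
li = np.log(alpha) - (alpha * y + xb) - 2 * np.log(1 + np.exp(-(alpha * y + xb)))
print(li, logistic.logpdf(y, loc=-xb / alpha, scale=1 / alpha))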
The score function is $$ s_{i}\left(\theta\right)=\frac{\partial}{\partial\theta}l_{i}\left(\theta\right) =\left[\frac{\partial}{\partial\alpha}l_{i}\left(\theta\right),\frac{\partial}{\partial\beta^{\prime}}l_{i}\left(\theta\right)\right]. $$ The first component is $$ \frac{\partial}{\partial\alpha}l_{i}\left(\theta\right) =\alpha^{-1}-Y_{i}+2Y_{i}\left(1+g_{i}\right)^{-1}, $$ where $$ g_{i}=\exp\left\{ \alpha Y_{i}+X_{i}\beta\right\} =\gamma_{i}T_{i}^{\alpha}. $$ The second is $$ \frac{\partial}{\partial\beta^{\prime}}l_{i}\left(\theta\right) =-X_{i}+2X_{i}\left(1+g_{i}\right)^{-1}. $$
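As a spot check of these expressions (the full objective is verified with check_grad below), the analytic score at one observation can be compared with a numerical gradient from scipy.optimize.approx_fprime; the values below are illustrative:
In [ ]:
import numpy as np
from scipy.optimize import approx_fprime

y, x = 0.3, np.array([1.0, -0.5])  # one illustrative observation

def li(theta):
    a, b = theta[0], theta[1:]
    z = a * y + x.dot(b)
    return np.log(a) - z - 2 * np.log(1 + np.exp(-z))

theta = np.array([1.5, 0.2, 0.4])
g = np.exp(theta[0] * y + x.dot(theta[1:]))
analytic = np.r_[1 / theta[0] - y * (1 - 2 / (1 + g)), -x * (1 - 2 / (1 + g))]
print(analytic)
print(approx_fprime(theta, li, 1e-7))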
The Hessian is $$ H_{i}\left(\theta\right) =\frac{\partial^{2}}{\partial\theta\partial\theta^{\prime}}l_{i}\left(\theta\right) =\left[\begin{array}{cc} \frac{\partial^{2}}{\partial\alpha^{2}}l_{i}\left(\theta\right) & \frac{\partial^{2}}{\partial\alpha\partial\beta^{\prime}}l_{i}\left(\theta\right)\\ \frac{\partial^{2}}{\partial\beta\partial\alpha}l_{i}\left(\theta\right) & \frac{\partial^{2}}{\partial\beta\partial\beta^{\prime}}l_{i}\left(\theta\right) \end{array}\right]. $$ The first diagonal term is $$ \frac{\partial^{2}}{\partial\alpha^{2}}l_{i}\left(\theta\right) =-\alpha^{-2}-2Y_{i}^{2}g_{i}\left(1+g_{i}\right)^{-2}. $$ The second diagonal term is $$ \frac{\partial^{2}}{\partial\beta\partial\beta^{\prime}}l_{i}\left(\theta\right) =-2X_{i}X_{i}^{\prime}g_{i}\left(1+g_{i}\right)^{-2}. $$ The off-diagonal term is $$ \frac{\partial^{2}}{\partial\alpha\partial\beta}l_{i}\left(\theta\right) =-2Y_{i}X_{i}g_{i}\left(1+g_{i}\right)^{-2}. $$
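The second derivatives admit the same kind of numerical verification; a sketch using a central second difference in $\alpha$ at illustrative values:
In [ ]:
import numpy as np

y, xb = 0.3, 0.5  # illustrative Y_i and X_i beta

def li(a):
    z = a * y + xb
    return np.log(a) - z - 2 * np.log(1 + np.exp(-z))

a, h = 1.5, 1e-4
g = np.exp(a * y + xb)
# Central second difference versus the analytic formula
print((li(a + h) - 2 * li(a) + li(a - h)) / h**2)
print(-1 / a**2 - 2 * y**2 * g / (1 + g)**2)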
In [1]:
import zipfile
import numpy as np
import pandas as pd
from scipy.optimize import minimize, check_grad
np.set_printoptions(precision=2, suppress=True)
pd.set_option('float_format', '{:6.3f}'.format)
%matplotlib inline
In [2]:
zf = zipfile.ZipFile('../data/UnempDur.zip')
# Get the name of the csv file
name = zf.namelist()[0]
# Set up reader
data = zf.open(name)
# Read csv file
raw = pd.read_csv(data).iloc[:, 1:]
print(raw.shape)
print(raw.head())
In [3]:
# Encode the 'ui' indicator as 0/1
raw['ui'] = (raw['ui'] == 'yes').astype(int)
Y = np.log(raw['spell'].values)
X = raw.drop('spell', axis=1).values
print(Y.shape, X.shape)
In [4]:
class LogisticDuration(object):
"""The class for logistic log-duration model."""
def __init__(self, Y, X):
# Initialize the data
self.Y, self.X = Y, X
# Compute number of observations and explanatory variables.
self.N, self.K = X.shape
# Note that the number of parameters is K+1!
        self.K += 1
def pmean(self):
"""Returns (N,) array."""
return -(self.X.dot(self.beta)) / self.alpha
    def pscale(self):
        """Logistic scale parameter 1/alpha. Returns float."""
        return 1 / self.alpha
def loglikelihood(self, theta):
"""Negative of the log-likelihood function.
Parameters
----------
theta : (K,) array
Parameters of the model
Returns
-------
float
"""
self.alpha, self.beta = theta[0], theta[1:]
        normalized = (self.Y - self.pmean()) / self.pscale()
        f = -np.log(self.pscale()) - normalized - 2 * np.log(1 + np.exp(-normalized))
return -f.mean()
    def gamma(self):
        """Helper: g_i = exp(alpha * Y_i + X_i beta) = gamma_i * T_i**alpha.
        Returns
        -------
        (N,) array
        """
        return np.exp(self.alpha * self.Y + self.X.dot(self.beta))
def dl_da(self):
"""Derivative with respect to alpha.
Returns
-------
(N,) array
"""
        return 1 / self.alpha - self.Y * (1 - 2 / (1 + self.gamma()))
    def dl_db(self):
        """Derivative with respect to beta.
        Returns
        -------
        (K-1, N) array
        """
        return -self.X.T * (1 - 2 / (1 + self.gamma()))
def score(self):
"""Score fucntion of the log-likelihood.
Returns
-------
(K, N) array
"""
return -np.vstack([self.dl_da(), self.dl_db()])
def jac(self, theta):
"""Gradient of the objective function.
Parameters
----------
theta : (K,) array
Parameters of the model
Returns
-------
(K,) array
"""
self.alpha, self.beta = theta[0], theta[1:]
return self.score().mean(1)
def d2l_da2(self):
"""Second derivative with respect to alpha.
Returns
-------
(N,) array
"""
        return -1 / self.alpha**2 - 2 * self.Y**2 * self.gamma() / (1 + self.gamma())**2
    def d2l_dadb(self):
        """Second derivative with respect to alpha and beta.
        Returns
        -------
        (K-1, N) array
        """
        return -2 * self.X.T * self.Y * self.gamma() / (1 + self.gamma())**2
    def d2l_db2(self):
        """Second derivative with respect to beta.
        Returns
        -------
        (K-1, K-1, N) array
        """
        XX = self.X.T[np.newaxis, :, :] * self.X.T[:, np.newaxis, :]
        return -2 * XX * self.gamma() / (1 + self.gamma())**2
def hess(self, theta):
"""Hessian of the objective function.
Parameters
----------
theta : (K,) array
Parameters of the model
Returns
-------
(K, K) array
"""
self.alpha, self.beta = theta[0], theta[1:]
mat = np.empty((self.K, self.K))
mat[0, 0] = self.d2l_da2().mean()
mat[1:, 0] = self.d2l_dadb().mean(1)
mat[0, 1:] = mat[1:, 0]
mat[1:, 1:] = self.d2l_db2().mean(-1)
return -mat
def info_matrix(self):
"""Variance of the scores.
Returns
-------
(K, K) array
"""
score = self.score()
return score.dot(score.T) / self.N
def theta_var(self, version):
"""Variance matrix of parameter estimates.
Parameters
----------
version : str
The choice of estimator. Choose among ['scores', 'hess', 'sandwich']
Returns
-------
(K, K) array
"""
        if version == 'scores':
            return np.linalg.inv(self.info_matrix()) / self.N
        elif version == 'hess':
            return np.linalg.inv(self.hess(self.theta_hat)) / self.N
        elif version == 'sandwich':
            Hinv = np.linalg.inv(self.hess(self.theta_hat))
            return Hinv.dot(self.info_matrix()).dot(Hinv) / self.N
        else:
            raise ValueError("Choose version among ['scores', 'hess', 'sandwich']!")
def std_err(self, version):
"""Standard errors of parameter estimates.
Parameters
----------
version : str
The choice of estimator. Choose among ['scores', 'hess', 'sandwich']
Returns
-------
(K,) array
"""
return np.diag(self.theta_var(version)) ** .5
def t_stats(self, version):
"""T-statistics of parameter estimates.
Parameters
----------
version : str
The choice of estimator. Choose among ['scores', 'hess', 'sandwich']
Returns
-------
(K,) array
"""
return self.theta_hat / self.std_err(version)
In [5]:
model = LogisticDuration(Y, X)
theta_start = np.ones(X.shape[1] + 1)
In [6]:
print('The difference between analytic and numerical gradient =',
check_grad(model.loglikelihood, model.jac, theta_start))
In [7]:
res1 = minimize(model.loglikelihood, theta_start, method='BFGS')
res2 = minimize(model.loglikelihood, theta_start, method='BFGS', jac=model.jac)
res3 = minimize(model.loglikelihood, theta_start, method='dogleg', jac=model.jac, hess=model.hess)
print(res1.x)
print(res2.x)
print(res3.x)
# Check that the results are close
print(np.allclose(res1.x, res2.x, rtol=1e-3))
print(np.allclose(res1.x, res3.x, rtol=1e-1))
In [8]:
%timeit minimize(model.loglikelihood, theta_start, method='BFGS')
%timeit minimize(model.loglikelihood, theta_start, method='BFGS', jac=model.jac)
%timeit minimize(model.loglikelihood, theta_start, method='dogleg', jac=model.jac, hess=model.hess)
In [9]:
model.theta_hat = res1.x
versions = ['scores', 'hess', 'sandwich']
stderr, tstats = [], []
for version in versions:
stderr.append(model.std_err(version))
tstats.append(model.t_stats(version))
stderr = pd.DataFrame(np.vstack(stderr).T, columns=versions)
tstats = pd.DataFrame(np.vstack(tstats).T, columns=versions)
df = pd.concat([stderr, tstats], keys=['stderr', 'tstats'], names=['stat', 'version'], axis=1)
df['theta_hat'] = model.theta_hat
print(df)
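Since the median of a logistic distribution equals its location, the fitted model implies a median duration of $\exp(-X_i\hat{\beta}/\hat{\alpha})$ for each observation. A sketch using the estimates obtained above:
In [ ]:
# Median of log T is mu_i = -X_i beta / alpha, so median T = exp(mu_i)
alpha_hat, beta_hat = model.theta_hat[0], model.theta_hat[1:]
median_T = np.exp(-X.dot(beta_hat) / alpha_hat)
print(pd.Series(median_T, name='median duration').describe())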