In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
np.set_printoptions(precision=4, suppress=True)
sns.set_context('notebook')
%matplotlib inline
In [2]:
# True parameter
theta = 1.5
# Sample size
n = int(1e2)
# Independent variable, N(0,1)
X = np.random.normal(0, 1, n)
# Error term, standard logistic
e = np.random.logistic(0, 1, n)
# Sort data for nice plots
X = np.sort(X)
# Unobservable dependent variable
Ys = X * theta + e
# Generate observable binary variable
Y = np.zeros_like(Ys)
Y[Ys > 0] = 1
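Since the error term $e$ is standard logistic, the latent-variable model $Y^* = X\theta + e$ with $Y = \mathbf{1}\{Y^* > 0\}$ implies the choice probability
$$P(Y = 1 \mid X) = P(e > -X\theta) = \Lambda(X\theta) = \frac{1}{1 + e^{-X\theta}},$$
where $\Lambda$ is the logistic CDF. This is the logit model whose likelihood is maximized below.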
In [3]:
plt.figure(figsize=(16, 8))
# Unobservables
plt.subplot(2, 1, 1)
plt.plot(X, X * theta, label='True model')
plt.scatter(X[Ys > 0], Ys[Ys > 0], c='red', label='Unobserved > 0', s=40)
plt.scatter(X[Ys < 0], Ys[Ys < 0], c='blue', label='Unobserved < 0', s=40)
plt.ylabel(r'$Y^*$')
plt.xlabel(r'$X$')
plt.legend()
# Observables
plt.subplot(2, 1, 2)
plt.scatter(X, Y, facecolors='none', edgecolors='b', s=40, lw=2)
plt.ylabel(r'$Y$')
plt.xlabel(r'$X$')
plt.show()
In [4]:
import scipy.optimize as opt
from scipy.stats import logistic
# Define objective function: negative log-likelihood of the logit model
# (a small offset of 1e-3 keeps the logarithms away from log(0))
def f(theta, X, Y):
    p = logistic.cdf(X * theta)
    Q = - np.sum(Y * np.log(1e-3 + p) + (1 - Y) * np.log(1e-3 + 1 - p))
    return Q
# Run optimization routine
theta_hat = opt.fmin_bfgs(f, 0., args=(X, Y))
print(theta_hat)
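# Optional cross-check (sketch): the same coefficient can be recovered with
# statsmodels, assuming that package is installed; it is not used elsewhere
# in this notebook. Logit expects a 2-D design matrix, so X is reshaped and
# no constant is added, matching the single-parameter model above.
import statsmodels.api as sm
logit_res = sm.Logit(Y, X.reshape(-1, 1)).fit(disp=0)
print(logit_res.params)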
In [5]:
# Generate data for objective function plot
th = np.linspace(-3., 3., 100)
Q = [f(z, X, Y) for z in th]
# Plot the data
plt.figure(figsize=(8, 4))
plt.plot(th, Q, label='Q')
plt.xlabel(r'$\theta$')
plt.axvline(x=theta_hat[0], c='red', label='Estimated')
plt.axvline(x=theta, c='black', label='True')
plt.legend()
plt.show()
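The first order condition for the minimization of $Q$ is the (negative) score of the log-likelihood. Because the logistic density satisfies $\lambda(z) = \Lambda(z)\,\bigl(1 - \Lambda(z)\bigr)$, the score simplifies (ignoring the small numerical offset) to
$$\frac{\partial Q}{\partial \theta} = -\sum_{i=1}^{n} X_i \bigl( Y_i - \Lambda(X_i \theta) \bigr),$$
so the estimator can equivalently be obtained by solving this equation for $\theta$ with a root finder.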
In [6]:
from scipy.optimize import fsolve
# Define the first order condition (score of the negative log-likelihood);
# for the logistic distribution pdf = cdf * (1 - cdf), so the score simplifies
def df(theta, X, Y):
    return - np.sum(X * (Y - logistic.cdf(X * theta)))
# Solve FOC
theta_hat = fsolve(df, 0., args=(X, Y))
print(theta_hat)
In [7]:
# Generate data for the first order condition plot
th = np.linspace(-3., 3., 100)
dQ = np.array([df(z, X, Y) for z in th])
# Plot the data
plt.figure(figsize=(8, 4))
plt.plot(th, dQ, label=r'$dQ/d\theta$')
plt.xlabel(r'$\theta$')
plt.axvline(x=theta_hat[0], c='red', label='Estimated')
plt.axvline(x=theta, c='black', label='True')
plt.axhline(y=0, c='green')
plt.legend()
plt.show()
In [8]:
plt.figure(figsize=(16, 8))
# Unobservables
plt.subplot(2, 1, 1)
plt.plot(X, X * theta, label='True model')
plt.plot(X, X * theta_hat, label='Fitted model')
plt.scatter(X[Ys > 0], Ys[Ys > 0], c='red', label='Unobserved > 0', s=40)
plt.scatter(X[Ys < 0], Ys[Ys < 0], c='blue', label='Unobserved < 0', s=40)
plt.ylabel(r'$Y^*$')
plt.xlabel(r'$X$')
plt.legend()
# Observables
plt.subplot(2, 1, 2)
plt.scatter(X, Y, facecolors='none', edgecolors='b', label='Y', s=40)
plt.plot(X, logistic.cdf(X * theta), label=r'$\Lambda(X\theta)$')
plt.ylabel(r'$Y$')
plt.xlabel(r'$X$')
plt.legend()
plt.show()