In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
import pickle as pkl
import numpy as np
import copy
from statsmodels.sandbox.regression.gmm import IV2SLS
from mc_exploration_functions import *
import statsmodels.api as sm
import seaborn as sns
import grmpy
In [3]:
model_base = get_model_dict('mc_exploration.grmpy.ini')
model_base['SIMULATION']['source'] = 'mc_data'
df_base = grmpy.simulate('mc_exploration.grmpy.ini')
df_base.head()
Out[3]:
In [4]:
d_treated = (df_base['D'] == 1)
ate = np.mean(df_base['Y1'] - df_base['Y0'])
tt = np.mean(df_base['Y1'].loc[d_treated] - df_base['Y0'].loc[d_treated])
tut = np.mean(df_base['Y1'].loc[~d_treated] - df_base['Y0'].loc[~d_treated])
true_effect = ate
print('ATE', ate, 'TT', tt, 'TUT', tut)
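For reference, these three parameters average the individual benefit $B = Y_1 - Y_0$ over different populations: $ATE = E[B]$, $TT = E[B \mid D = 1]$, and $TUT = E[B \mid D = 0]$. Absent essential heterogeneity, all three coincide in the population.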
However, we still have a considerable amount of treatment effect heterogeneity.
In [5]:
plot_distribution_of_benefits(df_base)
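For a quick numerical summary of the same heterogeneity, we can work directly with the potential outcome columns of df_base (a minimal sketch; the statistics simply mirror what the plot shows):

benefits = df_base['Y1'] - df_base['Y0']
# Spread of the individual benefits around the average effect.
print('Std. of benefits ', np.std(benefits))
# Share of individuals who actually gain from treatment.
print('Share with B > 0 ', np.mean(benefits > 0))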
Now let us get ready to explore the effects of essential heterogeneity with some auxiliary functions.
In [6]:
def update_correlation_structure(model_dict, rho):
    """This function takes a valid model specification and updates the correlation
    structure among the unobservables."""
    # We first extract the baseline information from the model dictionary.
    sd_v = model_dict['DIST']['all'][-1]
    sd_u = model_dict['DIST']['all'][0]

    # Now we construct the implied covariance, which is relevant for the
    # initialization file.
    cov = rho * sd_v * sd_u
    model_dict['DIST']['all'][2] = cov

    # We print out the specification to an initialization file with the name
    # mc_init.grmpy.ini.
    print_model_dict(model_dict)
def collect_effects(model_base, which, grid_points):
    """This function collects numerous effects for alternative correlation structures."""
    model_mc = copy.deepcopy(model_base)

    effects = []
    for rho in np.linspace(0.00, 0.99, grid_points):
        # We create a new initialization file with an updated correlation structure.
        update_correlation_structure(model_mc, rho)

        # We use this new file to simulate a new sample.
        df_mc = grmpy.simulate('mc_init.grmpy.ini')

        # We extract auxiliary objects for further processing.
        endog, exog, instr = df_mc['Y'], df_mc[['X_0', 'D']], df_mc[['X_0', 'Z_1']]
        d_treated = df_mc['D'] == 1

        # We calculate our parameter of interest.
        label = which.lower()
        if label == 'randomization':
            stat = np.mean(endog.loc[d_treated]) - np.mean(endog.loc[~d_treated])
        elif label == 'ordinary_least_squares':
            stat = sm.OLS(endog, exog).fit().params[1]
        elif label == 'conventional_instrumental_variables':
            stat = IV2SLS(endog, exog, instr).fit().params[1]
        elif label == 'local_instrumental_variables':
            grmpy.estimate('mc_init.grmpy.ini')
            stat = get_effect_grmpy()
        elif label == 'conventional_average_effects':
            ate = np.mean(df_mc['Y1'] - df_mc['Y0'])
            tt = np.mean(df_mc['Y1'].loc[d_treated] - df_mc['Y0'].loc[d_treated])
            stat = (ate, tt)
        else:
            raise NotImplementedError

        effects += [stat]

    return effects
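Before running a full grid, a single pass through these helpers illustrates the mechanics (a sketch; rho = 0.5 is an arbitrary illustrative value):

model_mc = copy.deepcopy(model_base)
# Rewrite mc_init.grmpy.ini with a correlation of 0.5 among the unobservables.
update_correlation_structure(model_mc, 0.5)
# Simulate one sample from the rewritten initialization file.
df_check = grmpy.simulate('mc_init.grmpy.ini')
# Compare the mean observed outcomes of treated and untreated individuals.
print(df_check.groupby('D')['Y'].mean())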
How do the different treatment effect parameters diverge if we introduce essential heterogeneity, i.e. if individuals select into treatment based in part on their unobservable gains?
In [7]:
effects = collect_effects(model_base, 'conventional_average_effects', 10)
plot_effects(effects)
Let us investigate essential heterogeneity with respect to the distribution of the unobservables.
In [8]:
df_mc = pkl.load(open('mc_data.grmpy.pkl', 'rb'))
for df in [df_base, df_mc]:
plot_joint_distribution_unobservables(df)
Let us revisit the shape of the marginal benefit of treatment with and without essential heterogeneity.
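Recall the definition of the marginal benefit of treatment at quantiles $u_S$ of the unobservable desire for treatment:

$$B^{MTE}(x, u_S) = E[Y_1 - Y_0 \mid X = x, U_S = u_S]$$

Without essential heterogeneity this curve is flat in $u_S$; with it, benefits vary systematically with the unobserved desire for treatment.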
In [9]:
for fname in ['data', 'mc_data']:
plot_marginal_effect(get_marginal_effect_grmpy(fname + '.grmpy.info'))
We mimic an evaluation by randomization and simply compare the mean observed outcomes of treated and untreated individuals in the baseline sample.
In [10]:
effect = np.mean(df_base['Y'].loc[d_treated]) - np.mean(df_base['Y'].loc[~d_treated])
print('Effect ', effect)
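This naive contrast decomposes as

$$E[Y \mid D = 1] - E[Y \mid D = 0] = TT + \underbrace{E[Y_0 \mid D = 1] - E[Y_0 \mid D = 0]}_{\text{selection bias}},$$

so it recovers the effect of treatment on the treated only when there is no selection into treatment based on the untreated outcome.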
Now we can directly look at the effect of essential heterogeneity.
In [11]:
effects = collect_effects(model_base, 'randomization', 10)
plot_estimates(true_effect, effects)
In [12]:
results = sm.OLS(df_base['Y'], df_base[['X_0', 'D']]).fit()
results.summary()
Out[12]:
Now we again investigate the effect of essential heterogeneity on our estimates.
In [13]:
effects = collect_effects(model_base, 'ordinary_least_squares', 10)
plot_estimates(true_effect, effects)
In [14]:
result = IV2SLS(df_base['Y'], df_base[['X_0', 'D']], df_base[['X_0', 'Z_1']]).fit()
result.summary()
Out[14]:
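As a cross-check on the conventional IV estimate (a minimal sketch: the point estimate replicates 2SLS, but the naively computed second-stage standard errors would be invalid), we can run the two stages by hand:

# First stage: project the endogenous treatment indicator on the instruments.
first = sm.OLS(df_base['D'], df_base[['X_0', 'Z_1']]).fit()
# Second stage: replace D by its fitted values from the first stage.
exog_hat = df_base[['X_0']].copy()
exog_hat['D_hat'] = first.fittedvalues
second = sm.OLS(df_base['Y'], exog_hat).fit()
print('2SLS coefficient on D ', second.params['D_hat'])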
Now we introduce essential heterogeneity.
In [15]:
effects = collect_effects(model_base, 'conventional_instrumental_variables', 10)
plot_estimates(true_effect, effects)
We look at our baseline specification first.
In [16]:
rslt = grmpy.estimate('mc_exploration.grmpy.ini')
print('Effect ', get_effect_grmpy())
In [17]:
effects = collect_effects(model_base, 'local_instrumental_variables', 10)
plot_estimates(true_effect, effects)
Let us also inspect the joint distribution of the potential outcomes in the presence of essential heterogeneity.
In [18]:
plot_joint_distribution_potential(df_mc)
Now we turn to the joint distribution of benefits and surplus. What is the meaning of each quadrant? Recall that individuals select into treatment whenever their surplus is positive, so the quadrants separate participants from non-participants and individuals who gain from those who lose; notably, some individuals take up treatment despite a negative benefit because their costs are even more negative.
In [19]:
plot_joint_distribution_benefits_surplus(model_base, df_mc)