For Binary Logit and Multinomial Logit, including sampling of alternatives.
To install ChoiceModels, git clone the repository (https://github.com/ual/choicemodels).
In [1]:
import os; os.chdir('../choicemodels')
import choicemodels
In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm # for binary logit
from collections import OrderedDict # for MNL model specifications
In [3]:
# Synthetic binary-logit estimation data: 50 observations, 3 regressors.
Y = np.random.randint(2, size=50)  # length-50 vector of random 0's and 1's
X = np.random.rand(50, 3)  # 50x3 matrix of random floats
# Combine the regressors and the outcome column into one table so the
# model can be specified with a formula.
data = pd.DataFrame(X, columns=['x1', 'x2', 'x3']).assign(y=Y)
print(data.describe())
In [4]:
# Fit a binary logit of `y` on the three regressors, using the formula
# interface so the model is described by column names in `data`.
specification = 'y ~ x1 + x2 + x3'
m = sm.Logit.from_formula(formula=specification, data=data)
results = m.fit()
print(results.summary())
In [ ]:
In [ ]:
In [5]:
# Import or generate estimation data
def rand(len, min, max):
    """
    Draw `len` random floats, uniformly distributed from `min` to `max`.

    Note that the parameter names shadow the Python builtins of the same
    name inside this function.
    """
    width = max - min
    unit_draws = np.random.rand(len)  # uniform draws on the unit interval
    return width * unit_draws + min
numalts = 50  # alternatives
# Half of the alternatives get a low attribute value (0 to 10) and half a
# high one (100 to 110). Floor division keeps the sample size an integer:
# under Python 3, `numalts/2` is the float 25.0, which np.random.rand
# does not accept as a dimension.
X = np.concatenate((rand(numalts // 2, 0, 10), rand(numalts // 2, 100, 110)))
numobs = 1000  # agents/observations
# One coefficient per observation, all equal to 1.5. Sized from `numobs`
# (rather than a literal 1000) so the two cannot drift apart.
beta = np.zeros(numobs) + 1.5
# Utility matrix: one row per observation, one entry per alternative, each
# being beta * x plus an independent Gumbel draw.
U = [[beta[n] * x + np.random.gumbel() for x in X] for n in range(numobs)]
# Each observation chooses the alternative with the highest utility.
choices = [np.argmax(a) for a in U]
In [6]:
# Build the estimation dataset in long format: one row per
# (observation, alternative) pair, with `chosen` set to 1 on the row of
# the alternative that observation picked and 0 elsewhere.
d = [
    [n, i, int(i == choices[n]), X[i]]
    for n in range(numobs)
    for i in range(numalts)
]
df = pd.DataFrame(data=d, columns=['obs_id', 'alt_id', 'chosen', 'x'])
In [7]:
# Print summary statistics for the columns of the long-format table `df`.
print(df.describe())
In [8]:
# Model specification: a single explanatory variable, the column `x`.
# `spec` and `labels` are both keyed by that column name; `labels` gives
# the name under which its coefficient is reported.
spec = OrderedDict()
# NOTE(review): 'all_same' presumably requests one coefficient shared by
# all alternatives -- confirm against the ChoiceModels documentation.
spec['x'] = 'all_same'
labels = OrderedDict()
labels['x'] = 'beta_x'
In [9]:
def estimate_model(init_val):
    """
    Build an MNLogit model and fit it with `fit_mle`.

    The model is constructed from the module-level `df`, `spec`, and
    `labels` as they are at call time, so calling this again after `df`
    has been rebuilt fits the new dataset. `init_val` is the starting
    value for the single coefficient. Returns the object produced by
    `fit_mle`.
    """
    model_args = dict(
        data=df,
        alt_id_col='alt_id',
        obs_id_col='obs_id',
        choice_col='chosen',
        specification=spec,
        names=labels,
    )
    model = choicemodels.MNLogit(**model_args)
    # The initial value is passed as a one-element array.
    start = np.array([init_val])
    return model.fit_mle(init_vals=start)
In [10]:
%%time
# Estimate the model on the current `df` (built above with every
# alternative for every observation), starting from 1.2, and print the
# fitted model's summary.
results = estimate_model(init_val = 1.2)
print(results.summary())
In [ ]:
In [ ]:
In [11]:
def alts(obs_id, C, K):
    """
    Sample a restricted choice set for a single observation.

    `C` is the collection of alt_id's to sample from (either the full
    choice set or a stratum) and `K` is the number of alternatives in the
    restricted set. The chosen alternative is looked up in the
    module-level `choices` list, indexed by `obs_id`, and is always
    included; the remaining K-1 alt_id's are drawn from the unchosen
    members of `C` without replacement. Returns a sorted NumPy array of
    K alt_id's.
    """
    picked = choices[obs_id]  # alt_id this observation actually chose
    candidates = [alt for alt in C if alt != picked]
    drawn = np.random.choice(candidates, size=K - 1, replace=False)
    restricted = [picked]
    restricted.extend(drawn.tolist())
    return np.sort(restricted)
In [12]:
# Rebuild the estimation dataset with a sampled choice set per
# observation; the same model spec as earlier can be reused.
C = range(numalts)  # choice set to sample from
K = 10  # alternatives per observation, including the chosen one
d = [
    [n, i, int(i == choices[n]), X[i]]
    for n in range(numobs)
    for i in alts(n, C, K)
]
df = pd.DataFrame(data=d, columns=['obs_id', 'alt_id', 'chosen', 'x'])
In [13]:
# Print summary statistics for the sampled-alternatives table. Written as
# a function call so it runs on Python 3 and matches the other cells.
print(df.describe())
In [14]:
%%time
# Re-estimate on the current `df`, from the same starting value as
# before. `print` is called as a function so the cell runs on Python 3.
results = estimate_model(init_val=1.2)
print(results.summary())
In [ ]:
In [ ]:
In [ ]:
In [ ]: