In [2]:
# Enable IPython's autoreload extension so edits to the library source are
# picked up without restarting the kernel.
%load_ext autoreload
# Mark choicemodels as a module to be auto-reloaded.
%aimport choicemodels
# Mode 1: before running each cell, reload only modules marked with %aimport.
%autoreload 1
In [3]:
import choicemodels
import numpy as np
import pandas as pd
from collections import OrderedDict
In [3]:
# Set up estimation data
# Seed NumPy's global RNG so the synthetic sample (and therefore the estimates
# shown below) is identical on every Restart & Run All.
np.random.seed(0)
endog = np.random.randint(2, size=50)  # 1-D array (shape (50,)) of random 0's and 1's
exog = np.random.rand(50, 5)  # 50x5 matrix of random floats in [0, 1)
In [4]:
# Estimate a model
# Binary logit on the synthetic arrays: endog holds the 0/1 outcomes and exog
# the five random regressors (exog contains no constant column).
m = choicemodels.Logit(endog, exog)
results = m.fit()
In [5]:
# Show estimation results
# `results` is the object returned by m.fit(); print() emits its summary as text.
print(results.summary())
In [ ]:
In [4]:
# Load some real data
# The file is tab-delimited and is read from a sibling checkout of pylogit,
# so this relative path only resolves when that repository is present.
path = '../../timothyb0912/pylogit/examples/data/swissmetro.dat'
swissmetro_raw = pd.read_table(path, sep='\t')

# Keep rows whose PURPOSE is 1 or 3 and whose CHOICE is non-zero.
include = swissmetro_raw.PURPOSE.isin([1, 3]) & (swissmetro_raw.CHOICE != 0)

# Take an explicit copy: a later cell adds a 'custom_id' column to this frame,
# and assigning into a bare .loc[] selection raises SettingWithCopyWarning.
swissmetro = swissmetro_raw.loc[include].copy()
In [ ]:
# Display pandas' descriptive statistics for the swissmetro frame.
swissmetro.describe()
In [5]:
# Reshape the wide table into long format

# The first 15 columns are passed through as `ind_vars`.
ind_vars = swissmetro.columns[:15].tolist()

# For each long-format variable, map alternative id -> wide-format source column.
# 'headway' has no entry for alternative 3.
alt_varying_vars = {
    'travel_time': {1: 'TRAIN_TT', 2: 'SM_TT', 3: 'CAR_TT'},
    'travel_cost': {1: 'TRAIN_CO', 2: 'SM_CO', 3: 'CAR_CO'},
    'headway': {1: 'TRAIN_HE', 2: 'SM_HE'},
}

# Alternative id -> column holding that alternative's availability flag.
availability_vars = {
    1: 'TRAIN_AV',
    2: 'SM_AV',
    3: 'CAR_AV',
}

# Names of the id / choice columns handed to the converter and, later, the model.
alt_id_col = 'mode_id'
obs_id_col = 'custom_id'
choice_col = 'CHOICE'

# Give every row a 1-based observation id.
swissmetro[obs_id_col] = np.arange(1, swissmetro.shape[0] + 1, dtype=int)

data = choicemodels.convert_wide_to_long(
    swissmetro,
    ind_vars,
    alt_varying_vars,
    availability_vars,
    obs_id_col,
    choice_col,
    new_alt_id_name=alt_id_col,
)
In [ ]:
# Display descriptive statistics for `data`, the output of convert_wide_to_long.
data.describe()
In [8]:
# Rescale variables
# Each entry is (new column, source column, divisor): the two "_hrs" columns
# divide by 60 and the cost column divides by 100.
rescaling = [
    ("travel_time_hrs", "travel_time", 60.0),
    ("headway_hrs", "headway", 60.0),
    ("travel_cost_scaled", "travel_cost", 100.0),
]
for new_col, source_col, divisor in rescaling:
    data[new_col] = data[source_col] / divisor
In [9]:
# Set up specification
# `spec` maps each variable to the alternative ids it enters for; `labels`
# gives one display name per resulting coefficient, in the same order.
# The nested [1, 2] under travel_time_hrs lines up with the single
# 'Travel Time (Train/SM)' label, i.e. one entry covering both alternatives.
spec = OrderedDict([
    ("intercept", [1, 2]),
    ("travel_time_hrs", [[1, 2], 3]),
    ("travel_cost_scaled", [1, 2, 3]),
    ("headway_hrs", [1, 2]),
])
labels = OrderedDict([
    ("intercept", ['ASC Train', 'ASC Swissmetro']),
    ("travel_time_hrs", ['Travel Time (Train/SM)', 'Travel Time (Car)']),
    ("travel_cost_scaled", ['Travel Cost (Train)',
                            'Travel Cost (Swissmetro)',
                            'Travel Cost (Car)']),
    ("headway_hrs", ["Headway (Train)", "Headway (Swissmetro)"]),
])
In [10]:
# Set up and estimate the model
m = choicemodels.MNLogit(
    data,
    alt_id_col,
    obs_id_col,
    choice_col,
    spec,
    names=labels,
)

# Start every coefficient at zero. The length 9 matches the number of names
# in `labels` (2 + 2 + 3 + 2).
init_vals = np.zeros(9)
results = m.fit_mle(init_vals)
In [11]:
# Show results
# `results` is the object returned by m.fit_mle(); print() emits its summary as text.
print(results.summary())
In [ ]:
In [ ]:
# StatsModels allows the following syntax:
# NOTE(review): outcome / const / var1 / var2 look like placeholder names rather
# than columns created in this notebook — confirm before expecting this to run.
# NOTE(review): this cell rebinds `spec`, `m` and `results` from the MNLogit cells.
spec = 'outcome ~ const + var1 + np.log(var2)'
m = choicemodels.Logit.from_formula(spec, data)
# NOTE(review): the Logit cell near the top calls m.fit(), while fit_mle() is
# used on MNLogit — confirm which method applies to a Logit instance.
results = m.fit_mle()
In [ ]:
# It would be nice to enable something similar for multinomial models,
# so that the user interface follows the utility functions more closely

# One formula string per alternative id; the b* / ASC_* tokens are the
# coefficient names that `labels` maps to display names.
spec = {
    '1': 'choice ~ ASC_t + btt * time_t/60 + bct * cost_t/100 + bht * headway_t/60',
    '2': 'choice ~ ASC_sm + btt * time_sm/60 + bcs * cost_sm/100 + bhs * headway_sm/60',
    '3': 'choice ~ btc * time_c/60 + bcc * cost_c/100',
}

# Use a plain assignment here: `labels: {...}` is only a bare variable
# annotation and would not assign the dict to `labels`.
# Every coefficient used in `spec` has an entry, including 'bcc' from the
# alternative-3 formula.
# NOTE(review): `labels` is not passed to from_formula below — confirm how the
# proposed API should receive it.
labels = {
    'ASC_t': "ASC Train",
    'ASC_sm': "ASC Swissmetro",
    'btt': "Travel Time (Train/SM)",
    'btc': "Travel Time (Car)",
    'bct': "Travel Cost (Train)",
    'bcs': "Travel Cost (Swissmetro)",
    'bcc': "Travel Cost (Car)",
    'bht': "Headway (Train)",
    'bhs': "Headway (Swissmetro)",
}

m = choicemodels.MNL.from_formula(spec, data, alt_id_col)
results = m.fit_mle()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: