ChoiceModels usage demo

Sam Maurer, October 10, 2016


In [2]:
%load_ext autoreload
%aimport choicemodels
%autoreload 1

In [3]:
import choicemodels
import numpy as np
import pandas as pd
from collections import OrderedDict

Binary Logit


In [3]:
# Set up estimation data

endog = np.random.randint(2, size=50)  # 50x1 vector of random 0's and 1's
exog = np.random.rand(50, 5)  # 50x5 matrix of random floats

In [4]:
# Estimate a model

m = choicemodels.Logit(endog, exog)
results = m.fit()


Optimization terminated successfully.
         Current function value: 0.635509
         Iterations 5

In [5]:
# Show estimation results

print(results.summary())


                           Logit Regression Results                           
==============================================================================
Dep. Variable:                      y   No. Observations:                   50
Model:                          Logit   Df Residuals:                       45
Method:                           MLE   Df Model:                            4
Date:                Fri, 07 Oct 2016   Pseudo R-squ.:                 0.07890
Time:                        16:31:07   Log-Likelihood:                -31.775
converged:                       True   LL-Null:                       -34.497
                                        LLR p-value:                    0.2447
==============================================================================
                 coef    std err          z      P>|z|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
x1             0.0305      0.899      0.034      0.973        -1.731     1.792
x2             1.4040      0.977      1.436      0.151        -0.512     3.320
x3            -2.2294      1.034     -2.156      0.031        -4.256    -0.202
x4             0.0607      0.996      0.061      0.951        -1.892     2.013
x5             0.5010      0.995      0.503      0.615        -1.450     2.452
==============================================================================
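
The summary above is standard statsmodels output, so the fitted results object presumably supports the usual statsmodels API. As a sketch under that assumption, predicted probabilities and in-sample accuracy could be computed like this:

In [ ]:
# Predicted probabilities from the fitted binary logit
# (a sketch assuming the statsmodels results API: results.predict())

probs = results.predict(exog)        # P(y = 1) for each of the 50 observations
preds = (probs > 0.5).astype(int)    # classify at a 0.5 threshold
print((preds == endog).mean())       # in-sample accuracy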

In [ ]:

Multinomial Logit


In [4]:
# Load the Swissmetro example dataset (from the pylogit repo's example data)

path = '../../timothyb0912/pylogit/examples/data/swissmetro.dat'
swissmetro = pd.read_table(path, sep='\t')

include = (swissmetro.PURPOSE.isin([1, 3]) & (swissmetro.CHOICE != 0))
swissmetro = swissmetro.loc[include]

In [ ]:
swissmetro.describe()
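
A quick sanity check on the filtered sample, using plain pandas, confirms that no zero-choice rows remain and shows how the chosen modes are distributed:

In [ ]:
# Rows remaining after filtering, and the distribution of chosen modes
# (1 = train, 2 = Swissmetro, 3 = car)
print(swissmetro.shape)
print(swissmetro.CHOICE.value_counts())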

In [5]:
# Convert to long format

ind_vars = swissmetro.columns.tolist()[:15]

alt_varying_vars = {'travel_time': {1: 'TRAIN_TT', 2: 'SM_TT', 3: 'CAR_TT'},
                    'travel_cost': {1: 'TRAIN_CO', 2: 'SM_CO', 3: 'CAR_CO'},
                    'headway': {1: 'TRAIN_HE', 2: 'SM_HE'}}

availability_vars = {1: 'TRAIN_AV', 2: 'SM_AV', 3: 'CAR_AV'}

alt_id_col = 'mode_id'

swissmetro['custom_id'] = np.arange(swissmetro.shape[0], dtype=int) + 1
obs_id_col = 'custom_id'

choice_col = 'CHOICE'

data = choicemodels.convert_wide_to_long(swissmetro, ind_vars, alt_varying_vars, 
                availability_vars, obs_id_col, choice_col, new_alt_id_name=alt_id_col)


/Users/smmaurer/Dropbox/Git-rMBP/timothyb0912/pylogit/pylogit/choice_tools.py:431: UserWarning: Note, there are 29 variables in wide_data but the inputs ind_vars, alt_specific_vars, and subset_specific_vars only account for 28 variables.
  msg_2 + msg_3.format(num_vars_accounted_for))

In [ ]:
data.describe()
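
To see the structure that the conversion produces (one row per available alternative per observation, with the chosen row flagged in CHOICE), the first few rows can be inspected directly:

In [ ]:
# Each observation ('custom_id') contributes one row per available alternative
# ('mode_id'); CHOICE = 1 marks the row that was actually chosen
print(data[['custom_id', 'mode_id', 'CHOICE', 'travel_time', 'travel_cost']].head(6))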

In [8]:
# Rescale variables

data["travel_time_hrs"] = data["travel_time"] / 60.0
data["headway_hrs"] = data["headway"] / 60.0
data["travel_cost_scaled"] = data["travel_cost"] / 100.0

In [9]:
# Set up the model specification: each key is a column in the long-format data,
# and the value lists which alternatives (1 = train, 2 = Swissmetro, 3 = car) get
# a coefficient; a nested list like [1, 2] shares one coefficient across those alternatives

spec = OrderedDict()
labels = OrderedDict()

spec["intercept"] = [1, 2]
labels["intercept"] = ['ASC Train', 'ASC Swissmetro']

spec["travel_time_hrs"] = [[1, 2,], 3]
labels["travel_time_hrs"] = ['Travel Time (Train/SM)', 'Travel Time (Car)']

spec["travel_cost_scaled"] = [1, 2, 3]
labels["travel_cost_scaled"] = ['Travel Cost (Train)', 'Travel Cost (Swissmetro)', 
                                'Travel Cost (Car)']

spec["headway_hrs"] = [1, 2]
labels["headway_hrs"] = ["Headway (Train)", "Headway (Swissmetro)"]

In [10]:
# Set up and estimate the model

m = choicemodels.MNLogit(data, alt_id_col, obs_id_col, choice_col, spec, names=labels)

results = m.fit_mle(np.zeros(9))


Log-likelihood at zero: -6,964.6630
Initial Log-likelihood: -6,964.6630
Estimation Time: 0.09 seconds.
Final log-likelihood: -5,359.1984
/Users/smmaurer/anaconda/lib/python2.7/site-packages/scipy/optimize/_minimize.py:385: RuntimeWarning: Method BFGS does not use Hessian information (hess).
  RuntimeWarning)

In [11]:
# Show results

print(results.summary())


                     Multinomial Logit Model Regression Results                    
===================================================================================
Dep. Variable:                      CHOICE   No. Observations:                6,768
Model:             Multinomial Logit Model   Df Residuals:                    6,759
Method:                                MLE   Df Model:                            9
Date:                     Fri, 07 Oct 2016   Pseudo R-squ.:                   0.231
Time:                             16:31:26   Pseudo R-bar-squ.:               0.229
converged:                           False   Log-Likelihood:             -5,359.198
                                             LL-Null:                    -6,964.663
============================================================================================
                               coef    std err          z      P>|z|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------------
ASC Train                   -0.4710      0.128     -3.674      0.000        -0.722    -0.220
ASC Swissmetro               0.2597      0.104      2.504      0.012         0.056     0.463
Travel Time (Train/SM)      -0.7459      0.041    -18.011      0.000        -0.827    -0.665
Travel Time (Car)           -0.5572      0.043    -13.065      0.000        -0.641    -0.474
Travel Cost (Train)          0.0637      0.004     14.386      0.000         0.055     0.072
Travel Cost (Swissmetro)     0.0096      0.003      2.969      0.003         0.003     0.016
Travel Cost (Car)           -0.2327      0.091     -2.546      0.011        -0.412    -0.054
Headway (Train)             -0.3592      0.064     -5.590      0.000        -0.485    -0.233
Headway (Swissmetro)        -0.4353      0.192     -2.265      0.023        -0.812    -0.059
============================================================================================
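
One common way to interpret these estimates is an implied value of travel time: the ratio of a time coefficient to the corresponding cost coefficient, rescaled back to the original units. The sketch below hard-codes the car-mode coefficients from the table above purely for illustration:

In [ ]:
# Implied value of time for the car alternative, using the estimates above.
# Travel time was converted to hours and cost was divided by 100, so dividing
# the cost coefficient by 100 puts the ratio back in original cost units per hour.
b_time_car = -0.5572
b_cost_car = -0.2327
vot_car = b_time_car / (b_cost_car / 100.0)
print(round(vot_car, 1))  # roughly 239 cost units per hour of car travel time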

In [ ]:

Alternate syntax for setting up a multinomial specification

This section is speculative -- not yet implemented!


In [ ]:
# StatsModels allows the following formula-based syntax, which
# choicemodels.Logit could mirror:

spec = 'outcome ~ const + var1 + np.log(var2)'

m = choicemodels.Logit.from_formula(spec, data)
results = m.fit()

In [ ]:
# It would be nice to enable something similar for multinomial models,
# so that the user interface follows the utility functions more closely

spec = {
    '1': 'choice ~ ASC_t + btt * time_t/60 + bct * cost_t/100 + bht * headway_t/60',
    
    '2': 'choice ~ ASC_sm + btt * time_sm/60 + bcs * cost_sm/100 + bhs * headway_sm/60',
    
    '3': 'choice ~ btc * time_c/60 + bcc * cost_c/100' }

labels = {
    'ASC_t': "ASC Train",
    'ASC_sm': "ASC Swissmetro", 
    'btt': "Travel Time (Train/SM)", 
    'btc': "Travel Time (Car)", 
    'bct': "Travel Cost (Train)", 
    'bcs': "Travel Cost (Swissmetro)", 
    'bht': "Headway (Train)", 
    'bhs': "Headway (Swissmetro)", }

m = choicemodels.MNL.from_formula(spec, data, alt_id_col)
results = m.fit_mle()
