Using Larch within Scikit-Learn



In [1]:

    
import larch
import pandas

from larch import PX, P, X



In [2]:

    
from larch.data_warehouse import example_file

# Load the example data file and index it by (casenum, altnum).
# `drop=False` keeps both columns available as regular columns too, which the
# utility expressions in the model cell rely on (they reference `altnum`).
# Chaining `set_index` avoids mutating the frame in place.
df = (
    pandas.read_csv(example_file("MTCwork.csv.gz"))
    .set_index(['casenum', 'altnum'], drop=False)
)



In [3]:

    
# Alternatives that receive their own constant and household-income term.
ALT_CODES = [2, 3, 4, 5, 6]

# One ASC_<alt> parameter per listed alternative, switched on by `altnum==<alt>`.
asc_terms = sum(P(f'ASC_{alt}') * X(f'altnum=={alt}') for alt in ALT_CODES)

# One HHINC#<alt> parameter per listed alternative, applied to `hhinc`
# only on rows where `altnum==<alt>`.
income_terms = sum(P(f'HHINC#{alt}') * X(f'(altnum=={alt})*hhinc') for alt in ALT_CODES)

# Assemble the utility function: the tottime and totcost terms come first,
# followed by the alternative-specific groups built above.
m = larch.Model()
m.utility_ca = PX('tottime') + PX('totcost') + asc_terms + income_terms



In [4]:

    
# Estimate the model on the loaded data, using the `chose` column as the outcome.
m.fit(df, y=df['chose'])









    




Iteration 010 [Converged] 






    




LL = -3626.186255512929






    







  
    
      
      value
      initvalue
      nullvalue
      minimum
      maximum
      holdfast
      note
      best
    
  
  
    
      ASC_2
      -2.178014
      0.0
      0.0
      -inf
      inf
      0
      
      -2.178014
    
    
      ASC_3
      -3.725078
      0.0
      0.0
      -inf
      inf
      0
      
      -3.725078
    
    
      ASC_4
      -0.670861
      0.0
      0.0
      -inf
      inf
      0
      
      -0.670861
    
    
      ASC_5
      -2.376328
      0.0
      0.0
      -inf
      inf
      0
      
      -2.376328
    
    
      ASC_6
      -0.206775
      0.0
      0.0
      -inf
      inf
      0
      
      -0.206775
    
    
      HHINC#2
      -0.002170
      0.0
      0.0
      -inf
      inf
      0
      
      -0.002170
    
    
      HHINC#3
      0.000358
      0.0
      0.0
      -inf
      inf
      0
      
      0.000358
    
    
      HHINC#4
      -0.005286
      0.0
      0.0
      -inf
      inf
      0
      
      -0.005286
    
    
      HHINC#5
      -0.012808
      0.0
      0.0
      -inf
      inf
      0
      
      -0.012808
    
    
      HHINC#6
      -0.009686
      0.0
      0.0
      -inf
      inf
      0
      
      -0.009686
    
    
      totcost
      -0.004920
      0.0
      0.0
      -inf
      inf
      0
      
      -0.004920
    
    
      tottime
      -0.051342
      0.0
      0.0
      -inf
      inf
      0
      
      -0.051342
    
  








    Out[4]:





<larch.Model (MNL)>



In [5]:

    
# Predicted probabilities from the fitted model; show only the first ten rows.
proba = m.predict_proba(df)
proba.iloc[:10]









    Out[5]:





   altnum
0  1         0.817458
   2         0.077710
   3         0.017906
   4         0.071428
   5         0.015497
1  1         0.336928
   2         0.074339
   3         0.052072
   4         0.498117
   5         0.038545
dtype: float64



In [6]:

    
# Score the fitted model against the observed `chose` column of the same data.
score = m.score(df, y=df['chose'])
score









    Out[6]:





-0.7210551313408093



In [7]:

    
# Multiplying the score by the number of cases recovers (up to floating-point
# rounding) the LL value printed while fitting the model.
total_log_like = score * m.dataframes.n_cases
total_log_like









    Out[7]:





-3626.18625551293

	value	minimum	maximum	best
ASC_2	-2.178014	-inf	inf	-2.178014
ASC_3	-3.725078	-inf	inf	-3.725078
ASC_4	-0.670861	-inf	inf	-0.670861
ASC_5	-2.376328	-inf	inf	-2.376328
ASC_6	-0.206775	-inf	inf	-0.206775
HHINC#2	-0.002170	-inf	inf	-0.002170
HHINC#3	0.000358	-inf	inf	0.000358
HHINC#4	-0.005286	-inf	inf	-0.005286
HHINC#5	-0.012808	-inf	inf	-0.012808
HHINC#6	-0.009686	-inf	inf	-0.009686
totcost	-0.004920	-inf	inf	-0.004920
tottime	-0.051342	-inf	inf	-0.051342