Using Larch within Scikit-Learn


In [1]:
import larch
import pandas

from larch import PX, P, X

In [2]:
from larch.data_warehouse import example_file
# Load the MTC work-trip example data shipped with larch and index it by
# (casenum, altnum).  `drop=False` keeps both keys as ordinary columns as well,
# presumably so expressions such as `altnum==2` can still reference them
# (TODO confirm how larch resolves names against the index).
# `set_index` is chained instead of called with `inplace=True`, so `df` is
# bound exactly once and the load reads as a single expression.
df = (
    pandas.read_csv(example_file("MTCwork.csv.gz"))
    .set_index(['casenum', 'altnum'], drop=False)
)

In [3]:
m = larch.Model()

# Alternatives that get their own constant and household-income coefficient;
# no such parameters are defined for altnum 1.
alt_codes = range(2, 7)

# Time and cost each enter with a single parameter named after the variable.
generic_terms = PX('tottime') + PX('totcost')

# One constant per listed alternative, switched on by an `altnum==k` indicator.
asc_terms = sum(P(f'ASC_{k}') * X(f'altnum=={k}') for k in alt_codes)

# Household income interacted with the same per-alternative indicators.
income_terms = sum(
    P(f'HHINC#{k}') * X(f'(altnum=={k})*hhinc') for k in alt_codes
)

# Same term order as a single chained sum: generic, constants, income.
m.utility_ca = generic_terms + asc_terms + income_terms

In [4]:
# Estimate the model parameters through the scikit-learn style `fit` call,
# passing the indexed frame as the data and its `chose` column as the target
# (presumably the chosen-alternative indicator -- confirm against the data).
# Left as the cell's last expression so the returned model is displayed.
m.fit(df, y=df['chose'])


Iteration 010 [Converged]

LL = -3626.186255512929

value initvalue nullvalue minimum maximum holdfast note best
ASC_2 -2.178014 0.0 0.0 -inf inf 0 -2.178014
ASC_3 -3.725078 0.0 0.0 -inf inf 0 -3.725078
ASC_4 -0.670861 0.0 0.0 -inf inf 0 -0.670861
ASC_5 -2.376328 0.0 0.0 -inf inf 0 -2.376328
ASC_6 -0.206775 0.0 0.0 -inf inf 0 -0.206775
HHINC#2 -0.002170 0.0 0.0 -inf inf 0 -0.002170
HHINC#3 0.000358 0.0 0.0 -inf inf 0 0.000358
HHINC#4 -0.005286 0.0 0.0 -inf inf 0 -0.005286
HHINC#5 -0.012808 0.0 0.0 -inf inf 0 -0.012808
HHINC#6 -0.009686 0.0 0.0 -inf inf 0 -0.009686
totcost -0.004920 0.0 0.0 -inf inf 0 -0.004920
tottime -0.051342 0.0 0.0 -inf inf 0 -0.051342
Out[4]:
<larch.Model (MNL)>

In [5]:
# Predicted choice probabilities from the fitted model for the same data.
# The displayed result is a float64 Series whose inner index level is
# `altnum`, i.e. one probability per case/alternative row.
proba = m.predict_proba(df)
# Show only the first ten rows rather than the whole Series.
proba.head(10)


Out[5]:
   altnum
0  1         0.817458
   2         0.077710
   3         0.017906
   4         0.071428
   5         0.015497
1  1         0.336928
   2         0.074339
   3         0.052072
   4         0.498117
   5         0.038545
dtype: float64

In [6]:
# Score the fitted model on the estimation data.  The result is a single
# float; multiplied by the number of cases it matches the log likelihood
# printed by `fit`, so it works out to an average log likelihood per case.
score = m.score(df, y=df['chose'])
score


Out[6]:
-0.7210551313408093

In [7]:
# Scaling the score by the number of cases reproduces the total log
# likelihood reported when the model was fit (LL = -3626.186...).
score * m.dataframes.n_cases


Out[7]:
-3626.18625551293