UnSupervisedIOHMM


In [1]:
from __future__ import  division

import json
import warnings


import numpy as np
import pandas as pd


from IOHMM import UnSupervisedIOHMM
from IOHMM import OLS, DiscreteMNL, CrossEntropyMNL


# Suppress every warning for the rest of the session (presumably to keep the
# notebook output uncluttered); comment this out when debugging.
warnings.simplefilter("ignore")

Load speed data


In [2]:
# Read the speed data set; the path is relative to the working directory.
speed = pd.read_csv('../data/speed.csv')
# Preview the first rows (columns used below: 'rt', 'corr', 'Pacc').
speed.head()


Out[2]:
Unnamed: 0 rt corr Pacc prev
0 1 6.456770 cor 0.0 inc
1 2 5.602119 cor 0.0 cor
2 3 6.253829 inc 0.0 cor
3 4 5.451038 inc 0.0 inc
4 5 5.872118 inc 0.0 inc

Example 1

Set up a simple model manually


In [3]:
# Two hidden states; the EM loop is bounded by max_EM_iter and EM_tol.
SHMM = UnSupervisedIOHMM(
    num_states=2,
    max_EM_iter=200,
    EM_tol=1e-6
)

# A single emission, modeled by a linear regression (OLS).
# The transition and initial-state models are both CrossEntropyMNL.
SHMM.set_models(
    model_emissions=[OLS()],
    model_transition=CrossEntropyMNL(solver='lbfgs'),
    model_initial=CrossEntropyMNL(solver='lbfgs')
)

# No covariates anywhere: empty lists for the initial model, the transition
# model and the one emission model.
SHMM.set_inputs(
    covariates_initial=[],
    covariates_transition=[],
    covariates_emissions=[[]]
)

# The only emission target is the 'rt' column of the dataframe.
SHMM.set_outputs([['rt']])

# The data is passed as a list of sequences; here there is exactly one.
SHMM.set_data([speed])

Start training


In [4]:
# Fit the model on the sequence registered via set_data above.
SHMM.train()

See the training results


In [5]:
# OLS emission coefficients, printed for hidden state 0 and then state 1
for state in (0, 1):
    print(SHMM.model_emissions[state][0].coef)


[[ 5.51036369]]
[[ 6.38505309]]

In [6]:
# Square root of the OLS dispersion (the scale), for state 0 and then state 1
for state in (0, 1):
    print(np.sqrt(SHMM.model_emissions[state][0].dispersion))


[[ 0.19175214]]
[[ 0.24415967]]

In [7]:
# Transition probabilities out of each hidden state. The transition model has
# no covariates, so it is evaluated on an empty input row of shape (1, 0).
for state in (0, 1):
    print(np.exp(SHMM.model_transition[state].predict_log_proba(np.array([[]]))))


[[ 0.88351131  0.11648869]]
[[ 0.08433152  0.91566848]]

Example 2

Set up another model with two outputs


In [8]:
# Two hidden states again, with the same EM settings as Example 1.
SHMM = UnSupervisedIOHMM(
    num_states=2,
    max_EM_iter=200,
    EM_tol=1e-6
)

# Two emissions per state:
#   1. a linear regression (OLS) that also estimates standard errors
#   2. a discrete multinomial logistic regression (DiscreteMNL)
SHMM.set_models(
    model_emissions=[
        OLS(est_stderr=True),
        DiscreteMNL(solver='lbfgs')
    ],
    model_transition=CrossEntropyMNL(solver='lbfgs'),
    model_initial=CrossEntropyMNL(solver='lbfgs')
)

# The first emission has no covariates; the second uses 'Pacc' as its input.
SHMM.set_inputs(
    covariates_initial=[],
    covariates_transition=[],
    covariates_emissions=[[], ['Pacc']]
)

# 'rt' is the target of the OLS emission and
# 'corr' is the target of the DiscreteMNL emission.
SHMM.set_outputs([['rt'], ['corr']])

# A list containing the single training sequence.
SHMM.set_data([speed])

Start training


In [9]:
# Fit the two-emission model on the sequence registered via set_data above.
SHMM.train()

See the training results


In [10]:
# Emission model 1 ('rt'): coefficients for hidden state 0, then state 1
for state in (0, 1):
    print(SHMM.model_emissions[state][0].coef)
print('')
# Emission model 2 ('corr'): coefficients for hidden state 0, then state 1
for state in (0, 1):
    print(SHMM.model_emissions[state][1].coef)


[[ 6.38764009]]
[[ 5.51390328]]
()
[[-1.13690447 -2.17394618]]
[[-0.21848303  0.57762625]]

In [11]:
# Emission model 1 ('rt'): estimated standard errors of the coefficients,
# for hidden state 0 and then state 1
for state in (0, 1):
    print(SHMM.model_emissions[state][0].stderr)
print('')
# Emission model 2 ('corr'): same attribute, state 0 then state 1.
# Note that est_stderr is not supported in the MNL model.
for state in (0, 1):
    print(SHMM.model_emissions[state][1].stderr)


[[ 0.01498923]]
[[ 0.0143314]]
()
None
None

In [12]:
# The transition probability between two hidden states
print(np.exp(SHMM.model_transition[0].predict_log_proba(np.array([[]]))))
print(np.exp(SHMM.model_transition[1].predict_log_proba(np.array([[]]))))


[[ 0.91491024  0.08508976]]
[[ 0.11590608  0.88409392]]

Save the trained model


In [13]:
# Serialize the trained model. The returned dict references numpy arrays by
# '.npy' paths under the given directory (presumably written by to_json; confirm).
json_dict = SHMM.to_json('../models/UnSupervisedIOHMM/')
# Display the dict as the cell output.
json_dict


Out[13]:
{'data_type': 'UnSupervisedIOHMM',
 'properties': {'EM_tol': 1e-06,
  'covariates_emissions': [[], ['Pacc']],
  'covariates_initial': [],
  'covariates_transition': [],
  'max_EM_iter': 200,
  'model_emissions': [[{'data_type': 'OLS',
     'properties': {'alpha': 0,
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_0/coef.npy'},
      'dispersion': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_0/dispersion.npy'},
      'est_stderr': True,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'n_targets': 1,
      'reg_method': None,
      'solver': 'svd',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_0/stderr.npy'},
      'tol': 0.0001}},
    {'data_type': 'DiscreteMNL',
     'properties': {'alpha': 0,
      'classes': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_1/classes.npy'},
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_1/coef.npy'},
      'est_stderr': False,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'reg_method': 'l2',
      'solver': 'lbfgs',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_1/stderr.npy'},
      'tol': 0.0001}}],
   [{'data_type': 'OLS',
     'properties': {'alpha': 0,
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_0/coef.npy'},
      'dispersion': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_0/dispersion.npy'},
      'est_stderr': True,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'n_targets': 1,
      'reg_method': None,
      'solver': 'svd',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_0/stderr.npy'},
      'tol': 0.0001}},
    {'data_type': 'DiscreteMNL',
     'properties': {'alpha': 0,
      'classes': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_1/classes.npy'},
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_1/coef.npy'},
      'est_stderr': False,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'reg_method': 'l2',
      'solver': 'lbfgs',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_1/stderr.npy'},
      'tol': 0.0001}}]],
  'model_initial': {'data_type': 'CrossEntropyMNL',
   'properties': {'alpha': 0,
    'coef': {'data_type': 'numpy.ndarray',
     'path': '../models/UnSupervisedIOHMM/model_initial/coef.npy'},
    'est_stderr': False,
    'fit_intercept': True,
    'l1_ratio': 0,
    'max_iter': 100,
    'n_classes': 2,
    'reg_method': 'l2',
    'solver': 'lbfgs',
    'stderr': {'data_type': 'numpy.ndarray',
     'path': '../models/UnSupervisedIOHMM/model_initial/stderr.npy'},
    'tol': 0.0001}},
  'model_transition': [{'data_type': 'CrossEntropyMNL',
    'properties': {'alpha': 0,
     'coef': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_0/coef.npy'},
     'est_stderr': False,
     'fit_intercept': True,
     'l1_ratio': 0,
     'max_iter': 100,
     'n_classes': 2,
     'reg_method': 'l2',
     'solver': 'lbfgs',
     'stderr': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_0/stderr.npy'},
     'tol': 0.0001}},
   {'data_type': 'CrossEntropyMNL',
    'properties': {'alpha': 0,
     'coef': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_1/coef.npy'},
     'est_stderr': False,
     'fit_intercept': True,
     'l1_ratio': 0,
     'max_iter': 100,
     'n_classes': 2,
     'reg_method': 'l2',
     'solver': 'lbfgs',
     'stderr': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_1/stderr.npy'},
     'tol': 0.0001}}],
  'num_states': 2,
  'responses_emissions': [['rt'], ['corr']]}}

In [14]:
# Store the model description as pretty-printed JSON (4-space indent, sorted keys).
with open('../models/UnSupervisedIOHMM/model.json', 'w') as outfile:
    outfile.write(json.dumps(json_dict, indent=4, sort_keys=True))

Load back the trained model


In [15]:
# Rebuild a model from the dict produced by to_json above.
SHMM_from_json = UnSupervisedIOHMM.from_json(json_dict)

See if the coefficients are any different


In [16]:
# See the coefficients of the emission model 1 ('rt') of each hidden states
print(SHMM_from_json.model_emissions[0][0].coef)
print(SHMM_from_json.model_emissions[1][0].coef)
print('')
# See the coefficients of the emission model 2 ('corr') of each hidden states
print(SHMM_from_json.model_emissions[0][1].coef)
print(SHMM_from_json.model_emissions[1][1].coef)


[[ 6.38764009]]
[[ 5.51390328]]
()
[[-1.13690447 -2.17394618]]
[[-0.21848303  0.57762625]]

Set up the model using a config file, instead of doing it manually


In [17]:
# Load the model specification from a JSON config file. A dedicated name is
# used so that `json_dict` (the serialized trained model from the save step)
# is not overwritten; otherwise re-running the from_json cell after this one
# would silently receive the config instead of the trained model.
with open('../models/UnSupervisedIOHMM/config.json') as config_file:
    config_dict = json.load(config_file)

# Build the model from the configuration (no data attached yet).
SHMM_from_config = UnSupervisedIOHMM.from_config(config_dict)

Set data and start training


In [18]:
# Attach the single speed sequence, then fit the config-built model.
SHMM_from_config.set_data([speed])
SHMM_from_config.train()

See whether the training results are any different


In [19]:
# See the coefficients of the emission model 1 ('rt') of each hidden states
print(SHMM_from_config.model_emissions[0][0].coef)
print(SHMM_from_config.model_emissions[1][0].coef)
print('')
# See the coefficients of the emission model 2 ('corr') of each hidden states
print(SHMM_from_config.model_emissions[0][1].coef)
print(SHMM_from_config.model_emissions[1][1].coef)


[[ 6.38763985]]
[[ 5.51390293]]
()
[[-1.13689776 -2.17395753]]
[[-0.21848318  0.57762729]]

In [ ]: