UnSupervisedIOHMM



In [1]:

    
from __future__ import  division

import json
import warnings


import numpy as np
import pandas as pd


from IOHMM import UnSupervisedIOHMM
from IOHMM import OLS, DiscreteMNL, CrossEntropyMNL


# Silence every warning for the rest of the session so the cell outputs stay compact.
# NOTE(review): this is a blanket filter; any warning raised while training
# (for example by the solvers) is hidden as well -- consider narrowing it.
warnings.simplefilter("ignore")

Load speed data



In [2]:

    
# Read the speed data set from its relative path into a DataFrame.
speed = pd.read_csv(filepath_or_buffer='../data/speed.csv')

# Show the first five rows as a quick sanity check.
speed.head(n=5)

Example 1

Set up a simple model manually



In [3]:

    
# Unsupervised IOHMM with two hidden states, max_EM_iter=200 and EM_tol=1e-6.
SHMM = UnSupervisedIOHMM(
    num_states=2,
    max_EM_iter=200,
    EM_tol=1e-6)

# A single emission (output) modelled by a linear regression (OLS); the
# transition and initial-state models are both CrossEntropyMNL with the
# 'lbfgs' solver.
SHMM.set_models(
    model_emissions=[OLS()],
    model_transition=CrossEntropyMNL(solver='lbfgs'),
    model_initial=CrossEntropyMNL(solver='lbfgs'))

# No covariates anywhere: empty lists for the initial and transition models,
# and one empty list for the single OLS emission model.
SHMM.set_inputs(
    covariates_initial=[],
    covariates_transition=[],
    covariates_emissions=[[]])

# The OLS emission model has one output target: the 'rt' column of the dataframe.
SHMM.set_outputs([['rt']])

# The training data is a list of sequences; here it holds a single one.
SHMM.set_data([speed])

Start training



In [4]:

    
# Run training on the model configured above (constructed with
# max_EM_iter=200 and EM_tol=1e-6) using the data passed to set_data().
SHMM.train()

See the training results



In [5]:

    
# Print the OLS coefficients of the single emission model, one hidden state per line.
for state in (0, 1):
    print(SHMM.model_emissions[state][0].coef)









    



[[ 5.51036369]]
[[ 6.38505309]]



In [6]:

    
# Print the square root of the OLS dispersion (i.e. the scale) for each hidden state.
for state in (0, 1):
    emission = SHMM.model_emissions[state][0]
    print(np.sqrt(emission.dispersion))









    



[[ 0.19175214]]
[[ 0.24415967]]



In [7]:

    
# Print the transition probabilities out of each hidden state.
# The transition model was given no covariates, so it is evaluated on an
# empty (1, 0) array; the returned log-probabilities are exponentiated.
for state in (0, 1):
    log_proba = SHMM.model_transition[state].predict_log_proba(np.array([[]]))
    print(np.exp(log_proba))









    



[[ 0.88351131  0.11648869]]
[[ 0.08433152  0.91566848]]

Example 2

Set up another model with two outputs



In [8]:

    
# Unsupervised IOHMM with two hidden states, max_EM_iter=200 and EM_tol=1e-6.
SHMM = UnSupervisedIOHMM(
    num_states=2,
    max_EM_iter=200,
    EM_tol=1e-6)

# Two emissions (outputs) per hidden state:
#   1. a linear regression model (OLS, created with est_stderr=True), and
#   2. a discrete logistic regression model (DiscreteMNL, 'lbfgs' solver).
# The transition and initial-state models are CrossEntropyMNL with 'lbfgs'.
SHMM.set_models(
    model_emissions=[
        OLS(est_stderr=True),
        DiscreteMNL(solver='lbfgs')],
    model_transition=CrossEntropyMNL(solver='lbfgs'),
    model_initial=CrossEntropyMNL(solver='lbfgs'))

# The first output has no covariates; the second output uses 'Pacc' as its
# input covariate. The initial and transition models have no covariates.
SHMM.set_inputs(
    covariates_initial=[],
    covariates_transition=[],
    covariates_emissions=[[], ['Pacc']])

# 'rt' is the output modelled by the linear regression, and
# 'corr' is the output modelled by the discrete logistic regression.
SHMM.set_outputs([['rt'], ['corr']])

# The training data is a list holding a single sequence.
SHMM.set_data([speed])

Start training



In [9]:

    
# Run training on the two-output model configured above, using the data
# passed to set_data().
SHMM.train()

See the training results



In [10]:

    
# Coefficients of emission model 1 ('rt'), one hidden state per line.
for state in (0, 1):
    print(SHMM.model_emissions[state][0].coef)
print('')
# Coefficients of emission model 2 ('corr'), one hidden state per line.
for state in (0, 1):
    print(SHMM.model_emissions[state][1].coef)









    



[[ 6.38764009]]
[[ 5.51390328]]
()
[[-1.13690447 -2.17394618]]
[[-0.21848303  0.57762625]]



In [11]:

    
# Estimated standard errors of the coefficients of emission model 1 ('rt'),
# one hidden state per line.
for state in (0, 1):
    print(SHMM.model_emissions[state][0].stderr)
print('')
# Estimated standard errors of the coefficients of emission model 2 ('corr'),
# one hidden state per line.
# Note that est_stderr is not supported in the MNL model.
for state in (0, 1):
    print(SHMM.model_emissions[state][1].stderr)









    



[[ 0.01498923]]
[[ 0.0143314]]
()
None
None



In [12]:

    
# Print the transition probabilities out of each hidden state.
# The transition model was given no covariates, so it is evaluated on an
# empty (1, 0) array; the returned log-probabilities are exponentiated.
for state in (0, 1):
    log_proba = SHMM.model_transition[state].predict_log_proba(np.array([[]]))
    print(np.exp(log_proba))









    



[[ 0.91491024  0.08508976]]
[[ 0.11590608  0.88409392]]

Save the trained model



In [13]:

    
# Serialize the trained model. to_json() returns a nested dict (displayed
# below) in which every array entry is a reference to a .npy path under the
# directory given here.
# NOTE(review): presumably to_json() also writes those .npy files -- confirm
# against the IOHMM documentation.
json_dict = SHMM.to_json('../models/UnSupervisedIOHMM/')
json_dict









    Out[13]:





{'data_type': 'UnSupervisedIOHMM',
 'properties': {'EM_tol': 1e-06,
  'covariates_emissions': [[], ['Pacc']],
  'covariates_initial': [],
  'covariates_transition': [],
  'max_EM_iter': 200,
  'model_emissions': [[{'data_type': 'OLS',
     'properties': {'alpha': 0,
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_0/coef.npy'},
      'dispersion': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_0/dispersion.npy'},
      'est_stderr': True,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'n_targets': 1,
      'reg_method': None,
      'solver': 'svd',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_0/stderr.npy'},
      'tol': 0.0001}},
    {'data_type': 'DiscreteMNL',
     'properties': {'alpha': 0,
      'classes': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_1/classes.npy'},
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_1/coef.npy'},
      'est_stderr': False,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'reg_method': 'l2',
      'solver': 'lbfgs',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_0/emission_1/stderr.npy'},
      'tol': 0.0001}}],
   [{'data_type': 'OLS',
     'properties': {'alpha': 0,
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_0/coef.npy'},
      'dispersion': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_0/dispersion.npy'},
      'est_stderr': True,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'n_targets': 1,
      'reg_method': None,
      'solver': 'svd',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_0/stderr.npy'},
      'tol': 0.0001}},
    {'data_type': 'DiscreteMNL',
     'properties': {'alpha': 0,
      'classes': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_1/classes.npy'},
      'coef': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_1/coef.npy'},
      'est_stderr': False,
      'fit_intercept': True,
      'l1_ratio': 0,
      'max_iter': 100,
      'reg_method': 'l2',
      'solver': 'lbfgs',
      'stderr': {'data_type': 'numpy.ndarray',
       'path': '../models/UnSupervisedIOHMM/model_emissions/state_1/emission_1/stderr.npy'},
      'tol': 0.0001}}]],
  'model_initial': {'data_type': 'CrossEntropyMNL',
   'properties': {'alpha': 0,
    'coef': {'data_type': 'numpy.ndarray',
     'path': '../models/UnSupervisedIOHMM/model_initial/coef.npy'},
    'est_stderr': False,
    'fit_intercept': True,
    'l1_ratio': 0,
    'max_iter': 100,
    'n_classes': 2,
    'reg_method': 'l2',
    'solver': 'lbfgs',
    'stderr': {'data_type': 'numpy.ndarray',
     'path': '../models/UnSupervisedIOHMM/model_initial/stderr.npy'},
    'tol': 0.0001}},
  'model_transition': [{'data_type': 'CrossEntropyMNL',
    'properties': {'alpha': 0,
     'coef': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_0/coef.npy'},
     'est_stderr': False,
     'fit_intercept': True,
     'l1_ratio': 0,
     'max_iter': 100,
     'n_classes': 2,
     'reg_method': 'l2',
     'solver': 'lbfgs',
     'stderr': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_0/stderr.npy'},
     'tol': 0.0001}},
   {'data_type': 'CrossEntropyMNL',
    'properties': {'alpha': 0,
     'coef': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_1/coef.npy'},
     'est_stderr': False,
     'fit_intercept': True,
     'l1_ratio': 0,
     'max_iter': 100,
     'n_classes': 2,
     'reg_method': 'l2',
     'solver': 'lbfgs',
     'stderr': {'data_type': 'numpy.ndarray',
      'path': '../models/UnSupervisedIOHMM/model_transition/state_1/stderr.npy'},
     'tol': 0.0001}}],
  'num_states': 2,
  'responses_emissions': [['rt'], ['corr']]}}



In [14]:

    
# Write the serialized model description to disk as pretty-printed JSON
# (4-space indentation, keys sorted).
with open('../models/UnSupervisedIOHMM/model.json', 'w') as model_file:
    json.dump(json_dict, model_file, sort_keys=True, indent=4)

Load back the trained model



In [15]:

    
# Rebuild a model from the dict returned by to_json() in the earlier cell.
# NOTE(review): the dict only stores paths to .npy files, so from_json()
# presumably reads the arrays from those paths -- confirm.
SHMM_from_json = UnSupervisedIOHMM.from_json(json_dict)

See if the coefficients are any different



In [16]:

    
# Coefficients of emission model 1 ('rt') of the reloaded model, one hidden state per line.
for state in (0, 1):
    print(SHMM_from_json.model_emissions[state][0].coef)
print('')
# Coefficients of emission model 2 ('corr') of the reloaded model, one hidden state per line.
for state in (0, 1):
    print(SHMM_from_json.model_emissions[state][1].coef)









    



[[ 6.38764009]]
[[ 5.51390328]]
()
[[-1.13690447 -2.17394618]]
[[-0.21848303  0.57762625]]

Set up the model using a config file, instead of doing it manually



In [17]:

    
# Read the model configuration from disk. A dedicated name is used here so
# that `json_dict` (the serialized trained model produced by to_json()) is not
# overwritten; otherwise re-running the save/load cells after this one would
# silently operate on the configuration instead of the trained model.
with open('../models/UnSupervisedIOHMM/config.json') as config_file:
    config_dict = json.load(config_file)

# Build a model from the configuration; it is given data and trained in the
# next cell.
SHMM_from_config = UnSupervisedIOHMM.from_config(config_dict)

Set data and start training



In [18]:

    
# Attach the same single-sequence data set used in the examples above ...
SHMM_from_config.set_data([speed])
# ... and run training on the config-built model.
SHMM_from_config.train()

See whether the training results are any different



In [19]:

    
# Coefficients of emission model 1 ('rt') of the config-built model, one hidden state per line.
for state in (0, 1):
    print(SHMM_from_config.model_emissions[state][0].coef)
print('')
# Coefficients of emission model 2 ('corr') of the config-built model, one hidden state per line.
for state in (0, 1):
    print(SHMM_from_config.model_emissions[state][1].coef)









    



[[ 6.38763985]]
[[ 5.51390293]]
()
[[-1.13689776 -2.17395753]]
[[-0.21848318  0.57762729]]



In [ ]:

	Unnamed: 0	rt	corr	prev
0	1	6.456770	cor	inc
1	2	5.602119	cor	cor
2	3	6.253829	inc	cor
3	4	5.451038	inc	inc
4	5	5.872118	inc	inc