Mixed Data DDM

DDM using both patient and matched control data


In [2]:
%matplotlib inline
%cd ..

import warnings; warnings.filterwarnings('ignore')


/Users/celefthe/Programming/projects/language_decision

Start by parsing the .mat files from the matched controls


In [2]:
from utils import matparser, data_compiler
import glob

data_dir = 'data/controls/'

matparser.parse_dir(data_dir)

out_dir = "data/controls.csv"
data_compiler.compile_dir(data_dir, out_dir)


Out[2]:
'data/controls.csv'

Clean up - remove nan entries as these cause hddm to fail


In [3]:
!cat data/controls.csv | grep -v nan > data/controls_clean.csv

Merge patient and matched control data to single .csv file

First, create tagged versions of the csv files - include "tag" column to differentiate between patient and control


In [36]:
!cat data/patients_clean.csv | sed 1d | awk -v d="patient" -F"," 'BEGIN { OFS = "," } {$5=d; print}' | tr -d $'\r' > data/patients_tagged.csv
!cat data/controls_clean.csv | sed 1d | awk -v d="control" -F"," 'BEGIN { OFS = "," } {$5=d; print}' | tr -d $'\r' > data/controls_tagged.csv

!echo "response,rt,subj_idx,stim,subj_type" > data/combined_clean.csv
!cat data/patients_tagged.csv >> data/combined_clean.csv; sed 1d data/controls_tagged.csv >> data/combined_clean.csv

Build HDDM model


In [3]:
import hddm

data = hddm.load_csv('data/combined_clean.csv')

model = hddm.HDDM(data, depends_on={'v': ['stim', 'subj_type'], 'a': 'subj_type'})
model.find_starting_values()
model.sample(6000, burn=20)


After 7.000000 retries, still no good fit found.
---------------------------------------------------------------------------
ZeroProbability                           Traceback (most recent call last)
<ipython-input-3-4d628ff3ae4e> in <module>()
      3 data = hddm.load_csv('data/combined_clean.csv')
      4 
----> 5 model = hddm.HDDM(data, depends_on={'v': ['stim', 'subj_type'], 'a': 'subj_type'})
      6 model.find_starting_values()
      7 model.sample(6000, burn=20)

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/hddm/models/hddm_info.py in __init__(self, *args, **kwargs)
    111         self.is_informative = kwargs.pop('informative', True)
    112 
--> 113         super(HDDM, self).__init__(*args, **kwargs)
    114 
    115     def _create_stochastic_knodes(self, include):

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/hddm/models/base.py in __init__(self, data, bias, include, wiener_params, p_outlier, **kwargs)
    687         self.wfpt_class = hddm.likelihoods.generate_wfpt_stochastic_class(wp, cdf_range=self.cdf_range)
    688 
--> 689         super(HDDMBase, self).__init__(data, **kwargs)
    690 
    691     def __getstate__(self):

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/hddm/models/base.py in __init__(self, data, **kwargs)
     38         self.std_depends = kwargs.pop('std_depends', False)
     39 
---> 40         super(AccumulatorModel, self).__init__(data, **kwargs)
     41 
     42 

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/kabuki/hierarchical.py in __init__(self, data, is_group_model, depends_on, trace_subjs, plot_subjs, plot_var, group_only_nodes)
    346         self.db = None
    347 
--> 348         self._setup_model()
    349 
    350     def _setup_model(self):

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/kabuki/hierarchical.py in _setup_model(self)
    357 
    358         # constructs pymc nodes etc and connects them appropriately
--> 359         self.create_model()
    360 
    361     def __getstate__(self):

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/kabuki/hierarchical.py in create_model(self, max_retries)
    437         else:
    438             print("After %f retries, still no good fit found." %(tries))
--> 439             _create()
    440 
    441         # create node container

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/kabuki/hierarchical.py in _create()
    427         def _create():
    428             for knode in self.knodes:
--> 429                 knode.create()
    430 
    431         for tries in range(max_retries):

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/kabuki/hierarchical.py in create(self)
    166                 kwargs['doc'] = node_name
    167 
--> 168             node = self.create_node(node_name, kwargs, grouped_data)
    169 
    170             if node is not None:

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/kabuki/hierarchical.py in create_node(self, node_name, kwargs, data)
    174     def create_node(self, node_name, kwargs, data):
    175         #actually create the node
--> 176         return self.pymc_node(name=node_name, **kwargs)
    177 
    178     def create_tag_and_subj_idx(self, cols, uniq_elem):

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/pymc/distributions.py in __init__(self, *args, **kwds)
    318                     logp_partial_gradients=logp_partial_gradients,
    319                     dtype=dtype,
--> 320                     **arg_dict_out)
    321 
    322     new_class.__name__ = name

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/pymc/PyMCObjects.py in __init__(self, logp, doc, name, parents, random, trace, value, dtype, rseed, observed, cache_depth, plot, verbose, isdata, check_logp, logp_partial_gradients)
    773         if check_logp:
    774             # Check initial value
--> 775             if not isinstance(self.logp, float):
    776                 raise ValueError(
    777                     "Stochastic " +

~/anaconda/envs/lang-dec/lib/python3.5/site-packages/pymc/PyMCObjects.py in get_logp(self)
    930                     (self._value, self._parents.value))
    931             else:
--> 932                 raise ZeroProbability(self.errmsg)
    933 
    934         return logp

ZeroProbability: Stochastic wfpt(18439.control).1.5451359127328033's value is outside its support,
 or it forbids its parents' current values.