In [1]:
"""
Environment setup
"""
%matplotlib inline
%cd /lang_dec
import warnings; warnings.filterwarnings('ignore')
import hddm
import numpy as np
import matplotlib.pyplot as plt
from utils import model_tools
In [2]:
# Import patient data (as pandas dataframe)
patients_data = hddm.load_csv('/lang_dec/data/patients_clean.csv')
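HDDM expects a dataframe with a response-time column rt (typically in seconds) and a binary response column, plus whatever condition columns the analysis needs; here the stimulus condition lives in stim. As an optional sanity check (a sketch using the standard pandas API, not part of the original analysis):
In [ ]:
# Optional sanity check on the expected columns (rt, response, stim)
print(patients_data.head())
print(patients_data['stim'].unique())  # expect the four stimulus codes: SS, CP, CS, US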
In [5]:
us = patients_data.loc[patients_data['stim'] == 'US']
ss = patients_data.loc[patients_data['stim'] == 'SS']
cp = patients_data.loc[patients_data['stim'] == 'CP']
cs = patients_data.loc[patients_data['stim'] == 'CS']
plt.boxplot([ss.rt.values, cp.rt.values, cs.rt.values, us.rt.values],
            labels=('SS', 'CP', 'CS', 'US'))
plt.title('Comparison of Reaction Time Differences Between Stimuli Groups')
plt.show()
In [6]:
# Accuracy per stimulus group: percentage of trials with response >= 1 (correct)
ss_accuracy = (ss.response.values >= 1).mean() * 100
cp_accuracy = (cp.response.values >= 1).mean() * 100
cs_accuracy = (cs.response.values >= 1).mean() * 100
us_accuracy = (us.response.values >= 1).mean() * 100
print("SS Accuracy: " + str(ss_accuracy) + "%")
print("CP Accuracy: " + str(cp_accuracy) + "%")
print("CS Accuracy: " + str(cs_accuracy) + "%")
print("US Accuracy: " + str(us_accuracy) + "%")
In [7]:
plt.bar([1, 2, 3, 4],
        [ss_accuracy, cp_accuracy, cs_accuracy, us_accuracy],
        tick_label=['SS', 'CP', 'CS', 'US'])
Out[7]:
In [10]:
"""
Fit Drift Diffusion Model for patients, with drift rate (v) depending on stimulus type
"""
patients_model = hddm.HDDM(patients_data, depends_on={'v': 'stim'}, bias=True)
patients_model.find_starting_values()
patients_model.sample(9000, burn=200, dbname='language_decision/models/patients', db='txt')
Out[10]:
In [11]:
patients_model.plot_posteriors()
PASS - No problematic patterns, such as drifts or large jumps, can be seen in any of the traces above. Autocorrelation also drops to zero quite quickly as the lag increases, which is what we want.
We can also formally test for model convergence using the Gelman-Rubin R statistic$^2$, which compares the within-chain and between-chain variance of separate runs of the same model; a model is considered converged if the statistic for every variable falls between $0.98$ and $1.02$. A simple algorithm to check this is outlined below:
In [10]:
models = []
for i in range(5):
    # Same specification as patients_model above (bias=True), so the check tests that model
    m = hddm.HDDM(patients_data, depends_on={'v': 'stim'}, bias=True)
    m.find_starting_values()
    m.sample(6000, burn=20)
    models.append(m)

model_tools.check_convergence(models)
Out[10]:
PASS - Formal testing reveals no convergence problems; the Gelman-Rubin R statistic for every model variable falls within the desired range ($0.98$ to $1.02$).
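For reference, below is a minimal sketch of what a utility like model_tools.check_convergence might do, assuming it wraps HDDM's built-in Gelman-Rubin implementation (hddm.analyze.gelman_rubin); the actual code in utils.model_tools may differ:
In [ ]:
# Minimal sketch of a convergence check built on HDDM's Gelman-Rubin implementation.
# An assumption about what model_tools.check_convergence does, not its actual code.
from hddm.analyze import gelman_rubin

def check_convergence_sketch(models, lower=0.98, upper=1.02):
    r_hats = gelman_rubin(models)  # dict mapping each traced variable to its R statistic
    failed = {var: r for var, r in r_hats.items() if not (lower <= r <= upper)}
    for var, r in failed.items():
        print("Convergence problem: %s has R = %.4f" % (var, r))
    return len(failed) == 0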
In [12]:
patients_stats = patients_model.gen_stats()
summary = [('Threshold (a) Mean', 'a'),
           ('Non-Decision (t) Mean', 't'),
           ('Bias (z) Mean', 'z'),
           ('SS Mean Drift Rate', 'v(SS)'),
           ('CP Mean Drift Rate', 'v(CP)'),
           ('CS Mean Drift Rate', 'v(CS)'),
           ('US Mean Drift Rate', 'v(US)')]
for label, key in summary:
    print("%s: %s (std: %s)" % (label, patients_stats['mean'][key], patients_stats['std'][key]))
In [26]:
v_SS, v_CP, v_CS, v_US = patients_model.nodes_db.node[['v(SS)', 'v(CP)', 'v(CS)', 'v(US)']]
hddm.analyze.plot_posterior_nodes([v_SS, v_CP, v_CS, v_US])
In [14]:
print('P(SS > US) = ' + str((v_SS.trace() > v_US.trace()).mean()))
print('P(CP > SS) = ' + str((v_CP.trace() > v_SS.trace()).mean()))
print('P(CS > SS) = ' + str((v_CS.trace() > v_SS.trace()).mean()))
print('P(CP > CS) = ' + str((v_CP.trace() > v_CS.trace()).mean()))
print('P(CP > US) = ' + str((v_CP.trace() > v_US.trace()).mean()))
print('P(CS > US) = ' + str((v_CS.trace() > v_US.trace()).mean()))
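Each probability above is simply the fraction of posterior samples in which one drift-rate trace exceeds the other. A small hypothetical helper (not part of the original notebook) generates all pairwise comparisons at once, and can be reused later for the threshold nodes:
In [ ]:
# Hypothetical helper: P(A > B) for every pair of nodes, estimated as the
# fraction of posterior samples in which A's trace exceeds B's
from itertools import combinations

def pairwise_posterior_probs(nodes):
    for (name_a, node_a), (name_b, node_b) in combinations(nodes.items(), 2):
        p = (node_a.trace() > node_b.trace()).mean()
        print('P(%s > %s) = %s' % (name_a, name_b, p))

pairwise_posterior_probs({'SS': v_SS, 'CP': v_CP, 'CS': v_CS, 'US': v_US})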
In [15]:
"""
Distribution for the non-decision time t
"""
time_nondec = patients_model.nodes_db.node[['t']]
hddm.analyze.plot_posterior_nodes(time_nondec)
In [16]:
"""
Distribution of bias z
"""
z = patients_model.nodes_db.node[['z']]
hddm.analyze.plot_posterior_nodes(z)
Threshold (or $a$), also called boundary separation, describes the distance between the upper and lower response boundaries of the DDM: the larger the threshold, the more evidence must accumulate before a response is triggered.
We now explore whether stimulus type affects this threshold.
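To build intuition for what $a$ does, one can simulate data at two boundary separations and watch response times stretch as the threshold grows. The sketch below uses hddm.generate.gen_rand_data with arbitrary parameter values:
In [ ]:
# Sketch: simulate DDM data at a small vs. a large boundary separation (arbitrary values)
sim_narrow, _ = hddm.generate.gen_rand_data({'v': 0.3, 'a': 1.0, 't': 0.3}, size=500)
sim_wide, _ = hddm.generate.gen_rand_data({'v': 0.3, 'a': 2.0, 't': 0.3}, size=500)
# abs() guards against conventions that code lower-boundary responses as negative rt
print("Mean rt with a = 1.0: %.3f" % sim_narrow.rt.abs().mean())
print("Mean rt with a = 2.0: %.3f" % sim_wide.rt.abs().mean())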
In [3]:
patients_model_threshold = hddm.HDDM(patients_data, depends_on={'v': 'stim', 'a': 'stim'}, bias=True)
patients_model_threshold.find_starting_values()
patients_model_threshold.sample(10000, burn=200, dbname='language_decision/models/patients_threshold', db='txt')
Out[3]:
In [23]:
models_threshold = []
for i in range(5):
    # Same specification as patients_model_threshold above (bias=True)
    m = hddm.HDDM(patients_data, depends_on={'v': 'stim', 'a': 'stim'}, bias=True)
    m.find_starting_values()
    m.sample(6000, burn=20)
    models_threshold.append(m)

model_tools.check_convergence(models_threshold)
Out[23]:
In [18]:
a_SS, a_CP, a_CS, a_US = patients_model_threshold.nodes_db.node[['a(SS)', 'a(CP)', 'a(CS)', 'a(US)']]
hddm.analyze.plot_posterior_nodes([a_SS, a_CP, a_CS, a_US])
In [19]:
print('P(SS > US) = ' + str((a_SS.trace() > a_US.trace()).mean()))
print('P(SS > CS) = ' + str((a_SS.trace() > a_CS.trace()).mean()))
print('P(CP > SS) = ' + str((a_CP.trace() > a_SS.trace()).mean()))
print('P(CP > CS) = ' + str((a_CP.trace() > a_CS.trace()).mean()))
print('P(CP > US) = ' + str((a_CP.trace() > a_US.trace()).mean()))
print('P(CS > US) = ' + str((a_CS.trace() > a_US.trace()).mean()))
In [20]:
print("a(US) mean: " + str(a_US.trace().mean()))
print("a(SS) mean: " + str(a_SS.trace().mean()))
print("a(CS) mean: " + str(a_CS.trace().mean()))
print("a(CP) mean: " + str(a_CP.trace().mean()))
In [4]:
"""
Fit a lumped Drift Diffusion Model: no stimulus dependence, all parameters shared
"""
patients_model_lumped = hddm.HDDM(patients_data)
patients_model_lumped.find_starting_values()
patients_model_lumped.sample(10000, burn=200, dbname='language_decision/models/patients_lumped', db='txt')
Out[4]:
In [5]:
patients_model_lumped.plot_posteriors()
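A natural use for this lumped model is as a baseline for formal model comparison. HDDM exposes the Deviance Information Criterion (lower is better) on every fitted model, so the stimulus-dependent models above can be checked against it:
In [ ]:
# Model comparison via DIC (lower is better)
print("Lumped model DIC: %f" % patients_model_lumped.dic)
print("v-by-stimulus model DIC: %f" % patients_model.dic)
print("v- and a-by-stimulus model DIC: %f" % patients_model_threshold.dic)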