In [1]:
"""
Environment setup
"""
%matplotlib inline
%cd /lang_dec
import warnings; warnings.filterwarnings('ignore')
import hddm
import numpy as np
import matplotlib.pyplot as plt
from utils import model_tools
In [2]:
# Import pilot subject data (as pandas dataframe)
pilot_data = hddm.load_csv('/lang_dec/data/pilot_clean.csv')
In [19]:
us = pilot_data.loc[pilot_data['stim'] == 'US']
ss = pilot_data.loc[pilot_data['stim'] == 'SS']
cp = pilot_data.loc[pilot_data['stim'] == 'CP']
cs = pilot_data.loc[pilot_data['stim'] == 'CS']
plt.boxplot([ss.rt.values, cp.rt.values, cs.rt.values, us.rt.values],
            labels=('SS', 'CP', 'CS', 'US'))
plt.title('Comparison of Reaction Time Differences Between Stimuli Groups')
plt.show()
In [83]:
ss_accuracy = (ss.response >= 1).mean() * 100
cp_accuracy = (cp.response >= 1).mean() * 100
cs_accuracy = (cs.response >= 1).mean() * 100
us_accuracy = (us.response >= 1).mean() * 100
print("SS Accuracy: " + str(ss_accuracy) + "%")
print("CP Accuracy: " + str(cp_accuracy) + "%")
print("CS Accuracy: " + str(cs_accuracy) + "%")
print("US Accuracy: " + str(us_accuracy) + "%")
In [117]:
plt.bar([1, 2, 3, 4],
        [ss_accuracy, cp_accuracy, cs_accuracy, us_accuracy])
plt.xticks([1, 2, 3, 4], ('SS', 'CP', 'CS', 'US'))
Out[117]:
In [23]:
"""
Construct a model where the drift rate depends on the stimulus type
"""
pilot_model = hddm.HDDM(pilot_data, depends_on={'v': 'stim'}, bias=True)
pilot_model.find_starting_values()
pilot_model.sample(9000, burn=200, dbname='language_decision/models/pilot', db='txt')
Out[23]:
In [136]:
pilot_model.plot_posteriors()
PASS - No problematic patterns, such as drifts or large jumps, can be seen in any of the traces above. Autocorrelation also drops to zero quickly with increasing lag, which is what we want.
We can also formally test for model convergence using the Gelman-Rubin R statistic$^2$, which compares the within-chain and between-chain variance of multiple runs of the same model; a model has converged if the statistic for every variable falls between $0.98$ and $1.02$. A simple check along these lines is outlined below:
In [153]:
models = []
for i in range(100):
    m = hddm.HDDM(pilot_data, depends_on={'v': 'stim'}, bias=True)
    m.find_starting_values()
    m.sample(6000, burn=20)
    models.append(m)
model_tools.check_convergence(models)
Out[153]:
PASS - Formal testing reveals no convergence problems; the Gelman-Rubin R statistic values for all model variables fall within the desired range ($0.98$ to $1.02$).
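The model_tools.check_convergence helper is a small project utility; an equivalent check can be sketched with HDDM's built-in Gelman-Rubin helper (this assumes hddm.analyze.gelman_rubin, which recent HDDM releases provide):
In [ ]:
# Sketch of an equivalent convergence check: hddm.analyze.gelman_rubin
# returns a dict mapping each model variable to its R statistic; we flag
# anything outside the 0.98-1.02 window.
r_hat = hddm.analyze.gelman_rubin(models)
problematic = {var: r for var, r in r_hat.items() if not 0.98 < r < 1.02}
print(problematic if problematic else 'All variables within range - model converged')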
In [137]:
pilot_stats = pilot_model.gen_stats()
print("Threshold (a) Mean: " + str(pilot_stats['mean']['a']) + " (std: " + str(pilot_stats['std']['a']) + ")")
print("Non-Decision (t) Mean: " + str(pilot_stats['mean']['t']) + " (std: " + str(pilot_stats['std']['t']) + ")")
print("Bias (z) Mean: " + str(pilot_stats['mean']['z']) + " (std: " + str(pilot_stats['std']['z']) + ")")
print("SS Mean Drift Rate: " + str(pilot_stats['mean']['v(SS)']) + " (std: " + str(pilot_stats['std']['v(SS)']) + ")")
print("CP Mean Drift Rate: " + str(pilot_stats['mean']['v(CP)']) + " (std: " + str(pilot_stats['std']['v(CP)']) + ")")
print("CS Mean Drift Rate: " + str(pilot_stats['mean']['v(CS)']) + " (std: " + str(pilot_stats['std']['v(CS)']) + ")")
print("US Mean Drift Rate: " + str(pilot_stats['mean']['v(US)']) + " (std: " + str(pilot_stats['std']['v(US)']) + ")")
In [138]:
v_SS, v_CP, v_CS, v_US = pilot_model.nodes_db.node[['v(SS)', 'v(CP)', 'v(CS)', 'v(US)']]
hddm.analyze.plot_posterior_nodes([v_SS, v_CP, v_CS, v_US])
Estimating the model in a Bayesian framework allows us to test for significance directly on the posterior, rather than relying on conventional frequentist statistical tests:
In [139]:
print('P(SS > US) = ' + str((v_SS.trace() > v_US.trace()).mean()))
print('P(CP > SS) = ' + str((v_CP.trace() > v_SS.trace()).mean()))
print('P(CS > SS) = ' + str((v_CS.trace() > v_SS.trace()).mean()))
print('P(CP > CS) = ' + str((v_CP.trace() > v_CS.trace()).mean()))
In [140]:
"""
Distribution for the non-decision time t
"""
time_nondec = pilot_model.nodes_db.node[['t']]
hddm.analyze.plot_posterior_nodes(time_nondec)
In [141]:
"""
Distribution of bias z
"""
z = pilot_model.nodes_db.node[['z']]
hddm.analyze.plot_posterior_nodes(z)
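HDDM places the starting point $z$ on a $0$-$1$ scale, where $z = 0.5$ means no bias toward either boundary. In the same spirit as the drift-rate comparisons above, we can query the posterior directly (a quick sketch, not part of the original analysis):
In [ ]:
# Posterior probability that responses start biased toward the upper boundary
z_node = pilot_model.nodes_db.node['z']
print('P(z > 0.5) = ' + str((z_node.trace() > 0.5).mean()))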
Threshold (a) describes the distance between the upper and lower response boundaries of the DDM.
We now explore whether stimulus type affects this threshold, i.e. the separation between the two boundaries:
In [3]:
pilot_model_threshold = hddm.HDDM(pilot_data, depends_on={'v': 'stim', 'a': 'stim'}, bias=True)
pilot_model_threshold.find_starting_values()
pilot_model_threshold.sample(10000, burn=200, dbname='language_decision/models/pilot_threshold', db='txt')
Out[3]:
In [61]:
models_threshold = []
for i in range(5):
    m = hddm.HDDM(pilot_data, depends_on={'v': 'stim', 'a': 'stim'}, bias=True)
    m.find_starting_values()
    m.sample(6000, burn=20)
    models_threshold.append(m)
model_tools.check_convergence(models_threshold)
Out[61]:
In [150]:
a_SS, a_CP, a_CS, a_US = pilot_model_threshold.nodes_db.node[['a(SS)', 'a(CP)', 'a(CS)', 'a(US)']]
hddm.analyze.plot_posterior_nodes([a_SS, a_CP, a_CS, a_US])
In [151]:
print('P(SS > US) = ' + str((a_SS.trace() > a_US.trace()).mean()))
print('P(CP > SS) = ' + str((a_CP.trace() > a_SS.trace()).mean()))
print('P(CS > SS) = ' + str((a_CS.trace() > a_SS.trace()).mean()))
print('P(CP > CS) = ' + str((a_CP.trace() > a_CS.trace()).mean()))
print('P(CS > US) = ' + str((a_CS.trace() > a_US.trace()).mean()))
No significant differences in threshold were found across the stimulus groups.
In [152]:
print("a(US) mean: " + str(a_US.trace().mean()))
print("a(SS) mean: " + str(a_SS.trace().mean()))
print("a(CS) mean: " + str(a_CS.trace().mean()))
print("a(CP) mean: " + str(a_CP.trace().mean()))
In [4]:
"""
Construct a lumped model where parameters are shared across stimulus types
"""
pilot_model_lumped = hddm.HDDM(pilot_data)
pilot_model_lumped.find_starting_values()
pilot_model_lumped.sample(10000, burn=200, dbname='language_decision/models/pilot_lumped', db='txt')
Out[4]:
In [5]:
pilot_model_lumped.plot_posteriors()
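With both models sampled, a natural follow-up is a formal comparison of the lumped and stimulus-dependent models. One option is the deviance information criterion (DIC), which HDDM models expose via the dic attribute inherited from PyMC (a sketch; lower DIC indicates a better fit after penalising model complexity):
In [ ]:
# Compare the stimulus-dependent and lumped models; lower DIC is better
print('Stimulus-dependent model DIC: ' + str(pilot_model.dic))
print('Lumped model DIC: ' + str(pilot_model_lumped.dic))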