Event Info Generation

The cells in this notebook perform two steps related to processing the log files written by Dharsh's PsychToolBox code into a format we directly use in the analysis notebook.

First, read each functional run's .mat file and extract the relevant information. Combine the runs and write a master .csv file for each subject that we will use for all subsequent processing (i.e. the .mat files are never touched again). This file will be read directly by the analysis notebook and used for behavioral and fMRI analyses.

Additionally, we will extract more specific information related to "decoding problems" that will be used, behind the scenes, by the lyman code in the analysis notebook that builds datasets for decoding analyses. This information is saved in event files that contain at least the following fields:

  • run
  • condition
  • onset

Further fields can be used for confound regression -- in the code below, we save an rt (reaction time) field for this purpose.


In [ ]:
import os
import os.path as op
import numpy as np
import pandas as pd
from scipy.io import loadmat
import lyman

In [ ]:
# Project-level configuration pulled from lyman.
subjects = lyman.determine_subjects()
project = lyman.gather_project_info()
data_dir = project["data_dir"]
anal_dir = project["analysis_dir"]
# Path templates; the "%s"/"%d" slots are filled per subject (and, for
# event_temp, per event-file name; for artifact_temp, per run number).
behav_temp = op.join(data_dir, "%s/behav/behav_data.csv")
event_temp = op.join(data_dir, "%s/design/%s.csv")
artifact_temp = op.join(anal_dir, "dksort/%s/preproc/run_%d/artifacts.csv")

# Condition labels, indexed by the integer codes used in the .mat files
# (see the enumerate-based mapping in parse_ptb_files).
dim_rules = ["shape", "color", "pattern"]
dec_rules = ["same", "different"]

# Columns written to every decoding event file.
sched_columns = ["run", "condition", "onset", "rt"]

In [ ]:
# Ensure each subject has a design/ directory for the event files.
for subj in subjects:
    design_dir = op.join(data_dir, subj, "design")
    if not op.exists(design_dir):
        os.mkdir(design_dir)

In [ ]:
def dimension_rules(subj):
    """Write the dimension-rule event file for decoding analyses.

    Keeps only artifact-free ("clean"), correct trials; the condition is
    the attended dimension and the onset is the stimulus time.
    """
    trials = pd.read_csv(behav_temp % subj)
    events = pd.DataFrame(columns=sched_columns, index=trials.index)
    usable = trials[trials["clean"] & trials["correct"]]
    events.update(usable)
    events["condition"] = usable.dim_rule
    events["onset"] = usable.stim_time
    events.dropna().to_csv(event_temp % (subj, "dimension"), index=False)

In [ ]:
def parse_ptb_files(subj):
    """Parse one subject's per-run PsychToolBox .mat logs into a single
    behavioral .csv (<data_dir>/<subj>/behav/behav_data.csv).

    For each of the 4 functional runs this extracts trial-wise responses,
    timing, rule/schedule information, and stimulus identities, flags
    trials whose stimulus volume falls near a preprocessing artifact,
    concatenates the runs, and maps integer codes to readable labels.

    Fixes for Python 3 compatibility (both changes behave identically
    under Python 2): ``dict.items()`` instead of the Py2-only
    ``iteritems()``, and floor division ``//`` where an integer repeat
    count is required by ``np.tile``.
    """
    # One list entry per run for every output column; concatenated below.
    data = dict(run=[], rt=[], congruent=[],
                dim_rule=[], dec_rule=[],
                dim_shift=[], dec_shift=[],
                dim_stay=[], dec_stay=[],
                dim_shift_lag=[], dec_shift_lag=[],
                attend_match=[], distract_match=[],
                shape1=[], shape2=[],
                color1=[], color2=[],
                pattern1=[], pattern2=[],
                answer=[], made_resp=[], correct=[],
                stim_time=[], clean=[], block_pos=[])

    for r in range(1, 5):
        mat_file = op.join(data_dir, subj,
                           "behav/%s_run%d.mat" % (subj, r))
        mat = loadmat(mat_file, squeeze_me=True)
        events = mat["theData"]

        n_trials = len(events["RT"][()])

        data["rt"].append(events["RT"][()])
        data["answer"].append(events["keypress"][()])
        data["made_resp"].append(events["keypress"][()] > 0)
        # NOTE(review): the -12 presumably removes lead-in time before
        # the first stimulus so onsets are relative to the usable
        # timeseries -- confirm against the acquisition protocol.
        data["stim_time"].append(events["stim1_onset"][()] - 12)
        data["correct"].append(events["score"][()] > 0)
        # Position within each 3-trial mini-block (0, 1, 2 repeating).
        # Floor division is required: under Python 3 plain `/` yields a
        # float, which np.tile rejects as a repeat count.
        data["block_pos"].append(np.tile(range(3), n_trials // 3))
        data["run"].append(np.ones(n_trials, int) * r)

        # Flag trials whose stimulus volume lands within -1..+4 volumes
        # of any artifact (assumes a 2 s TR given the /2 -- confirm).
        stim_vols = np.round(data["stim_time"][-1] / 2)
        artifacts = pd.read_csv(artifact_temp % (subj, r)).max(axis=1).values
        art_vols = np.unique(np.argwhere(artifacts)[:, None] + np.arange(-1, 5))
        clean = np.logical_not(np.in1d(stim_vols, art_vols))
        data["clean"].append(clean)

        sched = mat["sess_trials"]

        # Schedule columns 2: code match status (1-based in the file);
        # after subtracting 1, 0 means "match".
        matches = sched[:n_trials, 2:] - 1
        data["congruent"].append(~np.logical_xor(*matches.T))
        data["attend_match"].append(matches[:, 0] == 0)
        data["distract_match"].append(matches[:, 1] == 0)

        for idx, rule in enumerate(["dim", "dec"]):
            # Rule identity per trial, converted to a 0-based code.
            rule_key = sched[:n_trials, idx] - 1
            # A "shift" trial differs from the previous trial's rule;
            # the first trial of each run always counts as a shift.
            rule_shift = np.ones(len(rule_key), bool)
            rule_shift[1:] = rule_key[1:] != rule_key[:-1]
            data["%s_rule" % rule].append(rule_key)
            data["%s_shift" % rule].append(rule_shift)
            # "Stay" trials begin a mini-block without a rule change.
            rule_stay = np.logical_and(data["block_pos"][-1] == 0,
                                       np.logical_not(rule_shift))
            data["%s_stay" % rule].append(rule_stay)

            # Trials elapsed since the most recent shift (0 on shifts).
            lag = []
            for i, shift_i in enumerate(rule_shift):
                if shift_i:
                    lag.append(0)
                else:
                    lag.append(i - np.argwhere(rule_shift[:i]).max())
            data["%s_shift_lag" % rule].append(lag)

        # Stimulus identities for both items on each trial, per dimension.
        stims = mat["trial"][r - 1]["stim"]
        for j, dim in enumerate(dim_rules):
            for t in [1, 2]:
                data["%s%d" % (dim, t)].append(stims[j][:72, t - 1].astype(int))

    # Concatenate runs into flat arrays; .items() works on both Py2/Py3.
    data = {k: np.concatenate(v) for k, v in data.items()}
    df = pd.DataFrame(data)
    df["subj"] = subj
    # Map integer codes to readable condition / response labels.
    df["dim_rule"] = df.dim_rule.map(dict(enumerate(dim_rules)))
    df["dec_rule"] = df.dec_rule.map(dict(enumerate(dec_rules)))
    df["answer"] = df.answer.map({1: "no", 2: "yes"})
    for dim in dim_rules:
        df["%s_match" % dim] = df["%s1" % dim] == df["%s2" % dim]

    csv_file = op.join(data_dir, subj, "behav/behav_data.csv")
    df.to_csv(csv_file, index_label="trial")

In [ ]:
def decision_rules(subj):
    """Write the decision-rule event file for decoding analyses.

    Keeps only artifact-free ("clean"), correct trials; the condition is
    the decision rule and the onset is the stimulus time.
    """
    trials = pd.read_csv(behav_temp % subj)
    events = pd.DataFrame(columns=sched_columns, index=trials.index)
    usable = trials[trials["clean"] & trials["correct"]]
    events.update(usable)
    events["condition"] = usable.dec_rule
    events["onset"] = usable.stim_time
    events.dropna().to_csv(event_temp % (subj, "decision"), index=False)

In [ ]:
def dimension_cues(subj):
    """Write the dimension-cue event file for decoding analyses.

    Restricts to clean, correct trials at the start of a mini-block
    (block_pos == 0) and time-locks the onset 12 s before the stimulus,
    i.e. at the cue.
    """
    trials = pd.read_csv(behav_temp % subj)
    events = pd.DataFrame(columns=sched_columns, index=trials.index)
    usable = trials[trials["clean"] & trials["correct"]]
    usable = usable[usable.block_pos == 0]
    events.update(usable)
    events["condition"] = usable.dim_rule
    events["onset"] = usable.stim_time - 12
    events.dropna().to_csv(event_temp % (subj, "dimension_cue"), index=False)

In [ ]:
# Run the full pipeline for every subject: parse the raw .mat logs
# first, then derive each of the decoding event files from the csv.
pipeline = [parse_ptb_files, dimension_rules, decision_rules, dimension_cues]
for subj in subjects:
    for step in pipeline:
        step(subj)