In [54]:
import pandas as pd
import numpy as np
import os
import pickle as pkl
from pprint import pprint
import re
In [83]:
# Session log to analyse. The second '_'-separated token of the file name is
# used to locate the matching design folder (presumably the participant number).
fn = 'DEBUG_1_2017-10-07_12.05.54_outputDict.pickle'
pp_num = fn.split('_')[1]

# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open(fn, mode='rb') as pickle_file:
    outDict = pkl.load(pickle_file)
events = outDict['eventArray']

# Load the prepared design that belongs to this session (folder id zero-padded to 3 digits).
design_path = '../designs/pp_%s/all_blocks/trials.csv' % pp_num.zfill(3)
prepped_design = pd.read_csv(design_path)
In [84]:
# Build the timing table from the prepared design: one row per designed trial.
# Timing columns are filled in from the event log below.
dat = pd.DataFrame(columns=['trial_ID', 'trial_start_time', 'stimulus_onset_time'])
dat['trial_ID'] = prepped_design['trial_ID']
dat['block_trial_ID'] = prepped_design['block_trial_ID']

# Patterns for the two kinds of timing messages in the event log.
# The decimal point in the timestamp is escaped: an unescaped '.' matches ANY
# character, which would accept malformed timestamps.
re1 = re.compile(r'trial (?P<trial_ID>[0-9]*) started at (?P<start_time>[0-9]+\.[0-9]+)')
re2 = re.compile(r'trial (?P<trial_ID>[0-9]*) phase (?P<phase_num>[0-9]+) started at (?P<start_time>[0-9]+\.[0-9]+)')

for trial in events:
    for trial_event in trial:
        # Lists are events (key presses, saccades, etc.); only non-list entries are timing messages.
        if isinstance(trial_event, list):
            continue

        # Match each pattern once and reuse the result.
        trial_start = re1.match(trial_event)
        if trial_start is not None:
            d = trial_start.groupdict()
            dat.loc[dat['trial_ID'] == int(d['trial_ID']), 'trial_start_time'] = float(d['start_time'])
            continue

        phase_start = re2.match(trial_event)
        if phase_start is not None:
            d = phase_start.groupdict()
            # One column per phase, created on first use: 'phase_<n>_start'.
            dat.loc[dat['trial_ID'] == int(d['trial_ID']), 'phase_' + d['phase_num'] + '_start'] = float(d['start_time'])
In [85]:
# NOTE: this cell reassigns `dat` and overwrites 'trial_start_time'; run it exactly once
# after the extraction cell (re-running it would shift the timings a second time).

# Keep only rows that received a trial start time (instruction screens etc. have none).
dat = dat.dropna(axis=0, subset=['trial_start_time']).reset_index(drop=True)

# Stimulus onset is the start of phase 4; cue onset is the start of phase 2.
dat['stimulus_onset_time'] = dat['phase_4_start'].copy()
dat['cue_onset_time'] = dat['phase_2_start']

# The logged 'trial start time' is really the start of phase 0: the trial code is running
# but still waiting for the scanner pulse.
dat['phase_0_start'] = dat['trial_start_time'].copy()

# The actual trial start is the start of phase 1, when the pre-cue fixation cross appears.
dat['trial_start_time'] = dat['phase_1_start']

# Express every timestamp relative to the earliest phase-1 start (the first trial).
t0 = dat['phase_1_start'].min()
for col in dat.columns:
    is_timestamp_col = ('phase' in col) or ('time' in col)
    if not is_timestamp_col:
        continue
    dat[col] = dat[col] - t0

# Realised duration of each phase: start of the next phase minus its own start.
for phase in range(6, -1, -1):
    dat['phase_%d' % phase] = dat['phase_%d_start' % (phase + 1)] - dat['phase_%d_start' % phase]

# Show the phase durations next to the trial and stimulus onsets.
summary_cols = ['phase_' + str(x) for x in range(0, 7)]
summary_cols = summary_cols + ['trial_start_time', 'stimulus_onset_time']
dat[summary_cols]
Out[85]:
In [94]:
# Block-relative copies of the session-relative timings. They are re-initialised from the
# session-relative columns on every run, so this cell can safely be re-executed.
dat['trial_start_time_block'] = dat['trial_start_time']
dat['stimulus_onset_time_block'] = dat['stimulus_onset_time']
dat['cue_onset_time_block'] = dat['cue_onset_time']
block_cols = ['trial_start_time_block', 'cue_onset_time_block', 'stimulus_onset_time_block']

# Row positions at which a new block starts. Single-argument np.where returns a TUPLE of
# index arrays (one per dimension); take element [0] to get the actual row positions,
# otherwise len() is always 1 and only one "block" is ever processed.
new_block_rows = np.where(dat['block_trial_ID'] == 0)[0]

# Loop over blocks
for i, idx_start in enumerate(new_block_rows):
    # A block runs up to (not including) the next block start, or to the end of the table.
    idx_end = new_block_rows[i + 1] if i < (len(new_block_rows) - 1) else dat.shape[0]
    print('Correcting row idx: %d-%d' % (idx_start, idx_end))

    # idx_start is a row position, not a trial_ID: look the block's first trial up by row
    # label (the index was reset to 0..n-1 when the rows without timing were dropped).
    t0_this_block = dat.loc[idx_start, 'trial_start_time']

    # .loc slicing is inclusive on both ends, hence idx_end - 1.
    for col in block_cols:
        dat.loc[idx_start:(idx_end - 1), col] = dat.loc[idx_start:(idx_end - 1), col] - t0_this_block
In [101]:
# Difference between the realised block-relative timings and the planned ones.
compare_cols = ['trial_start_time_block', 'cue_onset_time_block', 'stimulus_onset_time_block']

# `dat` had rows dropped and its index reset, so its row labels no longer line up with
# `prepped_design`. Align the design to `dat` on trial_ID before subtracting; a plain
# subtraction would pair rows by index label and could compare different trials.
design_aligned = dat[['trial_ID']].merge(
    prepped_design[['trial_ID'] + compare_cols], on='trial_ID', how='left'
)
assert len(design_aligned) == len(dat), 'trial_ID is not unique in the prepared design'
design_aligned.index = dat.index

dat[compare_cols] - design_aligned[compare_cols]
Out[101]: