Localizer Behavioral Data & Onset Generation


In [1]:
import pandas as pd
import moss
from scipy import stats
import scipy as sp
import seaborn as sns
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os.path as op
import os

# for plotting
sns.set(style='ticks', context='poster', font_scale=1.3)
%matplotlib inline

# import rpy2.rinterface as ri
# ri.set_initoptions(('rpy2', '--quiet', '--vanilla', '--no-save'))
# ri.initr()

# # R for stats
# %load_ext rpy2.ipython

In [2]:
# %R require(lme4)
# %R require(lmerTest)

Gather experiment info


In [3]:
dirs = dict()
dirs['basedir'] = op.join(op.expanduser('~'), 'Experiments/localizer')
dirs['datadir'] = op.join(dirs['basedir'], 'data/')
dirs['analydir'] = op.join(dirs['basedir'], 'analysis')
dirs['subj_info_file'] = op.join(dirs['datadir'], 'subj_info.csv')

In [4]:
subj_info = pd.read_csv(dirs['subj_info_file'])
subj_info = subj_info[pd.isnull(subj_info.remove)]
subj_info


Out[4]:
subid group remove
0 loc100 control NaN
1 loc101 control NaN
2 loc102 control NaN
3 loc103 control NaN
4 loc104 control NaN
5 loc105 control NaN
6 loc107 control NaN
7 loc108 control NaN
8 loc109 control NaN
9 loc110 control NaN
10 loc111 control NaN
11 loc113 control NaN
12 loc114 control NaN
13 loc115 control NaN
14 loc116 control NaN
15 loc117 control NaN
16 loc118 control NaN
17 loc119 control NaN
18 loc120 control NaN
19 loc121 control NaN
20 loc122 control NaN
21 loc158 control NaN
22 loc150 stress NaN
23 loc151 stress NaN
24 loc152 stress NaN
25 loc153 stress NaN
26 loc154 stress NaN
27 loc155 stress NaN
28 loc156 stress NaN
29 loc157 stress NaN
30 loc159 stress NaN
31 loc160 stress NaN
32 loc161 stress NaN
33 loc162 stress NaN
34 loc163 stress NaN
35 loc164 stress NaN
36 loc165 stress NaN
37 loc166 stress NaN
38 loc167 stress NaN
39 loc168 stress NaN
40 loc169 stress NaN
41 loc170 stress NaN
42 loc172 stress NaN
43 loc171 stress NaN
44 loc173 stress NaN
45 loc174 stress NaN

Load in data


In [5]:
dl = pd.DataFrame()

for subid in subj_info.subid:
    print subid
    
    # add study file
    study_file = op.join(dirs['datadir'], subid, subid + '_behav_localizer.csv')
    d = pd.read_csv(study_file)
    d['subid'] = subid
    dl = dl.append(d, ignore_index=True)
    
# Merge with subj_info
dl = dl.merge(subj_info, on='subid', how='outer')


loc100
loc101
loc102
loc103
loc104
loc105
loc107
loc108
loc109
loc110
loc111
loc113
loc114
loc115
loc116
loc117
loc118
loc119
loc120
loc121
loc122
loc158
loc150
loc151
loc152
loc153
loc154
loc155
loc156
loc157
loc159
loc160
loc161
loc162
loc163
loc164
loc165
loc166
loc167
loc168
loc169
loc170
loc172
loc171
loc173
loc174

In [6]:
dl.head()


Out[6]:
index run trial onset duration cond subcond resp acc respRT ISIresp ISIacc ISIrespRT subid group remove
0 1 1 1 12.0068 1.4946 object manmade NR 0 999 manmade 1 0.3153 loc100 control NaN
1 2 1 2 13.9136 1.0869 object manmade NR 0 999 manmade 1 0.3784 loc100 control NaN
2 3 1 3 15.0290 1.4716 object natural NR 0 999 natural 1 0.1792 loc100 control NaN
3 4 1 4 16.5293 1.4712 object manmade NR 0 999 manmade 1 0.1867 loc100 control NaN
4 5 1 5 18.0304 1.4701 object natural NR 0 999 natural 1 0.0906 loc100 control NaN

In [7]:
len(dl.subid.unique())


Out[7]:
46

In [8]:
dl.groupby(['subid', 'group']).mean().reset_index().groupby('group').count().subid


Out[8]:
group
control    22
stress     24
Name: subid, dtype: int64

Deal with no responses (NR): Set ISI resp as resp


In [9]:
dl.loc[dl.resp == 'NR', 'respRT'] = dl.loc[dl.resp == 'NR', 'ISIrespRT'] + .5 # adjust for stimTime
dl.loc[dl.resp == 'NR', 'acc'] = dl.loc[dl.resp == 'NR', 'ISIacc']
dl.loc[dl.resp == 'NR', 'resp'] = dl.loc[dl.resp == 'NR', 'ISIresp']

drop_cols = ['ISIrespRT', 'ISIresp', 'ISIacc', 'remove', 'index']
dl.drop(drop_cols, 1, inplace=True)

# Correct duration of image trials by subtracting ITI (1s fix)
dl['duration_adj'] = dl.duration
dl.ix[dl.cond != 'rest', 'duration_adj'] = dl.ix[dl.cond != 'rest', 'duration_adj'] - 1 # subtract ITI

In [10]:
dl.head()


Out[10]:
run trial onset duration cond subcond resp acc respRT subid group duration_adj
0 1 1 12.0068 1.4946 object manmade manmade 1 0.8153 loc100 control 0.4946
1 1 2 13.9136 1.0869 object manmade manmade 1 0.8784 loc100 control 0.0869
2 1 3 15.0290 1.4716 object natural natural 1 0.6792 loc100 control 0.4716
3 1 4 16.5293 1.4712 object manmade manmade 1 0.6867 loc100 control 0.4712
4 1 5 18.0304 1.4701 object natural natural 1 0.5906 loc100 control 0.4701

Remove time for lead-in (12 s)


In [11]:
dl['onset_adj'] = dl.onset - 12
dl.head()


Out[11]:
run trial onset duration cond subcond resp acc respRT subid group duration_adj onset_adj
0 1 1 12.0068 1.4946 object manmade manmade 1 0.8153 loc100 control 0.4946 0.0068
1 1 2 13.9136 1.0869 object manmade manmade 1 0.8784 loc100 control 0.0869 1.9136
2 1 3 15.0290 1.4716 object natural natural 1 0.6792 loc100 control 0.4716 3.0290
3 1 4 16.5293 1.4712 object manmade manmade 1 0.6867 loc100 control 0.4712 4.5293
4 1 5 18.0304 1.4701 object natural natural 1 0.5906 loc100 control 0.4701 6.0304

In [12]:
durations = dl.onset - dl.onset.shift(1)
durations[durations > -5].hist(bins=20)
sns.distplot(durations[durations > -5])


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b844f50>

In [13]:
sns.distplot(dl[dl.duration_adj < 5].duration)


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x12131dd10>

In [ ]:

Switch Costs


In [14]:
dl_switch = dl[dl.cond != 'rest']
dl_switch['switch'] = dl_switch.cond ==  dl_switch.cond.shift(1)
dl_switch.switch = dl_switch.switch.apply(lambda x: 0 if x == True else 1)

for increment in [1, 2, 3, 4, 5]:
    dl_switch.switch = np.nansum([dl_switch.switch, 
                                  dl_switch.switch.apply(lambda x: 0 if x < increment else x+1).shift(1)], axis=0)


/Users/steph-backup/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/Users/steph-backup/anaconda/lib/python2.7/site-packages/pandas/core/generic.py:2387: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value

In [15]:
dl_resp = dl_switch[(dl_switch.resp != 'NR') & (dl_switch.switch > 0)]
means = dl_resp.groupby(['subid', 'switch', 'group', 'cond', 'subcond', 'acc']).mean().reset_index()

sns.factorplot(x='switch', hue='group', y='respRT', col='cond', 
               units='subid', ci=68, dodge=.1,palette=['dodgerblue', 'orange'],
               data=means.query('acc == 1'))


Out[15]:
<seaborn.axisgrid.FacetGrid at 0x10b856110>

In [16]:
dl_resp = dl_switch[(dl_switch.resp != 'NR') & (dl_switch.switch > 0)]
means = dl_resp.groupby(['subid', 'switch', 'group', 'acc']).mean().reset_index()
means.switch = means.switch.astype(int)

g = sns.factorplot(x='switch', hue='group', y='respRT', aspect=1.5,
               units='subid', ci=68, dodge=.1, palette=['dodgerblue', 'orange'],
               data=means.query('acc == 1'))
g.set_xlabels('trial relative to switch')
g.set_ylabels('RT (s)')

plt.savefig('/Users/steph-backup/Dropbox/Stanford/Presentations/AP/behav_loc_RTs_switch_acc.png')


Analyze RTs


In [17]:
dl_resp = dl[(dl.resp != 'NR') & (dl.cond != 'rest')]
means = dl_resp.groupby(['subid', 'group', 'cond', 'subcond', 'acc']).mean().reset_index()

sns.factorplot(x='cond', hue='acc', y='respRT', col='group',
               units='subid', ci=68, dodge=.1,
               data=means)


Out[17]:
<seaborn.axisgrid.FacetGrid at 0x122799f90>

In [18]:
dl_resp = dl[(dl.resp != 'NR') & (dl.cond != 'rest')]
means = dl_resp.groupby(['subid', 'group', 'cond', 'subcond', 'acc']).mean().reset_index()

sns.factorplot(x='subcond', aspect=1.5,
               order=['female', 'male', 'manmade', 'natural', 'indoor', 'outdoor'],
               hue='acc', y='respRT', col='group',
               units='subid', ci=68, dodge=.1,
               data=means)


Out[18]:
<seaborn.axisgrid.FacetGrid at 0x121608650>

Analyze Accuracy


In [19]:
dl_resp = dl[(dl.cond != 'rest')]
means = dl_resp.groupby(['subid', 'group', 'cond', 'subcond']).mean().reset_index()

sns.factorplot(x='cond', y='acc', hue='group',
               units='subid', ci=68, dodge=.1, aspect=1.2,
               data=means, palette=['dodgerblue', 'orange'])
sns.despine(trim=True)

plt.savefig('/Users/steph-backup/Dropbox/Stanford/Presentations/AP/behav_loc_acc.png')



In [20]:
dl_resp = dl[(dl.cond != 'rest')]
%R -i dl_resp


ERROR:root:Line magic function `%R` not found.

In [21]:
%%R

print(str(dl_resp))
contrasts(dl_resp$group) = c(1,-1); print(contrasts(dl_resp$group))
contrasts(dl_resp$cond) = cbind(placeVSother = c(1,1,-2), faceVSobj=c(1,-1,0)); print(contrasts(dl_resp$cond))

res = lmer(acc ~ group * cond + (1 | subid), data=dl_resp)


ERROR:root:Cell magic `%%R` not found.

In [ ]:


In [22]:
dl_resp = dl[(dl.cond != 'rest')]
means = dl_resp.groupby(['subid', 'group', 'cond', 'subcond']).mean().reset_index()

sns.factorplot(x='subcond', aspect=1.5,
               x_order=['female', 'male', 'manmade', 'natural', 'indoor', 'outdoor'],
               y='acc', hue='group', palette=['dodgerblue', 'orange'],
               units='subid', ci=68, dodge=.1,
               data=means)


/Users/steph-backup/anaconda/lib/python2.7/site-packages/seaborn-0.8.dev0-py2.7.egg/seaborn/categorical.py:3304: UserWarning: The `x_order` parameter has been renamed `order`
  UserWarning)
Out[22]:
<seaborn.axisgrid.FacetGrid at 0x124b54d90>

In [ ]:


Generate onset files

Each csv file must have a column for run, condition, onset, duration, and value


In [23]:
output_filename = 'localizer_cond.csv'
output_dir = '/Volumes/group/awagner/sgagnon/AP/data'

for subid in subj_info.subid:
    print subid
    
    dsub = dl[(dl.subid == subid) & (dl.cond != 'rest')]
    ddesign = pd.DataFrame({'run': dsub.run,
                            'condition': dsub.cond,
                            'onset': dsub.onset_adj,
                            'duration': 0.5,
                            'value': 1})
    
    sub_output_dir = op.join(output_dir, subid.replace('loc', 'ap'), 'design')
    if not os.path.exists(sub_output_dir):
        os.makedirs(sub_output_dir)
    
    ddesign.to_csv(op.join(sub_output_dir, output_filename), index=False)


loc100
loc101
loc102
loc103
loc104
loc105
loc107
loc108
loc109
loc110
loc111
loc113
loc114
loc115
loc116
loc117
loc118
loc119
loc120
loc121
loc122
loc158
loc150
loc151
loc152
loc153
loc154
loc155
loc156
loc157
loc159
loc160
loc161
loc162
loc163
loc164
loc165
loc166
loc167
loc168
loc169
loc170
loc172
loc171
loc173
loc174

In [24]:
output_filename = 'localizer_cond_dur=RT.csv'
output_dir = '/Volumes/group/awagner/sgagnon/AP/data'

for subid in subj_info.subid:
    print subid
    
    dsub = dl[(dl.subid == subid) & (dl.cond != 'rest')]
    ddesign = pd.DataFrame({'run': dsub.run,
                            'condition': dsub.cond,
                            'onset': dsub.onset_adj,
                            'duration': dsub.respRT,
                            'value': 1})
    
    sub_output_dir = op.join(output_dir, subid.replace('loc', 'ap'), 'design')
    if not os.path.exists(sub_output_dir):
        os.makedirs(sub_output_dir)
    
    ddesign.to_csv(op.join(sub_output_dir, output_filename), index=False)


loc100
loc101
loc102
loc103
loc104
loc105
loc107
loc108
loc109
loc110
loc111
loc113
loc114
loc115
loc116
loc117
loc118
loc119
loc120
loc121
loc122
loc158
loc150
loc151
loc152
loc153
loc154
loc155
loc156
loc157
loc159
loc160
loc161
loc162
loc163
loc164
loc165
loc166
loc167
loc168
loc169
loc170
loc172
loc171
loc173
loc174

In [25]:
output_filename = 'localizer_cond_mvpa.csv'
output_dir = '/Volumes/group/awagner/sgagnon/AP/data'

for subid in subj_info.subid:
    print subid
    
    dsub = dl[(dl.subid == subid)] # & (dl.cond != 'rest')
    ddesign = pd.DataFrame({'run': dsub.run + 6,
                            'condition': dsub.cond,
                            'onset': dsub.onset_adj,
                            'duration': 0.5,
                            'value': 1})
    
    ddesign.to_csv(op.join(output_dir, subid.replace('loc', 'ap'), 
                           'design', output_filename), index=False)


loc100
loc101
loc102
loc103
loc104
loc105
loc107
loc108
loc109
loc110
loc111
loc113
loc114
loc115
loc116
loc117
loc118
loc119
loc120
loc121
loc122
loc158
loc150
loc151
loc152
loc153
loc154
loc155
loc156
loc157
loc159
loc160
loc161
loc162
loc163
loc164
loc165
loc166
loc167
loc168
loc169
loc170
loc172
loc171
loc173
loc174

In [ ]:


In [ ]:


In [ ]: