Iterated Estimation of fMRI Data

Single-trial deconvolution within the Lyman framework, based on the LS-S ("least squares, separate") method of Mumford et al. (2012). For each subject and run, a separate model is fit for each trial of interest: that trial receives its own regressor while all remaining trials are collapsed into a single regressor, and the $\beta$ or $t$-statistic for the trial regressor is extracted for all voxels within a given mask. GLM fitting uses a combination of OLS and AR(1) procedures. This step assumes that the timeseries has already been preprocessed and that all runs have been linearly registered to the first run's space.
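
The LS-S scheme in miniature: the cell below is a minimal, runnable sketch on simulated data, using plain OLS in place of the AR(1) fitting applied later in this notebook. Every name in it (trial_regs, lss_betas, ...) is illustrative only and not part of the pipeline.


In [ ]:
import numpy as np

# Simulate a design: each column stands in for one trial's HRF-convolved regressor
rng = np.random.RandomState(0)
n_tp, n_trials = 100, 6
trial_regs = rng.rand(n_tp, n_trials)
y = np.dot(trial_regs, rng.rand(n_trials)) + 0.1 * rng.randn(n_tp)

# LS-S: one model per trial, with all other trials collapsed into one regressor
lss_betas = []
for i in range(n_trials):
    other = np.delete(trial_regs, i, axis=1).sum(axis=1)
    X = np.column_stack([trial_regs[:, i], other, np.ones(n_tp)])
    beta = np.linalg.lstsq(X, y)[0]     # OLS fit of [trial, other, intercept]
    lss_betas.append(beta[0])           # keep only this trial's own estimate
print np.round(lss_betas, 2)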

Import necessary packages


In [1]:
import pandas as pd
import json
from scipy import stats, signal, linalg
from sklearn.decomposition import PCA
import nibabel as nib
import nipype
from nipype import Node, SelectFiles, DataSink, IdentityInterface
import matplotlib as mpl
mpl.use("Agg")  # must run before pyplot is imported to take effect
import matplotlib.pyplot as plt
from nipype.interfaces import fsl
from nipy.modalities.fmri.glm import FMRILinearModel
from nibabel import Nifti1Image, save
import numpy as np

import os
import os.path as op
import shutil
import sys
import copy

import lyman
import moss
from lyman import tools
from lyman import default_experiment_parameters
import lyman.workflows as wf
from moss import glm
import seaborn as sns

%matplotlib inline
sns.set(context="notebook", style="ticks", font="Arial")
pd.set_option('display.precision', 3)



Define helper functions


In [2]:
def plotSimilarityStruct(run_data, run_evs):
    from scipy.cluster.hierarchy import linkage, dendrogram
    from scipy.spatial.distance import pdist, squareform

    # Correlation distance between trial patterns (columns of run_data)
    data_dist = pdist(run_data.T, 'correlation')
    Y = linkage(data_dist, method='single')

    # Compute and plot first dendrogram (left of the heatmap).
    fig = plt.figure(figsize=(8, 8))
    ax1 = fig.add_axes([0.05, 0.1, 0.2, 0.6])  # [x, y, width, height]
    Z1 = dendrogram(Y, orientation='right', labels=run_evs, distance_sort=True)
    ax1.set_xticks([])

    # Compute and plot second dendrogram.
    ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
    Z2 = dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Compute and plot the heatmap, with rows/columns ordered by the dendrogram leaves
    axmatrix = fig.add_axes([0.37,0.1,0.6,0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = squareform(data_dist)
    D = D[idx1,:]
    D = D[:,idx2]
    im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar.
    axcolor = fig.add_axes([1,0.1,0.02,0.6])
    plt.colorbar(im, cax=axcolor)
    
    
def removeSegment(run_evs, sep, n_keep):
    # Keep only the first n_keep sep-delimited segments of each EV name
    # (e.g., '1-8-3' -> '1-8' with sep='-', n_keep=2)
    return [sep.join(x.split(sep)[:n_keep]) for x in run_evs]

def transform_fisherZ(r):
    # Fisher r-to-z transform (equivalent to np.arctanh)
    z = 0.5 * np.log((1 + r) / (1 - r))
    return z
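
A quick sanity check on these helpers (the EV names below are made up for illustration):


In [ ]:
print removeSegment(['1-8-3', '1-29-1'], '-', 2)            # -> ['1-8', '1-29']
print np.allclose(transform_fisherZ(0.5), np.arctanh(0.5))  # -> True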

Project-specific parameters


In [5]:
experiment = 'objfam'
altmodel = 'trial-prototype'
nruns = 12
subject_list = '/Volumes/group/awagner/sgagnon/ObjFam/data/subids_subset_no23or19.txt'
unsmoothed = True
condition_labels = True # if True, use condition_file to label each trial with its condition
condition_filename = 'trial-prototype-condition.csv' # only used if condition_labels = True

In [6]:
project = lyman.gather_project_info()
exp = lyman.gather_experiment_info(experiment, altmodel)
group = np.loadtxt(subject_list, str).tolist()
exp_base = experiment
exp_name = "-".join([exp_base, altmodel])

data_dir = project["data_dir"]
analysis_dir = project["analysis_dir"]

smoothing = "unsmoothed" if unsmoothed else "smoothed"

In [9]:
data_dir


Out[9]:
'/Volumes/group/awagner/sgagnon/ObjFam/data'

Iterate through subjects and runs


In [10]:
mask_name = 'lateraloccipital'
out_val = 't' # t or beta

In [12]:
sub_mat = []
group_evs = []
for subid in group:
    print subid

    design_file = op.join(data_dir, subid, "design", exp["design_name"] + ".csv")
    sub_dmat = pd.read_csv(design_file)
    
    if condition_labels:
        condition_file = op.join(data_dir, subid, "design", condition_filename)
        cond_map = pd.read_csv(condition_file)
    
    # Load the anatomical mask as a boolean 3D array
    mask_file = op.join(data_dir, subid, 'masks', mask_name + '.nii.gz')
    mask = nib.load(mask_file).get_data() == 1

    run_mat = []
    ev_list = []
    for run in range(1, nruns+1):
        print 'Run: ' + str(run)
        
        # Setup run specific directories
        # preproc timeseries registered to first run
        timeseries_dir = op.join(analysis_dir, experiment, subid, "reg/epi/unsmoothed/run_" + str(run))
        preproc_dir = op.join(analysis_dir, experiment, subid, "preproc/run_" + str(run))
        
        realign_file = op.join(preproc_dir, "realignment_params.csv")
        artifact_file = op.join(preproc_dir, "artifacts.csv")
        timeseries_file = op.join(timeseries_dir, "timeseries_xfm.nii.gz")
        
        # Build the model design
        run_dmat = sub_dmat[sub_dmat.run == run]
        
        realign = pd.read_csv(realign_file)
        realign = realign.filter(regex="rot|trans").apply(stats.zscore)
        
        artifacts = pd.read_csv(artifact_file).max(axis=1)
        ntp = len(artifacts)
        tr = exp["TR"]
        
        hrf = getattr(glm, exp["hrf_model"])
        hrf = hrf(exp["temporal_deriv"], tr, **exp["hrf_params"])
        
        ev_mat = []
        for ev in run_dmat.condition.unique():
            ev_list.append(ev)
            
            # LS-S design: this trial keeps its own regressor, and all
            # other trials are collapsed into a single 'other' regressor
            design_LSS = run_dmat.copy()
            design_LSS.loc[design_LSS.condition != ev, 'condition'] = 'other'
            
            design_kwargs = dict(confounds=realign,
                                 artifacts=artifacts,
                                 tr=tr,
                                 condition_names=sorted(design_LSS.condition.unique()),  # sorting keeps the trial regressor ahead of 'other'
                                 confound_pca=exp["confound_pca"],
                                 hpf_cutoff=exp["hpf_cutoff"])
            X = glm.DesignMatrix(design_LSS, hrf, ntp, **design_kwargs)
            
            # Fit model
            fmri_glm = FMRILinearModel(timeseries_file, np.array(X.design_matrix), mask=mask_file)
            fmri_glm.fit(do_scaling=True, model='ar1')

            # Get beta
            beta_hat = fmri_glm.glms[0].get_beta()
            
            # Output appropriate statistic
            if out_val == 'beta':
                ev_mat.append(beta_hat[0])
            elif out_val == 't':
                # t-statistic for the trial regressor: contrast [1, 0, ..., 0]
                num_reg = beta_hat.shape[0]
                con = [[1] + [0]*(num_reg-1)]
                t_map, = fmri_glm.contrast(con, con_id=ev, contrast_type='t')
                t_map = t_map.get_data()[mask].ravel()
                
                ev_mat.append(t_map)

        run_mat.append(ev_mat)
        
    sub_mat.append(run_mat)
    group_evs.append(ev_list)


subj03
Run: 1
Run: 2
...
Run: 12
[identical Run 1-12 progress output repeats for the remaining 17 subjects, subj04 through subj25]
/Users/steph-backup/anaconda/lib/python2.7/site-packages/nipy/modalities/fmri/glm.py:211: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  if self.labels_ == None or self.results_ == None:

In [14]:
data = np.array(sub_mat)   # object array: each cell holds one trial's voxel vector
evs = np.array(group_evs)

print 'Data shape (subid x run x trial): ' + str(data.shape)
print 'EVs shape (subid x trial): ' + str(evs.shape)


Data shape (subid x run x trial): (18, 12, 30)
EVs shape (subid x trial): (18, 360)

In [15]:
data[5,0]
group[5]


Out[15]:
'subj08'

In [ ]:
sub_num = 8
run_data = data[sub_num, 0]
run_data = np.vstack(run_data).T  # voxel x ev
run_evs = evs[sub_num].reshape(12, 30)[0]

order = np.argsort(run_evs)
sns.corrplot(run_data[:, order],   # reorder columns (trials), not rows (voxels)
             names=run_evs[order],
             diag_names=False,
             annot=False, cmap_range=(-1, 1))

In [ ]:
df = pd.DataFrame(run_data, columns=run_evs)
corr_mat = df.corr()
sns.clustermap(corr_mat)

Compute pairwise correlations between the two trials of each condition
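
After removeSegment collapses the trial suffix, each condition label appears twice among the DataFrame columns, so df[cond] returns both presentations and .corr() yields a 2x2 matrix whose off-diagonal entry is the between-presentation correlation. A toy demonstration with made-up values:


In [ ]:
# Two columns sharing the name '1-8' stand in for a repeated item
toy = pd.DataFrame(np.random.randn(50, 2), columns=['1-8', '1-8'])
print toy['1-8'].corr()                   # 2x2 correlation matrix
print np.array(toy['1-8'].corr())[0][1]   # off-diagonal: between-trial r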


In [30]:
data.shape[0]


Out[30]:
18

In [32]:
sub_data.shape[0]


Out[32]:
12

In [41]:
df_corr = pd.DataFrame(columns=['subid', 'run', 'condition', 'corr'])
df_condmap = pd.DataFrame()

for sub_num in range(data.shape[0]):
    print group[sub_num]
    subid = group[sub_num]
    sub_data = data[sub_num]
    sub_evs = evs[sub_num].reshape(12,30)
    
    condition_file = op.join(data_dir, subid, "design", condition_filename)
    cond_map = pd.read_csv(condition_file)
    cond_map['subid'] = subid
    df_condmap = pd.concat([df_condmap, cond_map])
    
    for run in range(sub_data.shape[0]):
        
        run_data = sub_data[run]
        run_data = np.vstack(run_data).T # voxel x ev
        run_evs = sub_evs[run]

        run_conds = removeSegment(run_evs, '-', 2)
        df = pd.DataFrame(run_data, columns=run_conds)

        for cond in set(run_conds):
            # off-diagonal of the 2x2 matrix: correlation between the
            # two presentations of this condition
            corr_value = np.array(df[cond].corr())[0][1]
            df_corr = df_corr.append([dict(subid=subid,
                                           run=run+1,
                                           condition=cond, 
                                           corr=corr_value)], 
                                     ignore_index=True)
            
            
df_corr.head()


subj03
subj04
...
subj25
Out[41]:
subid run condition corr
0 subj03 1 1-8 0.56
1 subj03 1 1-29 0.59
2 subj03 1 1-1 0.28
3 subj03 1 1-26 0.69
4 subj03 1 1-5 0.62

In [42]:
df_corr.subid.unique().shape


Out[42]:
(18,)

In [43]:
df_condmap.subid.unique().shape


Out[43]:
(18,)

In [45]:
df_corr2 = df_corr.merge(df_condmap, on=['subid', 'run', 'condition'])
df_corr2.head()


Out[45]:
subid run condition corr morphmem
0 subj03 1 1-8 0.56 1_new
1 subj03 1 1-29 0.59 1_old
2 subj03 1 1-1 0.28 1_guess
3 subj03 1 1-26 0.69 2_guess
4 subj03 1 1-5 0.62 3_guess

In [46]:
df_corr2.subid.unique().shape


Out[46]:
(18,)

In [ ]:
sns.distplot(df_corr2.ix[1500:3000]['corr'])

In [25]:
sns.set(context='poster', style='whitegrid')

In [48]:
data_corr = df_corr2.join(pd.DataFrame(df_corr2.morphmem.str.split('_').tolist(),
                                       columns=['morph', 'resp']))

sns.factorplot(x='morph', y='corr', hue='resp',dodge=0.1,
               hue_order=['new', 'old'],
               ci=68, units='subid', data=data_corr)


Out[48]:
<seaborn.axisgrid.FacetGrid at 0x12128d650>

In [49]:
%load_ext rpy2.ipython
%R require(lme4)
%R require(lmerTest)
%R require(ggplot2)


The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
Out[49]:
<Vector - Python:0x11b598518 / R:0x148a711f8>
[       1]

In [60]:
%R -i data_corr

In [63]:
%%R
print(str(data_corr))

data_corr$morph_q = as.numeric(data_corr$morph)

data_corr_noguess = data_corr[data_corr$resp != 'guess',]
data_corr_noguess$resp = factor(data_corr_noguess$resp)
print(str(data_corr_noguess))

contrasts(data_corr_noguess$resp) = c(-1,1)
print(contrasts(data_corr_noguess$resp))


'data.frame':	3240 obs. of  8 variables:
 $ subid    : Factor w/ 18 levels "subj03","subj04",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ run      : num [1:3240(1d)] 1 1 1 1 1 1 1 1 1 1 ...
 $ condition: Factor w/ 360 levels "1-1","1-10","1-11",..: 29 22 1 19 26 15 16 7 17 8 ...
 $ corr     : num [1:3240(1d)] 0.558 0.59 0.28 0.688 0.621 ...
 $ morphmem : Factor w/ 9 levels "1_guess","1_new",..: 2 3 1 4 7 6 8 5 6 8 ...
 $ morph    : Factor w/ 3 levels "1","2","3": 1 1 1 2 3 2 3 2 2 3 ...
 $ resp     : Factor w/ 3 levels "guess","new",..: 2 3 1 1 1 3 2 2 3 2 ...
 $ morph_q  : num  1 1 1 2 3 2 3 2 2 3 ...
NULL
'data.frame':	2977 obs. of  8 variables:
 $ subid    : Factor w/ 18 levels "subj03","subj04",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ run      : num [1:2977(1d)] 1 1 1 1 1 1 1 1 1 1 ...
 $ condition: Factor w/ 360 levels "1-1","1-10","1-11",..: 29 22 15 16 7 17 8 28 5 24 ...
 $ corr     : num [1:2977(1d)] 0.558 0.59 0.342 0.396 0.474 ...
 $ morphmem : Factor w/ 9 levels "1_guess","1_new",..: 2 3 6 8 5 6 8 9 3 8 ...
 $ morph    : Factor w/ 3 levels "1","2","3": 1 1 2 3 2 2 3 3 1 3 ...
 $ resp     : Factor w/ 2 levels "new","old": 1 2 2 1 1 2 1 2 2 1 ...
 $ morph_q  : num  1 1 2 3 2 2 3 3 1 3 ...
NULL
    [,1]
new   -1
old    1

In [64]:
%%R 

res1 = lmer(corr ~ morph_q * resp + (1|subid), data=data_corr_noguess)
print(summary(res1))


Linear mixed model fit by REML ['merModLmerTest']
Formula: corr ~ morph_q * resp + (1 | subid)
   Data: data_corr_noguess

REML criterion at convergence: -3764

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.1586 -0.5544  0.0742  0.6684  3.6024 

Random effects:
 Groups   Name        Variance Std.Dev.
 subid    (Intercept) 0.007518 0.0867  
 Residual             0.015936 0.1262  
Number of obs: 2977, groups: subid, 18

Fixed effects:
                Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)    3.565e-01  2.178e-02  2.140e+01  16.366 1.44e-13 ***
morph_q       -2.552e-03  3.236e-03  2.954e+03  -0.789  0.43024    
resp1          2.148e-02  7.605e-03  2.956e+03   2.825  0.00476 ** 
morph_q:resp1 -9.573e-03  3.266e-03  2.956e+03  -2.931  0.00341 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) mrph_q resp1 
morph_q     -0.324              
resp1       -0.188  0.425       
mrph_q:rsp1  0.147 -0.315 -0.936

In [20]:
data_corr['morph_q'] = data_corr.morph.astype('int')

In [ ]:
data_group = data_corr.groupby(['subid', 'morph']).mean().reset_index()

In [ ]:
data_group.head()

In [ ]:
data_group['z'] = transform_fisherZ(data_group['corr'])
data_group.head()

In [ ]:
sns.violinplot(x='morph', y='z', data=data_group, 
               inner="points")

In [ ]:
sns.coefplot("z~scale(morph_q)", 
             data=data_group,
             ci=68)
