Face versus Place


In [1]:
import os
import glob

import numpy as np
import nibabel as nb
import pylab as pl

import matplotlib.gridspec as gridspec

from scipy.stats import ttest_1samp
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_classif
from sklearn.pipeline import Pipeline
from nilearn.input_data import NiftiMasker
from nipy.labs.viz import plot_map
from nipy.labs.viz_tools import cm
from nipy.modalities.fmri.glm import GeneralLinearModel

from joblib import Memory

root_dir = '/home/ys218403/Data/intra_stats'
cache_dir = '/home/ys218403/Data/cache'
result_dir = '/home/ys218403/Data/classif'


/usr/lib/python2.7/dist-packages/nose/util.py:14: DeprecationWarning: The compiler package is deprecated and removed in Python 3.x.
  from compiler.consts import CO_GENERATOR

Utilities


In [2]:
def squeeze_niimg(niimg):
    data = np.squeeze(niimg.get_data())
    return nb.Nifti1Image(data, niimg.get_affine())

In [3]:
def plot_niimg(niimg, title, percentile=90, threshold=None, cut_coords='auto', figure=None, ax=None):
    if isinstance(cut_coords, int):
        slicer = 'z'
    else:
        slicer = 'ortho'
    niimg = squeeze_niimg(niimg)
    data = niimg.get_data()
    affine = niimg.get_affine()
    vmax = np.max(np.abs(data))
    if threshold is None:
        threshold = np.percentile(data, percentile)
    niimg.to_filename('%s.nii.gz' % title.replace(' ', '_'))
    slicer_ = plot_map(data, affine, axes=ax, figure=figure,
                       cut_coords=cut_coords, slicer=slicer,
                       vmin=-vmax, vmax=vmax, cmap=cm.cold_hot,
                       title=title, threshold=threshold)

    if slicer == 'z':
        slicer = sorted(slicer_.axes.keys())[-1]
    elif slicer == 'ortho':
        slicer = 'z'
    cax = slicer_.axes[slicer].ax.get_figure().add_axes([0.999, 0.05, 0.03, 0.8])
    pl.colorbar(slicer_.axes[slicer].ax.images[1], cax=cax)
    return slicer

In [4]:
def classify(clf, loader, cv, X, y):
    scores = []
    coefs = []
    for i, (train, test) in enumerate(cv):
        y_train = y[train]
        y_test = y[test]
        X_train = loader.fit_transform(X[train], y_train);
        X_test = loader.transform(X[test]);

        score = clf.fit(X_train, y_train).score(X_test, y_test)
        scores.append(score)
    
        coef = loader.inverse_transform(clf.coef_[0]);
        coefs.append(squeeze_niimg(coef))

    # compute and plot mean coefs
    coef_mean = np.mean([coef.get_data() for coef in coefs], axis=0)
    coef_ = nb.Nifti1Image(coef_mean, affine=coef.get_affine())
    
    return coef_, scores

Classification

Loading data


In [5]:
target = []
niimgs = []

for study_dir in glob.glob(root_dir + '/*'):
    study_id = os.path.split(study_dir)[1]
    house = glob.glob(study_dir + '/sub???/*house_vs_baseline*.nii.gz')
    face = glob.glob(study_dir + '/sub???/*face_vs_baseline*.nii.gz')
    niimgs.extend(house)
    niimgs.extend(face)

    target.append([(study_id, '0house')] * len(house))
    target.append([(study_id, '1face')] * len(face))

target = np.vstack(target)
niimgs = np.array(niimgs)
le = LabelEncoder()
y = le.fit_transform(target[:, 1])

masker = NiftiMasker(mask='mask.nii.gz',
                     standardize=True,
                     memory=Memory(cache_dir, verbose=0),
                     verbose=0)
selector = SelectPercentile(f_classif, percentile=50)
loader = Pipeline([('masker', masker), ('selector', selector)])

LogisticRegression l2


In [6]:
# all studies
clf = LogisticRegression()
cv = StratifiedKFold(y=y, n_folds=5)

coef, scores = classify(clf, loader, cv, niimgs, y)
plot_niimg(coef, 'face_coef', percentile=99, cut_coords=(43, -55, -20));


/home/ys218403/.local/lib/python2.7/site-packages/nilearn/input_data/base_masker.py:262: UserWarning: [NiftiMasker.fit] Generation of a mask has been requested (y != None) while a mask has been provided at masker creation. Given mask will be used.
  ' will be used.' % self.__class__.__name__)
/home/ys218403/.local/lib/python2.7/site-packages/nilearn/_utils/cache_mixin.py:206: UserWarning: memory_level is currently set to 0 but a Memory object has been provided. Setting memory_level to 1.
  warnings.warn("memory_level is currently set to 0 but "

In [7]:
# adding studies
clf = LogisticRegression()

select_studies = []
for study_id in np.unique(target[:, 0]):
    select_studies.append(study_id)
    studies = np.in1d(target[:, 0], select_studies) 

    cv = StratifiedKFold(y=y[studies], n_folds=5)
    coef, scores = classify(clf, loader, cv, niimgs[studies], y[studies])
    plot_niimg(coef, 'face_%s' % '_'.join(select_studies), percentile=99, cut_coords=(43, -55, -20));


/home/ys218403/.local/lib/python2.7/site-packages/sklearn/feature_selection/univariate_selection.py:112: RuntimeWarning: invalid value encountered in divide
  f = msb / msw

In [8]:
# predicting studies

clf = LogisticRegression()
enc = LabelEncoder()

selection = ['ds105', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection)
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, percentile=99, cut_coords=6);

selection = ['pinel2009twins', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection) 
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, percentile=99, cut_coords=6);


LogisticRegression l1


In [10]:
# all studies
clf = LogisticRegression(penalty='l1')
cv = StratifiedKFold(y=y, n_folds=5)

coef, scores = classify(clf, loader, cv, niimgs, y)
plot_niimg(coef, 'face_coef', threshold=1e-3, cut_coords=(43, -55, -20));



In [11]:
# adding studies
clf = LogisticRegression(penalty='l1')

select_studies = []
for study_id in np.unique(target[:, 0]):
    select_studies.append(study_id)
    studies = np.in1d(target[:, 0], select_studies) 

    cv = StratifiedKFold(y=y[studies], n_folds=5)
    coef, scores = classify(clf, loader, cv, niimgs[studies], y[studies])
    plot_niimg(coef, 'face_%s' % '_'.join(select_studies), threshold=1e-3, cut_coords=(43, -55, -20));



In [12]:
# predicting studies

clf = LogisticRegression(penalty='l1')
enc = LabelEncoder()

selection = ['ds105', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection)
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, threshold=1e-6, cut_coords=6);

selection = ['pinel2009twins', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection) 
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, threshold=1e-6, cut_coords=6);


Group level analysis

Loading


In [13]:
lb = LabelBinarizer()

Y = masker.fit_transform(niimgs);
X = lb.fit_transform(target[:, 1])[:, 0]
Y_house = Y[X == 0]
Y_face = Y[X == 1]

One sample t-test


In [14]:
t_val, p_val = ttest_1samp(Y_face - Y_house, 0)
t_val = masker.inverse_transform(t_val)
plot_niimg(t_val, 'face_t_val max=%.03f' % t_val.get_data().max(), percentile=99, cut_coords=(43, -55, -20));


Group level study differences

Loading


In [15]:
target = []
niimgs = []
studies = []
for study_dir in glob.glob(root_dir + '/*'):
    study_id = os.path.split(study_dir)[1]
    studies.append(study_id)
    imgs = glob.glob(study_dir + '/sub???/*face_vs_house*.nii.gz')
    niimgs.extend(imgs)
    target.extend([(study_id, )] * len(imgs))
    
Y = masker.fit_transform(niimgs)
X = lb.fit_transform(target)
X[:, -1] = 0

GLM


In [16]:
glm = GeneralLinearModel(X)
glm.fit(Y, model='ols')

for i, study_id in enumerate(studies[:-1]):
    con_val = np.zeros(len(studies))
    con_val[i] = 1
    contrast = glm.contrast(con_val, contrast_type='t')
    z_map = masker.inverse_transform(contrast.z_score());
    plot_niimg(z_map, '%s vs %s' % (study_id, studies[-1]), percentile=99, cut_coords=6);