In [1]:
import os
import glob
import numpy as np
import nibabel as nb
import pylab as pl
import matplotlib.gridspec as gridspec
from scipy.stats import ttest_1samp
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_classif
from sklearn.pipeline import Pipeline
from nilearn.input_data import NiftiMasker
from nipy.labs.viz import plot_map
from nipy.labs.viz_tools import cm
from nipy.modalities.fmri.glm import GeneralLinearModel
from joblib import Memory
root_dir = '/home/ys218403/Data/intra_stats'
cache_dir = '/home/ys218403/Data/cache'
result_dir = '/home/ys218403/Data/classif'
In [2]:
def squeeze_niimg(niimg):
data = np.squeeze(niimg.get_data())
return nb.Nifti1Image(data, niimg.get_affine())
In [3]:
def plot_niimg(niimg, title, percentile=90, threshold=None, cut_coords='auto', figure=None, ax=None):
if isinstance(cut_coords, int):
slicer = 'z'
else:
slicer = 'ortho'
niimg = squeeze_niimg(niimg)
data = niimg.get_data()
affine = niimg.get_affine()
vmax = np.max(np.abs(data))
if threshold is None:
threshold = np.percentile(data, percentile)
niimg.to_filename('%s.nii.gz' % title.replace(' ', '_'))
slicer_ = plot_map(data, affine, axes=ax, figure=figure,
cut_coords=cut_coords, slicer=slicer,
vmin=-vmax, vmax=vmax, cmap=cm.cold_hot,
title=title, threshold=threshold)
if slicer == 'z':
slicer = sorted(slicer_.axes.keys())[-1]
elif slicer == 'ortho':
slicer = 'z'
cax = slicer_.axes[slicer].ax.get_figure().add_axes([0.999, 0.05, 0.03, 0.8])
pl.colorbar(slicer_.axes[slicer].ax.images[1], cax=cax)
return slicer
In [4]:
def classify(clf, loader, cv, X, y):
scores = []
coefs = []
for i, (train, test) in enumerate(cv):
y_train = y[train]
y_test = y[test]
X_train = loader.fit_transform(X[train], y_train);
X_test = loader.transform(X[test]);
score = clf.fit(X_train, y_train).score(X_test, y_test)
scores.append(score)
coef = loader.inverse_transform(clf.coef_[0]);
coefs.append(squeeze_niimg(coef))
# compute and plot mean coefs
coef_mean = np.mean([coef.get_data() for coef in coefs], axis=0)
coef_ = nb.Nifti1Image(coef_mean, affine=coef.get_affine())
return coef_, scores
In [5]:
target = []
niimgs = []
for study_dir in glob.glob(root_dir + '/*'):
study_id = os.path.split(study_dir)[1]
house = glob.glob(study_dir + '/sub???/*house_vs_baseline*.nii.gz')
face = glob.glob(study_dir + '/sub???/*face_vs_baseline*.nii.gz')
niimgs.extend(house)
niimgs.extend(face)
target.append([(study_id, '0house')] * len(house))
target.append([(study_id, '1face')] * len(face))
target = np.vstack(target)
niimgs = np.array(niimgs)
le = LabelEncoder()
y = le.fit_transform(target[:, 1])
masker = NiftiMasker(mask='mask.nii.gz',
standardize=True,
memory=Memory(cache_dir, verbose=0),
verbose=0)
selector = SelectPercentile(f_classif, percentile=50)
loader = Pipeline([('masker', masker), ('selector', selector)])
In [6]:
# all studies
clf = LogisticRegression()
cv = StratifiedKFold(y=y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs, y)
plot_niimg(coef, 'face_coef', percentile=99, cut_coords=(43, -55, -20));
In [7]:
# adding studies
clf = LogisticRegression()
select_studies = []
for study_id in np.unique(target[:, 0]):
select_studies.append(study_id)
studies = np.in1d(target[:, 0], select_studies)
cv = StratifiedKFold(y=y[studies], n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], y[studies])
plot_niimg(coef, 'face_%s' % '_'.join(select_studies), percentile=99, cut_coords=(43, -55, -20));
In [8]:
# predicting studies
clf = LogisticRegression()
enc = LabelEncoder()
selection = ['ds105', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection)
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, percentile=99, cut_coords=6);
selection = ['pinel2009twins', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection)
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, percentile=99, cut_coords=6);
In [10]:
# all studies
clf = LogisticRegression(penalty='l1')
cv = StratifiedKFold(y=y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs, y)
plot_niimg(coef, 'face_coef', threshold=1e-3, cut_coords=(43, -55, -20));
In [11]:
# adding studies
clf = LogisticRegression(penalty='l1')
select_studies = []
for study_id in np.unique(target[:, 0]):
select_studies.append(study_id)
studies = np.in1d(target[:, 0], select_studies)
cv = StratifiedKFold(y=y[studies], n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], y[studies])
plot_niimg(coef, 'face_%s' % '_'.join(select_studies), threshold=1e-3, cut_coords=(43, -55, -20));
In [12]:
# predicting studies
clf = LogisticRegression(penalty='l1')
enc = LabelEncoder()
selection = ['ds105', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection)
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, threshold=1e-6, cut_coords=6);
selection = ['pinel2009twins', 'gauthier2009resonance']
studies = np.in1d(target[:, 0], selection)
studies_y = enc.fit_transform(target[studies, 0])
cv = StratifiedKFold(y=studies_y, n_folds=5)
coef, scores = classify(clf, loader, cv, niimgs[studies], studies_y)
title = ' vs '.join(enc.classes_[::-1])
plot_niimg(coef, title, threshold=1e-6, cut_coords=6);
In [13]:
lb = LabelBinarizer()
Y = masker.fit_transform(niimgs);
X = lb.fit_transform(target[:, 1])[:, 0]
Y_house = Y[X == 0]
Y_face = Y[X == 1]
In [14]:
t_val, p_val = ttest_1samp(Y_face - Y_house, 0)
t_val = masker.inverse_transform(t_val)
plot_niimg(t_val, 'face_t_val max=%.03f' % t_val.get_data().max(), percentile=99, cut_coords=(43, -55, -20));
In [15]:
target = []
niimgs = []
studies = []
for study_dir in glob.glob(root_dir + '/*'):
study_id = os.path.split(study_dir)[1]
studies.append(study_id)
imgs = glob.glob(study_dir + '/sub???/*face_vs_house*.nii.gz')
niimgs.extend(imgs)
target.extend([(study_id, )] * len(imgs))
Y = masker.fit_transform(niimgs)
X = lb.fit_transform(target)
X[:, -1] = 0
In [16]:
glm = GeneralLinearModel(X)
glm.fit(Y, model='ols')
for i, study_id in enumerate(studies[:-1]):
con_val = np.zeros(len(studies))
con_val[i] = 1
contrast = glm.contrast(con_val, contrast_type='t')
z_map = masker.inverse_transform(contrast.z_score());
plot_niimg(z_map, '%s vs %s' % (study_id, studies[-1]), percentile=99, cut_coords=6);