Andres HOYOS-IDROBO, Gael VAROQUAUX and Bertrand THIRION
PARIETAL TEAM, INRIA, CEA, University Paris-Saclay
Presented on: the 6th International workshop on Pattern Recognition in Neuroimaging(PRNI) 2016. Trento, Italy
In [1]:
%matplotlib inline
import numpy as np
import time
import matplotlib.pyplot as plt
from nilearn.plotting import plot_stat_map
from nilearn.input_data import NiftiMasker
In [2]:
# Fetching haxby dataset
from nilearn import datasets
data_files = datasets.fetch_haxby(n_subjects=1)
masker = NiftiMasker(smoothing_fwhm=4, standardize=True, mask_strategy='epi',
memory='cache', memory_level=1)
labels = np.recfromcsv(data_files.session_target[0], delimiter=" ")
# Restrict to face and house conditions
target = labels['labels']
condition_mask = np.logical_or(target == b"face", target == b"house")
# Split data into train and test samples, using the chunks
condition_mask_train = np.logical_and(condition_mask, labels['chunks'] <= 6)
condition_mask_test = np.logical_and(condition_mask, labels['chunks'] > 6)
X_masked = masker.fit_transform(data_files['func'][0])
X_train = X_masked[condition_mask_train]
X_test = X_masked[condition_mask_test]
y_train = target[condition_mask_train]
y_test = target[condition_mask_test]
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer(pos_label=1, neg_label=-1)
y_train = lb.fit_transform(y_train).ravel()
y_test = lb.transform(y_test).ravel()
In [3]:
# Fit model on train data and predict on test data
from sklearn.linear_model import LogisticRegressionCV
clf = LogisticRegressionCV(Cs=10, penalty='l2')
ti = time.time()
clf.fit(X_train, y_train)
to_raw = time.time() - ti
y_pred = clf.predict(X_test)
accuracy = (y_pred == y_test).mean() * 100.
raw_coef = masker.inverse_transform(clf.coef_)
print("classification accuracy : %g%%, time %.4fs" % (accuracy, to_raw))
In [4]:
from sklearn.kernel_approximation import Nystroem
class LinearNistroem(Nystroem):
"""We are using a linear kernel only and adding the invertion method.
Parameters
-----------
n_components: int, the number of components should be at most n
random_state: int, the random seed (optional)
"""
def __init__(self, n_components=100, random_state=None):
super(LinearNistroem, self).__init__(
n_components=n_components, kernel='linear',
random_state=random_state)
def fit_transform(self, X, y=None):
self.fit(X)
return self.transform(X)
def inverse_transform(self, X):
return X.dot(self.normalization_).dot(self.components_)
In [5]:
nystroem = LinearNistroem(n_components=80)
X_train_nys = nystroem.fit_transform(X_train)
X_test_nys = nystroem.transform(X_test)
ti = time.time()
clf.fit(X_train_nys, y_train)
to_nys = time.time() - ti
y_pred = clf.predict(X_test_nys)
accuracy = (y_pred == y_test).mean() * 100.
nys_coef = masker.inverse_transform(nystroem.inverse_transform(clf.coef_))
print("classification accuracy : %g%%, time %.4fs" % (accuracy, to_nys))
In [6]:
from nilearn.plotting import plot_stat_map
bg_img = data_files['anat'][0]
plot_stat_map(raw_coef, display_mode='yz', bg_img=bg_img, title=r'$non-reduced$', cut_coords=(-34, -16))
plot_stat_map(nys_coef, display_mode='yz', bg_img=bg_img, title=r'$Nystr\"om$', cut_coords=(-34, -16))
from scipy.stats import pearsonr
raw_masked = masker.transform(raw_coef).squeeze()
nys_masked = masker.transform(nys_coef).squeeze()
correlation = pearsonr(raw_masked, nys_masked)[0]
print("correlation %.4f" % correlation)
Summary: