Dependencies:
In [ ]:
%matplotlib inline
In [ ]:
# Nilearn for neuro-imaging-specific machine learning
from nilearn import datasets
from nilearn.maskers import NiftiMasker
from nilearn import image
# Nibabel for general neuro-imaging tools
import nibabel
# Scikit-learn for machine learning
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
import numpy as np
import pandas as pd
# Plotting
import matplotlib.pyplot as plt
from nilearn import plotting
import seaborn as sns
sns.set(context="poster", style="ticks", font="Arial")
In [ ]:
haxby_dataset = datasets.fetch_haxby(subjects=[1])  # fetch data for subject 1
Let's take a quick look at what's been fetched:
In [ ]:
haxby_dataset
The Haxby dataset includes anatomical (3D; x $\times$ y $\times$ z) and functional timeseries (4D; x $\times$ y $\times$ z $\times$ TR) data, as well as some functionally & anatomically defined masks, and label data (i.e., which TRs correspond to which conditions).
We can grab the filepath for the anatomical image like this:
In [ ]:
haxby_dataset.anat[0]
And then use this path to load the anatomical image:
In [ ]:
anat_img = nibabel.load(haxby_dataset.anat[0])
anat_data = anat_img.get_fdata()
In [ ]:
print('Anatomical data has shape:', anat_data.shape)
In [ ]:
cut_coords = (0, 0, 0) # world-space (mm) coordinates to view
plotting.plot_anat(anat_img, cut_coords)
In [ ]:
mask_img = nibabel.load(haxby_dataset.mask_vt[0])
mask_data = mask_img.get_fdata()
plotting.plot_roi(mask_img, bg_img=anat_img, cmap='autumn')
In [ ]:
func_img_mean = image.mean_img(haxby_dataset.func[0])
plotting.plot_epi(func_img_mean, cut_coords, cmap='Reds')
In [ ]:
func_img = nibabel.load(haxby_dataset.func[0])
func_data = func_img.get_fdata()
func_img.shape
So at each TR a 40 $\times$ 64 $\times$ 64 volume was collected, and there were 1452 TRs in total.
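We can double-check this against the image header: nibabel's header.get_zooms() returns the voxel sizes, and for a 4D NIfTI the fourth value is typically the TR in seconds:
In [ ]:
# Voxel dimensions (mm) for the spatial axes; the fourth value is the TR (s)
func_img.header.get_zooms()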
Here's a list of some masks we might want to use:
In [12]:
mask_names = ['mask_vt', 'mask_face', 'mask_house']
In order to understand what's going on in the functional data over time, we can import the labels (i.e., condition) for each TR. In your own data, you might assign these labels by labeling the TRs from 4-12 s post-trial onset (e.g., TRs 3-6 if you have a 2 second TR) with the corresponding trial label, and then taking the average value for each voxel over those TRs. As a result, for each trial you will have one value per voxel, yielding a trial $\times$ voxel data matrix. You'll also have a label vector whose length is the number of trials.
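To make that averaging step concrete, here's a minimal sketch; trial_onsets (trial start indices in TR units) and the exact TR window are hypothetical stand-ins you'd derive from your own design file, not part of this dataset:
In [ ]:
# Sketch: average TRs 3-6 after each trial onset (~4-12 s post-onset at a 2 s TR).
# data is a TR x voxel array; trial_onsets is hypothetical.
def average_trials(data, trial_onsets, first_tr=3, last_tr=6):
    return np.vstack([data[t + first_tr : t + last_tr + 1].mean(axis=0)
                      for t in trial_onsets])  # trial x voxel matrix
For the Haxby data, though, we'll skip this step and decode TR-by-TR using the per-TR labels: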
In [13]:
labels = pd.read_csv(haxby_dataset.session_target[0], sep=" ")
stimuli = labels['labels'].values
Make sure the number of stimulus labels matches the expected number of TRs (1452, from above):
In [14]:
stimuli.shape
What are the unique labels?
In [15]:
np.unique(stimuli)
We might want to remove a stimulus category, such as "rest"
In [16]:
resting_state = stimuli == "rest"
categories = np.unique(stimuli[np.logical_not(resting_state)])
print(categories)
Now identify which run each stimulus tag corresponds to:
In [17]:
session_labels = labels["chunks"].values[np.logical_not(resting_state)]
In [18]:
lr_classifier = LogisticRegression(penalty='l2', C=1.)  # L2-penalized logistic regression
In [19]:
svc_classifier = SVC(C=1., kernel="linear")  # linear support vector classifier
In [20]:
dummy_classifier = DummyClassifier(strategy="stratified")  # chance-level baseline: random predictions matching class frequencies
Leave one run out for cross-validation:
In [21]:
cv = LeaveOneGroupOut()  # the run labels are passed as groups= when scoring
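To see what these folds look like, we can ask the CV object which run it holds out in each split (nothing is fit here; only the group labels matter):
In [ ]:
# Inspect the folds: each one holds out a single run
placeholder = np.zeros((len(session_labels), 1))  # split() only needs an X of the right length
for train, test in cv.split(placeholder, groups=session_labels):
    print('held-out run:', np.unique(session_labels[test]))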
In [23]:
# Initialize storage for results
rows = []
func_filename = haxby_dataset.func[0]
# Determine type of classifier
classifier = lr_classifier
# Loop through masks & categories for classification
for mask_name in mask_names:
    # Just get timecourses from voxels within mask
    mask_filename = haxby_dataset[mask_name][0]
    masker = NiftiMasker(mask_img=mask_filename, standardize=True)
    masked_timecourses = masker.fit_transform(
        func_filename)[np.logical_not(resting_state)]
    for category in categories:
        print("Processing %s %s" % (mask_name, category))
        classification_target = stimuli[np.logical_not(resting_state)] == category
        for acc_type, classify in zip(['accuracy', 'chance'],
                                      [classifier, dummy_classifier]):
            scores = cross_val_score(classify,
                                     masked_timecourses,
                                     classification_target,
                                     cv=cv, groups=session_labels,
                                     scoring="f1")
            rows.append({'subid': 'subj1',
                         'mask_name': mask_name,
                         'category': category,
                         'type': acc_type,
                         'mean': scores.mean(),
                         'sd': scores.std()})
df = pd.DataFrame(rows, columns=['subid', 'mask_name', 'category',
                                 'type', 'mean', 'sd'])
In [24]:
df.head()
In [25]:
with sns.plotting_context("notebook"):
    g = sns.FacetGrid(df, col="mask_name", aspect=.66, height=5)
    g.map(sns.stripplot, "mean", "category", "type", orient="h",
          palette="deep", size=10)
    g.add_legend(title="")
Next, let's narrow the problem to discriminating just two categories, face vs. house. First, build a boolean mask selecting only those TRs:
In [26]:
np.logical_or(stimuli == 'face', stimuli == 'house')
In [27]:
mask = 'mask_vt'
smoothing = 4
standardize_ts = True # center & norm timeseries
In [28]:
mask_filename = haxby_dataset[mask][0]
session_labels = labels["chunks"].values # runs, to be detrended separately
nifti_masker = NiftiMasker(mask_img=mask_filename, runs=session_labels,
                           smoothing_fwhm=smoothing, standardize=standardize_ts,
                           memory="nilearn_cache", memory_level=1)
func_filename = haxby_dataset.func[0]
X = nifti_masker.fit_transform(func_filename)
TR $\times$ voxel matrix:
In [29]:
X.shape
In [30]:
stim_mask = np.logical_or(stimuli == 'face', stimuli == 'house')
stimuli = stimuli[stim_mask]
session_labels = session_labels[stim_mask]
X = X[stim_mask]
print(X.shape)
Univariate feature selection based on ANOVA (F-test). Choose how many of the best features to keep:
In [31]:
num_feat = 500
anova = SelectKBest(f_classif, k=num_feat)
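Under the hood, SelectKBest with f_classif just scores each voxel with a one-way ANOVA F statistic and keeps the k highest-scoring ones; here's a minimal sketch of the equivalent manual computation:
In [ ]:
# f_classif returns per-feature F statistics and p-values
F, p = f_classif(X, stimuli)
top_voxels = np.argsort(F)[::-1][:num_feat]  # indices of the num_feat highest-F voxels
top_voxels[:10]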
In [32]:
classifier = lr_classifier
featsel_classify = Pipeline([('anova', anova), ('classification', classifier)])
In [33]:
featsel_classify.fit(X, stimuli)
stim_pred = featsel_classify.predict(X)
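Predicting on the very data we just fit gives in-sample (training) accuracy, which is optimistically biased; the cross-validation below gives honest estimates. Still, it's a quick sanity check that the pipeline runs:
In [ ]:
# In-sample accuracy -- optimistic, since the model has already seen these TRs
np.mean(stim_pred == stimuli)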
In [34]:
### Look at the classifier's estimates (weights)
coef = classifier.coef_
# reverse feature selection
coef = anova.inverse_transform(coef)
# reverse masking
weight_img = nifti_masker.inverse_transform(coef)
In [35]:
plotting.plot_stat_map(weight_img, func_img_mean, title='LR weights')
In [36]:
cv = LeaveOneGroupOut()
cv_scores = []
rows = []
for train, test in cv.split(X, stimuli, groups=session_labels):
    featsel_classify.fit(X[train], stimuli[train])
    y_pred = featsel_classify.predict(X[test])
    cv_scores.append(np.mean(y_pred == stimuli[test]))
    # predict_proba columns follow featsel_classify.classes_:
    # alphabetically, 'face' is column 0 and 'house' is column 1
    probs = featsel_classify.predict_proba(X[test])
    house_prob = probs[stimuli[test] == 'house', 1].mean()
    face_prob = probs[stimuli[test] == 'face', 0].mean()
    rows.append({'category': 'house', 'probability': house_prob})
    rows.append({'category': 'face', 'probability': face_prob})
df = pd.DataFrame(rows, columns=['category', 'probability'])
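As noted in the comment above, predict_proba orders its columns by the pipeline's classes_ attribute (alphabetical for string labels), which is worth confirming rather than assuming:
In [ ]:
featsel_classify.classes_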
In [37]:
df.head()
In [38]:
sns.violinplot(x='category', y='probability', data=df)
In [39]:
sns.violinplot(cv_scores)
In [41]:
cv_scores
In [ ]: