Neurolearn

This notebook downloads a private collection from neurovault and runs an SVM.


To Do:

  1. Need to figure out how to tag single subject data with meta-data in neurovault
  2. Need to validate Python results on Matlab
  3. Need to recreate ROC and accuracy from Matlab in Python
  4. Need to create analysis class

Function Definitions


In [1]:
""" Code to grab a private collection from NeuroVault, and compute an SVM discriminating between negative and neutral images
"""
# Authors: Luke Chang, Chris Filo Gorgolewski, Gael Varoquaux
# License: BSD

import os
import json
import urllib, os, errno
from urllib2 import Request, urlopen, HTTPError
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
from nipype.utils.filemanip import split_filename
import nibabel as nb
from joblib import Memory
from nilearn.input_data import NiftiMasker
from nilearn.plotting import *
from nilearn.image import resample_img, mean_img
from nilearn.masking import compute_background_mask, _extrapolate_out_mask
from sklearn.svm import SVC
from sklearn.cross_validation import KFold, StratifiedKFold, LeaveOneLabelOut 
from sklearn.metrics import roc_curve, auc
from scipy import interp
import matplotlib.pyplot as plt
import time
import seaborn as sns
from nltools.analysis import Predict
%matplotlib inline

# Github Project Root
basedir = os.path.abspath('..')

# Set Paths
dest_dir = "/Users/lukechang/Dropbox/NEPA/Test_Analysis"
canonical = os.path.join(basedir,"resources/MNI152_T1_2mm.nii.gz")

# Use a joblib memory, to avoid depending on an Internet connection
mem = Memory(cachedir='/tmp/neurovault_analysis/cache')

def url_get(url):
    request = Request(url)
    response = urlopen(request)
    return response.read()

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else: raise
      
def get_collections_df(key = None):
    """Downloads metadata about collections/papers stored in NeuroVault and
    return it as a pandas DataFrame"""

    if key is None:
        request = Request('http://neurovault.org/api/collections/?format=json')
    else:
        request=Request('http://neurovault.org/api/collections/' + key + '?format=json')
    response = urlopen(request)
    elevations = response.read()
    data = json.loads(elevations)
    collections_df = json_normalize(data)
    collections_df.rename(columns={'id':'collection_id'}, inplace=True)
    collections_df.set_index("collection_id")

    return collections_df

def get_images_df():
    """Downloads metadata about images/statistical maps stored in NeuroVault and 
    return it as a pandas DataFrame"""

    request = Request('http://neurovault.org/api/images/?format=json')
    response = urlopen(request)
    elevations = response.read()
    data = json.loads(elevations)
    images_df = json_normalize(data)
    images_df['collection'] = images_df['collection'].apply(lambda x: int(x.split("/")[-2]))
    images_df['image_id'] = images_df['url'].apply(lambda x: int(x.split("/")[-2]))
    images_df.rename(columns={'collection':'collection_id'}, inplace=True)
    return images_df

def get_images_with_private_collections_df(key=None):
    """Downloads metadata about images/statistical maps stored in NeuroVault and
    and enriches it with metadata of the corresponding collections. The result 
    is returned as a pandas DataFrame"""
    
    collections_df = get_collections_df(key=key)
    images_df = get_images_df()

    combined_df = pd.merge(images_df, collections_df, how='inner', on='collection_id',
                      suffixes=('_image', '_collection'))
    return combined_df

def get_images_with_collections_df():
    """Downloads metadata about images/statistical maps stored in NeuroVault and
    and enriches it with metadata of the corresponding collections. The result 
    is returned as a pandas DataFrame"""
    
    collections_df = get_collections_df()
    images_df = get_images_df()

    combined_df = pd.merge(images_df, collections_df, how='left', on='collection_id',
                      suffixes=('_image', '_collection'))
    return combined_df

def download_and_resample(images_df, dest_dir, target):
    """Downloads all stat maps and resamples them to a common space.
    """

    target_nii = nb.load(target)
    orig_path = os.path.join(dest_dir, "original")
    mkdir_p(orig_path)
    resampled_path = os.path.join(dest_dir, "resampled")
    mkdir_p(resampled_path)
    out_df = combined_df.copy()

    for row in combined_df.iterrows():
        # Downloading the file to the "original" subfolder
        _, _, ext = split_filename(row[1]['file'])
        orig_file = os.path.join(orig_path, "%04d%s" % (row[1]['image_id'], ext))
        if not os.path.exists(orig_file):
            print "Downloading %s" % orig_file
            urllib.urlretrieve(row[1]['file'], orig_file)

        # Compute the background and extrapolate outside of the mask
        print "Extrapolating %s" % orig_file
        niimg = nb.load(orig_file)
        data = niimg.get_data().squeeze()
        niimg = nb.Nifti1Image(data, niimg.affine,
                               header=niimg.get_header())
        bg_mask = compute_background_mask(niimg).get_data()
        # Test if the image has been masked:
        out_of_mask = data[np.logical_not(bg_mask)]
        if np.all(np.isnan(out_of_mask)) or len(np.unique(out_of_mask)) == 1:
            # Need to extrapolate
            data = _extrapolate_out_mask(data.astype(np.float), bg_mask,
                                         iterations=3)[0]
        niimg = nb.Nifti1Image(data, niimg.affine,
                               header=niimg.get_header())
        del out_of_mask, bg_mask
        # Resampling the file to target and saving the output in the "resampled"
        # folder
        resampled_file = os.path.join(resampled_path,
                                      "%06d%s" % (row[1]['image_id'], ext))

        print "Resampling %s" % orig_file
        resampled_nii = resample_img(niimg, target_nii.get_affine(),
            target_nii.shape)
        resampled_nii = nb.Nifti1Image(resampled_nii.get_data().squeeze(),
                                       resampled_nii.get_affine(),
                                       header=niimg.get_header())
        if len(resampled_nii.shape) == 3:
            resampled_nii.to_filename(resampled_file)
        else:
            # We have a 4D file
            assert len(resampled_nii.shape) == 4
            resampled_data = resampled_nii.get_data()
            affine = resampled_nii.affine
            for index in range(resampled_nii.shape[-1]):
                # First save the files separately
                this_nii = nb.Nifti1Image(resampled_data[..., index],
                                            affine)
                this_id = int("%i%i" % (-row[1]['image_id'], index))
                this_file = os.path.join(resampled_path,
                    "%06d%s" % (this_id, ext))
                this_nii.to_filename(this_file)

                # Second, fix the dataframe
                out_df = out_df[out_df.image_id != row[1]['image_id']]
                this_row = row[1].copy()
                this_row.image_id = this_id
                out_df = out_df.append(this_row)

    return out_df

def roc_plot(tpr,fpr):
    mean_auc = auc(fpr, tpr)
    fig = plt.plot(fpr, tpr, 'r-', label='Mean ROC (area = %0.2f)' % mean_auc, lw=4)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic ')
    return fig

def dist_from_hyperplane_plot(stats_output):
    fig = sns.factorplot("SubID", "xval_dist_from_hyperplane", hue="Y", data=stats_out,kind='point')
    plt.xlabel('Subject')
    plt.ylabel('Distance from Hyperplane')
    plt.title('SVM Classification')
    return fig


Couldn't import dot_parser, loading of dot files will not be possible.

Get Test Data


In [6]:
# Test Data collection Key: /collections/NNGSIZTQ/

# Get Meta-Data associated with private key
k = 'NNGSIZTQ' # Private Key
coll_dat = get_collections_df(key = k)
combined_df = get_images_with_private_collections_df(k)

# Download and resample
combined_df = mem.cache(download_and_resample)(combined_df,dest_dir, canonical)
combined_df.to_csv('%s/metadata.csv' % dest_dir, encoding='utf8')


________________________________________________________________________________
[Memory] Calling __main__--Users-lukechang-Github-neurolearn-scripts-__ipython-input__.download_and_resample...
download_and_resample(              add_date_image  collection_id contrast_definition  \
0   2015-01-04T21:37:16.715Z            302                None   
1   2015-01-04T21:37:16.746Z            302                None   
2   2015-01-04T21:37:16.771Z            302                None   
3   2015-01-04T21:37:16.797Z            302                None   
4   2015-01-04T21:37:16.823Z            302                None   
5   2015-01-04T21:37:16.849Z            302                None   
6   2015-01-04T21:37:16.874Z            302                None   
7   2015-01-04T21:37:16.899Z            302                None   
8   2015-01-04T21:37:16.923Z            302                None   
9   2015-01-04T21:37:16.948Z  ..., 
'/Users/lukechang/Dropbox/NEPA/Test_Analysis', '/Users/lukechang/Github/neurolearn/resources/MNI152_T1_2mm.nii.gz')
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1649.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1649.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1650.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1650.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1651.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1651.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1652.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1652.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1653.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1653.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1654.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1654.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1655.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1655.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1656.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1656.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1657.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1657.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1658.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1658.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1659.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1659.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1660.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1660.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1661.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1661.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1662.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1662.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1663.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1663.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1664.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1664.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1665.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1665.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1666.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1666.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1667.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1667.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1668.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1668.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1669.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1669.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1670.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1670.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1671.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1671.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1672.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1672.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1673.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1673.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1674.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1674.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1675.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1675.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1676.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1676.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1677.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1677.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1678.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1678.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1679.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1679.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1680.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1680.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1681.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1681.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1682.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1682.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1683.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1683.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1684.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1684.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1685.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1685.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1686.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1686.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1687.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1687.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1688.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1688.nii.gz
___________________________________________download_and_resample - 18.8s, 0.3min

Run Machine Learning Analysis


In [14]:
tic = time.time() #Start Timer

# Separate image types and sort
combined_df = pd.read_csv('%s/metadata.csv' % dest_dir, encoding='utf8')
neg_list = combined_df.ix[combined_df.name_image.str.contains('Neg'),].sort(columns='name_image')
neu_list = combined_df.ix[combined_df.name_image.str.contains('Neu'),].sort(columns='name_image')

# Load data using nibabel
neg_file_list = [dest_dir + '/resampled/00' + str(x) + '.nii.gz' for x in neg_list.image_id]
neu_file_list = [dest_dir + '/resampled/00' + str(x) + '.nii.gz' for x in neu_list.image_id]
dat = nb.funcs.concat_images(neg_file_list+neu_file_list)

# Mask data
mask_img = nb.load(os.path.join(basedir,'resources/MNI152_T1_2mm_brain_mask_dil.nii.gz'))
mn_img = mean_img(dat)
nifti_masker = NiftiMasker(mask_img=mask_img)
dat_masked = nifti_masker.fit_transform(dat)

# Classification
nSub = 20
Y = np.array([1]*nSub+[0]*nSub)
svc = SVC(kernel='linear')
svc.fit(dat_masked, Y)

# Save Weights: Write out weight map to nifti image
coef_ = svc.coef_
intercept_ = svc.intercept_
coef_img = nifti_masker.inverse_transform(coef_)
nb.save(coef_img, dest_dir + '/Neg_vs_Neu_SVM_weights.nii')

# Cross-validation
cv = StratifiedKFold(Y, n_folds=5)
cv_scores = []
mean_tpr = 0.0
all_tpr = []
mean_fpr = np.linspace(0, 1, 100)
yfit = np.array([0]*nSub*2)
xval_dist_from_hyperplane = np.array([0]*nSub*2)
for train, test in cv:
    svc.fit(dat_masked[train], Y[train])
    yfit[test] = svc.predict(dat_masked[test])
    xval_dist_from_hyperplane[test] = svc.decision_function(dat_masked[test])
    
    # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(Y[test], yfit[test])
    mean_tpr += interp(mean_fpr, fpr, tpr)
mean_tpr /= len(cv)
mean_tpr[0] = 0.0
mean_tpr[-1] = 1.0

# Save Stats Output
stats_out = pd.DataFrame({
                'SubID' : range(0,nSub)*2, 
                'xval_dist_from_hyperplane' : xval_dist_from_hyperplane, 
                'Y' : Y, 
                'yfit' : yfit })
stats_out.to_csv(dest_dir + 'SVM_Stats_Output.csv')

# Display results
print 'overall CV accuracy: %.2f' % np.mean(yfit==Y)
print 'AUC: %.2f' % auc(mean_fpr,mean_tpr)
print 'Forced Choice Accuracy: %.2f' % np.mean((xval_dist_from_hyperplane[0:nSub] - xval_dist_from_hyperplane[nSub:]) > 0)

# Plots
fig3 = roc_plot(mean_tpr, mean_fpr)
plt.savefig(dest_dir + '/Neg_vs_Neu_SVM_ROC_plot.png')

fig1 = plot_roi(nifti_masker.mask_img_, mn_img, title="Mask", cut_coords=range(-40, 40, 10), display_mode='z')
fig1.savefig(dest_dir + '/Neg_vs_Neu_SVM_mask.png')
fig2 = plot_stat_map(coef_img, mn_img, title="SVM weights", cut_coords=range(-40, 40, 10), display_mode='z')
fig2.savefig(dest_dir + '/Neg_vs_Neu_SVM_weightmap.png')
fig4 = dist_from_hyperplane_plot(stats_out)
fig4.savefig(dest_dir + '/Neg_vs_Neu_SVM_xVal_Distance_from_Hyperplane.png')

print 'Elapsed: %.2f seconds' % (time.time() - tic)


overall CV accuracy: 0.78
AUC: 0.77
Forced Choice Accuracy: 0.90
Elapsed: 7.08 seconds

Get Weight Map


In [8]:
#PINES key /collections/TMTKHDFM/

### Download Weight Map ###
# Get Meta-Data associated with private key
k = 'TMTKHDFM' # Private Key
coll_pines = get_collections_df(key = k)
pines_df = get_images_with_private_collections_df(k)

# Download and resample
dest_dir = "/Users/lukechang/Dropbox/NEPA/Test_Analysis"
target = os.path.join(basedir,"resources/MNI152_T1_2mm.nii.gz")
pines_df = mem.cache(download_and_resample)(pines_df,dest_dir, target)
pines_df.to_csv('%s/pines_metadata.csv' % dest_dir, encoding='utf8')


________________________________________________________________________________
[Memory] Calling __main__--Users-lukechang-Github-neurolearn-scripts-__ipython-input__.download_and_resample...
download_and_resample(             add_date_image  collection_id contrast_definition  \
0  2015-01-06T05:34:33.909Z            306                       

  contrast_definition_cogatlas                description_image  \
0                               Picture induce negative emotion   

                                                file label_description_file  \
0  http://neurovault.org/media/images/306/Rating_...                    NaN   

  map_type         modify_date_image     name_image            ...             \
0    Other  2015-01-06T05:34:33.910Z  PINES_Weights            ...              

  used_high_pass_filter  used_intersubject_registration  \
0                   NaN                             NaN   

   used_motion_correction used_motion_regressors  \
0                     NaN                    NaN   

   used_motion_susceptibiity_correction used_orthogonalization  \
0                                   NaN                    NaN   

  used_reaction_time_regressor used_slice_timing_correction used_smoothing  \
0                          NaN                          NaN            NaN   

  used_temporal_derivatives  
0                       NaN  

[1 rows x 109 columns], 
'/Users/lukechang/Dropbox/NEPA/Test_Analysis', '/Users/lukechang/Github/neurolearn/resources/MNI152_T1_2mm.nii.gz')
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1649.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1649.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1650.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1650.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1651.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1651.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1652.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1652.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1653.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1653.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1654.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1654.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1655.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1655.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1656.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1656.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1657.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1657.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1658.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1658.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1659.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1659.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1660.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1660.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1661.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1661.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1662.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1662.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1663.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1663.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1664.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1664.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1665.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1665.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1666.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1666.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1667.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1667.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1668.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1668.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1669.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1669.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1670.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1670.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1671.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1671.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1672.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1672.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1673.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1673.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1674.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1674.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1675.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1675.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1676.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1676.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1677.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1677.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1678.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1678.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1679.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1679.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1680.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1680.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1681.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1681.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1682.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1682.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1683.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1683.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1684.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1684.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1685.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1685.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1686.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1686.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1687.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1687.nii.gz
Extrapolating /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1688.nii.gz
Resampling /Users/lukechang/Dropbox/NEPA/Test_Analysis/original/1688.nii.gz
___________________________________________download_and_resample - 19.9s, 0.3min

Test Pattern Expression


In [9]:
tic = time.time() #Start Timer

# Load data using nibabel
dat = nb.funcs.concat_images(neg_file_list+neu_file_list)
pines = nb.load(os.path.abspath(dest_dir + '/resampled/00' + str(combined_df.image_id[0]) + '.nii.gz'))

# Mask data
nifti_masker = NiftiMasker(mask_img=mask_img)
dat_masked = nifti_masker.fit_transform(dat)
pines_masked = nifti_masker.fit_transform(pines)

# Calculate pattern expression
pexp = np.dot(dat_masked,np.transpose(pines_masked)) + intercept_

# Calculate ROC plot
mean_fpr = np.linspace(0, 1, 100)
fpr, tpr, thresholds = roc_curve(Y, pexp)
stats_out.xval_dist_from_hyperplane = pexp

# Display results
print 'overall CV accuracy: %.2f' % np.mean(pexp + 1.1236 > 0)
print 'AUC: %.2f' % auc(fpr,tpr)
print 'Forced Choice Accuracy: %.2f' % np.mean((pexp[0:nSub] - pexp[nSub:]) > 0)

fig1 = roc_plot(tpr, fpr)
plt.savefig(dest_dir + '/PINES_PExp_Neg_vs_Neu_SVM_ROC_plot.png')

fig2 = dist_from_hyperplane_plot(stats_out)
fig2.savefig(dest_dir + '/PINES_PExp_Neg_vs_Neu_SVM_xVal_Distance_from_Hyperplane.png')

print 'Elapsed: %.2f seconds' % (time.time() - tic) #Stop timer


overall CV accuracy: 0.62
AUC: 0.43
Forced Choice Accuracy: 0.30
Elapsed: 2.47 seconds

In [14]:

Extra Code


In [7]:
tic = time.time() #Start Timer

# Separate image types and sort
combined_df = pd.read_csv('%s/metadata.csv' % dest_dir, encoding='utf8')
neg_list = combined_df.ix[combined_df.name_image.str.contains('Neg'),].sort(columns='name_image')
neu_list = combined_df.ix[combined_df.name_image.str.contains('Neu'),].sort(columns='name_image')

# Load data using nibabel
neg_file_list = [dest_dir + '/resampled/00' + str(x) + '.nii.gz' for x in neg_list.image_id]
neu_file_list = [dest_dir + '/resampled/00' + str(x) + '.nii.gz' for x in neu_list.image_id]
dat = nb.funcs.concat_images(neg_file_list+neu_file_list)
Y = np.array([1]*len(neg_file_list)+[0]*len(neu_file_list))

# negvneu = Predict(dat,Y,algorithm='svm',output_dir=dest_dir, kernel="linear")
# negvneu = Predict(dat,Y,algorithm='linear',output_dir=dest_dir)
negvneu = Predict(dat,Y,algorithm='svm',output_dir=dest_dir)
negvneu.predict()

print 'Elapsed: %.2f seconds' % (time.time() - tic) #Stop timer


/usr/local/lib/python2.7/site-packages/sklearn/linear_model/ridge.py:138: DeprecationWarning: Implicitly casting between incompatible kinds. In a future numpy release, this will raise an error. Use casting="unsafe" if this is intentional.
  K.flat[::n_samples + 1] += alpha[0]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-8ed49135ea09> in <module>()
     15 # negvneu = Predict(dat,Y,algorithm='linear',output_dir=dest_dir)
     16 negvneu = Predict(dat,Y,algorithm='ridge',output_dir=dest_dir)
---> 17 negvneu.predict()
     18 
     19 print 'Elapsed: %.2f seconds' % (time.time() - tic) #Stop timer

/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltools-0.1-py2.7.egg/nltools/analysis.pyc in predict(self, algorithm, cv, save_images, save_output, save_plot, **kwargs)
     94         # Overall Fit for weight map
     95         predicter = self.predicter
---> 96         predicter.fit(self.data, self.Y)
     97         self.yfit = predicter.predict(self.data)
     98 

/usr/local/lib/python2.7/site-packages/sklearn/linear_model/ridge.pyc in fit(self, X, y, sample_weight)
    485         self : returns an instance of self.
    486         """
--> 487         return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
    488 
    489 

/usr/local/lib/python2.7/site-packages/sklearn/linear_model/ridge.pyc in fit(self, X, y, sample_weight)
    374                                       max_iter=self.max_iter,
    375                                       tol=self.tol,
--> 376                                       solver=solver)
    377         self._set_intercept(X_mean, y_mean, X_std)
    378         return self

/usr/local/lib/python2.7/site-packages/sklearn/linear_model/ridge.pyc in ridge_regression(X, y, alpha, sample_weight, solver, max_iter, tol)
    318             try:
    319                 dual_coef = _solve_cholesky_kernel(K, y, alpha,
--> 320                                                          sample_weight)
    321 
    322                 coef = safe_sparse_dot(X.T, dual_coef, dense_output=True).T

/usr/local/lib/python2.7/site-packages/sklearn/linear_model/ridge.pyc in _solve_cholesky_kernel(K, y, alpha, sample_weight)
    136     if one_alpha:
    137         # Only one penalty, we can solve multi-target problems in one time.
--> 138         K.flat[::n_samples + 1] += alpha[0]
    139 
    140         dual_coef = linalg.solve(K, y, sym_pos=True, overwrite_a=True)

TypeError: unsupported operand type(s) for +: 'float' and 'dict'

In [ ]:


In [4]:
sklearn.svm.LinearSVC

In [4]:


In [ ]: