In [128]:
import pandas as pd
import numpy as np
import pyneurovault.api as pnv
import subprocess
import urllib2
import urllib
import nibabel as nib
import os.path
import nilearn
import sklearn.svm
import sklearn.feature_selection
import sklearn.pipeline
import sklearn.metrics
import matplotlib.pyplot as plt
import sklearn.cross_validation

In [2]:
nv = pd.read_csv('neurovault_labeled.csv')

In [3]:
cols = ['id','file', 'collection_id', 'map_type', 'modality','not_mni',
       'cognitive_contrast_cogatlas','cognitive_contrast_cogatlas_id',
       'cognitive_paradigm_cogatlas', 'cognitive_paradigm_cogatlas_id', 
       'contrast_definition', 'contrast_definition_cogatlas']
nv = nv.loc[:,cols]

In [4]:
for col in nv.columns:
    print col
print nv.shape


id
file
collection_id
map_type
modality
not_mni
cognitive_contrast_cogatlas
cognitive_contrast_cogatlas_id
cognitive_paradigm_cogatlas
cognitive_paradigm_cogatlas_id
contrast_definition
contrast_definition_cogatlas
(355, 12)

In [5]:
nv.rename(columns={"id":"image_id"},inplace=True)

In [6]:
nv['not_mni'].value_counts()


Out[6]:
False    353
True       2
Name: not_mni, dtype: int64

In [10]:
nv = pnv.download_images("./images/",nv,resample=True,target='/home/kesslerd/repos/cogfusion/nv_tagging/images/space.nii.gz')

In [11]:
template = os.path.join('.','images','resampled','{0:06d}.nii.gz')
nv['handle'] = [nib.load(template.format(path)) for path in nv['image_id'] ]


Out[11]:
0      (91, 109, 91)
1      (91, 109, 91)
2      (91, 109, 91)
3      (91, 109, 91)
4      (91, 109, 91)
5      (91, 109, 91)
6      (91, 109, 91)
7      (91, 109, 91)
8      (91, 109, 91)
9      (91, 109, 91)
10     (91, 109, 91)
11     (91, 109, 91)
12     (91, 109, 91)
13     (91, 109, 91)
14     (91, 109, 91)
15     (91, 109, 91)
16     (91, 109, 91)
17     (91, 109, 91)
18     (91, 109, 91)
19     (91, 109, 91)
20     (91, 109, 91)
21     (91, 109, 91)
22     (91, 109, 91)
23     (91, 109, 91)
24     (91, 109, 91)
25     (91, 109, 91)
26     (91, 109, 91)
27     (91, 109, 91)
28     (91, 109, 91)
29     (91, 109, 91)
           ...      
325    (91, 109, 91)
326    (91, 109, 91)
327    (91, 109, 91)
328    (91, 109, 91)
329    (91, 109, 91)
330    (91, 109, 91)
331    (91, 109, 91)
332    (91, 109, 91)
333    (91, 109, 91)
334    (91, 109, 91)
335    (91, 109, 91)
336    (91, 109, 91)
337    (91, 109, 91)
338    (91, 109, 91)
339    (91, 109, 91)
340    (91, 109, 91)
341    (91, 109, 91)
342    (91, 109, 91)
343    (91, 109, 91)
344    (91, 109, 91)
345    (91, 109, 91)
346    (91, 109, 91)
347    (91, 109, 91)
348    (91, 109, 91)
349    (91, 109, 91)
350    (91, 109, 91)
351    (91, 109, 91)
352    (91, 109, 91)
353    (91, 109, 91)
354    (91, 109, 91)
Name: shape, dtype: object

In [98]:
nv['shape']=[handle.shape for handle in nv['handle']]
nv['shape'].value_counts()


Out[98]:
(91, 109, 91)    355
Name: shape, dtype: int64

In [99]:
X = np.array( [handle.get_data().ravel() for handle in nv['handle']])

In [110]:
y2 = pd.DataFrame(y)

In [111]:
newmask = np.array(y2.isin(["0-back","Math"]))

In [112]:
newmask.squeeze().shape


Out[112]:
(301,)

In [113]:
X = X[newmask.squeeze(),:]
y = y[newmask.squeeze()]


/home/kesslerd/repos/cogfusion/env/lib/python2.7/site-packages/ipykernel/__main__.py:1: VisibleDeprecationWarning: boolean index did not match indexed array along dimension 0; dimension is 355 but corresponding boolean dimension is 301
  if __name__ == '__main__':

In [118]:
y.value_counts()


Out[118]:
0-back    15
Math      13
Name: cognitive_contrast_cogatlas, dtype: int64

In [14]:
X.shape


Out[14]:
(355, 902629)

In [15]:
y = nv['cognitive_contrast_cogatlas']

In [119]:
filt = sklearn.feature_selection.SelectKBest(k=1000)

In [120]:
pd.value_counts(y)


Out[120]:
0-back    15
Math      13
Name: cognitive_contrast_cogatlas, dtype: int64

In [121]:
clf = sklearn.svm.SVC(kernel='linear',class_weight='balanced')
clf = sklearn.svm.LinearSVC(class_weight={"0-back":1e4,"Math":1e4},C=1)

In [122]:
nv_pipe = sklearn.pipeline.Pipeline([('selector',filt),('classifier',clf)])

In [123]:
nv_pipe.fit(X,y)


/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/feature_selection/univariate_selection.py:113: UserWarning: Features [     0      1      2 ..., 902626 902627 902628] are constant.
  UserWarning)
Out[123]:
Pipeline(steps=[('selector', SelectKBest(k=1000, score_func=<function f_classif at 0x48d79b0>)), ('classifier', LinearSVC(C=1, class_weight={'Math': 10000.0, '0-back': 10000.0}, dual=True,
     fit_intercept=True, intercept_scaling=1, loss='squared_hinge',
     max_iter=1000, multi_class='ovr', penalty='l2', random_state=None,
     tol=0.0001, verbose=0))])

In [124]:
nv_pipe.score(X,y)


Out[124]:
0.9642857142857143

In [ ]:


In [125]:
pd.value_counts( nv_pipe.predict(X))


Out[125]:
0-back    14
Math      14
dtype: int64

In [127]:
%matplotlib inline
plt.matshow( sklearn.metrics.confusion_matrix(y,nv_pipe.predict(X)), cmap = 'bone')


Out[127]:
<matplotlib.image.AxesImage at 0x9fcda90>

In [129]:
sklearn.cross_validation.cross_val_score(nv_pipe,X,y)


Out[129]:
array([ 0.6       ,  0.33333333,  0.77777778])

In [133]:
mycv = sklearn.cross_validation.StratifiedKFold(y)

In [132]:
sklearn.cross_validation.permutation_test_score(nv_pipe,X,y,cv=mycv)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-132-811e73b6855e> in <module>()
----> 1 sklearn.cross_validation.permutation_test_score(nv_pipe,X,y,cv=mycv)

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/cross_validation.pyc in permutation_test_score(estimator, X, y, cv, n_permutations, n_jobs, labels, random_state, verbose, scoring)
   1776     # We clone the estimator to make sure that all the folds are
   1777     # independent, and that it is pickle-able.
-> 1778     score = _permutation_test_score(clone(estimator), X, y, cv, scorer)
   1779     permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
   1780         delayed(_permutation_test_score)(

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _permutation_test_score(estimator, X, y, cv, scorer)
   1615     avg_score = []
   1616     for train, test in cv:
-> 1617         estimator.fit(X[train], y[train])
   1618         avg_score.append(scorer(estimator, X[test], y[test]))
   1619     return np.mean(avg_score)

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self, X, y, **fit_params)
    162             the pipeline.
    163         """
--> 164         Xt, fit_params = self._pre_transform(X, y, **fit_params)
    165         self.steps[-1][-1].fit(Xt, y, **fit_params)
    166         return self

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/pipeline.pyc in _pre_transform(self, X, y, **fit_params)
    143         for name, transform in self.steps[:-1]:
    144             if hasattr(transform, "fit_transform"):
--> 145                 Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
    146             else:
    147                 Xt = transform.fit(Xt, y, **fit_params_steps[name]) \

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/base.pyc in fit_transform(self, X, y, **fit_params)
    456         else:
    457             # fit method of arity 2 (supervised transformation)
--> 458             return self.fit(X, y, **fit_params).transform(X)
    459 
    460 

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/feature_selection/univariate_selection.pyc in fit(self, X, y)
    329         self._check_params(X, y)
    330 
--> 331         self.scores_, self.pvalues_ = self.score_func(X, y)
    332         self.scores_ = np.asarray(self.scores_)
    333         self.pvalues_ = np.asarray(self.pvalues_)

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/feature_selection/univariate_selection.pyc in f_classif(X, y)
    147     X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])
    148     args = [X[safe_mask(X, y == k)] for k in np.unique(y)]
--> 149     return f_oneway(*args)
    150 
    151 

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/feature_selection/univariate_selection.pyc in f_oneway(*args)
     94     n_samples_per_class = np.array([a.shape[0] for a in args])
     95     n_samples = np.sum(n_samples_per_class)
---> 96     ss_alldata = sum(safe_sqr(a).sum(axis=0) for a in args)
     97     sums_args = [np.asarray(a.sum(axis=0)) for a in args]
     98     square_of_sums_alldata = sum(sums_args) ** 2

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/feature_selection/univariate_selection.pyc in <genexpr>((a,))
     94     n_samples_per_class = np.array([a.shape[0] for a in args])
     95     n_samples = np.sum(n_samples_per_class)
---> 96     ss_alldata = sum(safe_sqr(a).sum(axis=0) for a in args)
     97     sums_args = [np.asarray(a.sum(axis=0)) for a in args]
     98     square_of_sums_alldata = sum(sums_args) ** 2

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/utils/__init__.pyc in safe_sqr(X, copy)
    352     X ** 2 : element wise square
    353     """
--> 354     X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], ensure_2d=False)
    355     if issparse(X):
    356         if copy:

/home/kesslerd/repos/cogfusion/env/local/lib/python2.7/site-packages/sklearn/utils/validation.pyc in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    405                              " minimum of %d is required%s."
    406                              % (n_samples, shape_repr, ensure_min_samples,
--> 407                                 context))
    408 
    409     if ensure_min_features > 0 and array.ndim == 2:

ValueError: Found array with 0 sample(s) (shape=(0, 902629)) while a minimum of 1 is required.