In [1]:
    
from nipype.interfaces import afni as afni
import numpy as np
import pandas as pd
import os as os
import re as re
import glob as glob
import nibabel as nibabel
from mvpa2.tutorial_suite import *
    
In [ ]:
    
    
In [2]:
    
import nipype.interfaces.afni as afni
import os as os
def makeMask(subPrefix, labelFile, maskSuffix, fsLabels):
    
    """
    Create a binary mask for a list of FreeSurfer regions: AFNI 3dcalc builds
    the mask from the label volume, then 3dresample puts it on the subject's
    functional grid and writes it as NIfTI.
        subPrefix (string):  subject name to use
        labelFile (string):  FreeSurfer label volume to build the mask from
        maskSuffix (string): name to give resulting mask
    
        fsLabels (list of ints): set of regions to use in mask.
    """
    ## calc mask: voxel -> 1 where its label value is amongst fsLabels, else 0
    masksCalc = afni.Calc()
    masksCalc.inputs.in_file_a = labelFile
    masksCalc.inputs.expr = 'amongst(a, '+ ', '.join(str(l) for l in fsLabels)+ ')'
    masksCalc.inputs.out_file =  'calcmask'  # temporary AFNI BRIK/HEAD pair in cwd
    masksCalc.inputs.outputtype = "AFNI"
    print(masksCalc.cmdline)
    masksCalc.run()
    ## resample the mask onto the functional grid
    resamp = afni.Resample()
    resamp.inputs.in_file = 'calcmask+orig.BRIK'
    resamp.inputs.resample_mode = 'NN'  # nearest-neighbour keeps the mask binary
    # NOTE(review): master grid is hard-coded to the study-beta file naming
    # convention rather than passed in as a parameter
    resamp.inputs.master =  subPrefix + '.LSSbetas.BLOCK6.study.uber.nii' # resampling to func space
    resamp.inputs.out_file = subPrefix + '.' + maskSuffix +  '.nii'
    resamp.inputs.outputtype = "NIFTI"
    print(resamp.cmdline)
    resamp.run()
    # clean up the temporary AFNI dataset (NOTE: leaks if resample raises)
    os.remove('calcmask+orig.BRIK')
    os.remove('calcmask+orig.HEAD')
    
In [3]:
    
### SET Working directory
% cd /Users/Jim/PARC_data_special/mvpa_files/
    
    
In [4]:
    
# Load the behavioral data file and build 'PARC_sub_<id>' prefixes for every
# subject it contains.  NOTE(review): hardcoded absolute path — breaks on any
# other machine; consider a DATA_DIR constant in a config cell.
bothTest = pd.read_csv("/Users/Jim/Dropbox/Dissertation/data_files/bothTest_toMakeRegressors.csv")
subsFile = np.array(bothTest.Subject.unique())
subsFile_prefix  = np.array(['PARC_sub_' + str(s) for s in subsFile])
    
In [5]:
    
### get list of subs w/ some file pattern present in the working directory
filePattern = ".labelVolume.nii.gz"
betaFiles = glob.glob('*' + filePattern  + '*')
# Strip the pattern to recover 'PARC_sub_<n>' prefixes, then the bare numbers.
# NOTE(review): Python 2 map() returns a list, which the [0] indexing below and
# the later .sort()/.remove() calls rely on; under py3 these would need list().
subPrefixes = map(lambda f: re.sub(filePattern,"",f), betaFiles)
subNums = map(lambda f: re.sub("PARC_sub_","",f), subPrefixes)
print subPrefixes[0]
print subNums[0]
    
    
In [6]:
    
## who is missing from data file? 
print "in file folder but not behave file" + str(subsFile_prefix[np.in1d(subPrefixes, subsFile_prefix, invert = True)])
print "in behave file but no filein folder " + str(subsFile_prefix[np.in1d(subsFile_prefix, subPrefixes, invert = True)])
    
    
In this notebook I've made the masks already (should be of format .nii)
In [ ]:
    
    
In [7]:
    
#### set up data ####
# subjects = ['PARC_sub_2699', 'PARC_sub_2718', 'PARC_sub_2726', 'PARC_sub_2747', 'PARC_sub_2754', 'PARC_sub_2761', 'PARC_sub_2784', 'PARC_sub_2786', 'PARC_sub_2787',
#             'PARC_sub_2788', 'PARC_sub_2792', 'PARC_sub_2796', 'PARC_sub_2799', 'PARC_sub_2778', 'PARC_sub_2825', 'PARC_sub_2838', 'PARC_sub_2759']
# subjects = subPrefixes # subjects defined above
## files to use
# filePattern = ".labelVolume.nii.gz"
study_beta_prefix = 'LSSbetas.BLOCK6.study.uber'  # encoding-phase single-trial betas
test_beta_prefix = 'LSSbetas.GAM.test.uber'       # retrieval-phase single-trial betas
mask_prefix = 'parafusi'
# FreeSurfer aparc label values for the ROI; given the 'parafusi' name these are
# presumably fusiform + parahippocampal, lh/rh pairs — TODO confirm against the LUT
mask_labels = [1007, 2007, 1016, 2016]
csvName = 'mvpa_'+ mask_prefix +'_diss1_.csv'  # NOTE(review): reassigned later before the CSV is written
subPrefixes.sort()
subjects = subPrefixes  # NOTE(review): alias, not a copy — mutating one mutates both
# in scanner clarity cutoff
confidence_cutoff = 3
subjects
    
    Out[7]:
In [8]:
    
# Sanity check: report any subject missing a study-beta, test-beta, or mask
# file in the working directory before running the analyses below.
for subj_n, subj in enumerate(subjects):
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
    if not os.path.exists(study_beta_name):
        print "missing Study beta file for "  + subj 
    
    if not os.path.exists(test_beta_name):
        print "missing Test beta file for "  + subj 
        
    if not os.path.exists(mask_name):
        print "missing mask file for "  + subj
    
    
In [9]:
    
# set subject list and remove the ones not wanted
subjects = subPrefixes
# sub 2908 - lots of movement
# 2829 - also lots of moevement? single beta estimation for run 5 of recall failed.
probSubs = ['PARC_sub_2908','PARC_sub_2844','PARC_sub_2829']
for s in probSubs:
    if(np.in1d(s,subjects)):
        print "removing " + str(s)
        subjects.remove(s)
    
    
In [23]:
    
# I'm going to make masks with a script on the desktop.
# for subPrefix in subjects:
# #     subPrefix = 'PARC_sub_' + str(s)
#     maskName = subPrefix + '.' + mask_prefix + '.nii'
#     labelFileName = subPrefix + filePattern #output name
    
# #     print str(mask_labels)
#     if(os.path.exists(maskName)):
#         print maskName  + " Mask already exists for " + subPrefix
#     else: makeMask(subPrefix, labelFileName, mask_prefix, mask_labels)
    
In [10]:
    
behave_acc = []
inScan_acc = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    test_csv_name = subj + '_test_timingdata.csv'
    test_data = pd.read_csv(test_csv_name, sep=',')
    print subj
    print float(sum(test_data.finaltest_correct))/96
    behave_acc.append(float(sum(test_data.finaltest_correct))/96)
    inScan_acc.append(float(sum(test_data.corrected_resp > confidence_cutoff))/96)
    
    
In [11]:
    
study_accuracy = []
for subj_n, subj in enumerate(subjects):
    
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))
    runs = np.repeat(range(1,7),16, axis= 0)
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
#    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    
    ### CV study
    print 'CV study/n'
    clf = LinearCSVMC(C=-1)
    cvte = CrossValidation(clf, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
    cv_results = cvte(ds_study)
    print subj + '  ' + str(np.mean(cv_results))
    study_accuracy.append(np.mean(cv_results))
    
    
In [ ]:
    
    
In [12]:
    
# Train on the study phase, predict high-confidence test trials
# (corrected_resp > confidence_cutoff); one accuracy value per subject.
trainstudy_test_hicon_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    
    print 'CV study/n'  # NOTE(review): literal '/n' — '\n' was probably intended
    clf = LinearCSVMC(C=-1)
    
    # keep only trials rated above the in-scanner confidence cutoff
    ds_test.sa['corrected_resp'] = test_data.corrected_resp
    ds_totest = ds_test[np.array(ds_test.sa.corrected_resp > confidence_cutoff)]
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_totest.samples)
    results = np.mean(predictions == ds_totest.sa.targets) 
    
    # NOTE(review): len(predictions) is the high-confidence trial count, not
    # the number correct — the label is misleading
    print subj +  'N trials correct: ' + str(len(predictions)) + ' CV acc: ' + str(results) 
    trainstudy_test_hicon_accuracy.append(results)
    
    
In [13]:
    
# NOTE(review): standalone leftover — this operates on ds_test / test_data as
# left by the LAST subject of the previous loop only, and duplicates two lines
# from that loop; it appears to be interactive scratch work.
ds_test.sa['corrected_resp'] = test_data.corrected_resp
ds_totest = ds_test[np.array(ds_test.sa.corrected_resp > confidence_cutoff)]
    
In [14]:
    
# Leave-one-run-out CV entirely within the test (retrieval) phase, all trials.
test_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
#    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_test)
 
    print 'CV study/n'  # NOTE(review): literal '/n'; also this cell CVs the TEST phase
    clf = LinearCSVMC(C=-1)  # C=-1: pyMVPA auto-scales C
    cvte = CrossValidation(clf, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
    cv_results = cvte(ds_test)
    # 'N trials correct' is the behavioral correct count; acc is classifier accuracy
    print subj +  'N trials correct: ' + str(sum(test_data.finaltest_correct)) + ' CV acc: ' + str(np.mean(cv_results)) 
    test_accuracy.append(np.mean(cv_results))
    
    
In [15]:
    
# Train on the study phase, predict only the test trials answered correctly
# on the final memory test (finaltest_correct == 1).
trainstudy_test_cor_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    ### CV TEST
    print 'CV study/n'  # NOTE(review): literal '/n'; label says study but this tests retrieval
    clf = LinearCSVMC(C=-1)
    
    
    # NOTE(review): correctonly_test / correct_test_labels are computed but
    # never used below; the dataset filter on sa.final_corect does the work
    correctonly_test = test_data[np.array(test_data.finaltest_correct==1)]
    correct_test_labels = correctonly_test.imgType
    ds_test.sa['final_corect'] = test_data.finaltest_correct  # sic: 'corect' spelling is used consistently
    ds_totest = ds_test[np.array(ds_test.sa.final_corect==1)]
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_totest.samples)
    results = np.mean(predictions == ds_totest.sa.targets) 
    
    print subj +  'N trials correct: ' + str(sum(test_data.finaltest_correct)) + ' CV acc: ' + str(results) 
    trainstudy_test_cor_accuracy.append(results)
    
    
In [16]:
    
# Train on the study phase, predict only the test trials answered INCORRECTLY
# on the final memory test (finaltest_correct == 0).
trainstudy_test_incor_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    ### CV TEST
    print 'CV study/n'  # NOTE(review): literal '/n'
    clf = LinearCSVMC(C=-1)
    
    
    # NOTE(review): despite the 'correctonly' name this selects the INCORRECT
    # trials (==0); the two variables below are also unused afterward
    correctonly_test = test_data[test_data.finaltest_correct==0]
    correct_test_labels = correctonly_test.imgType
    ds_test.sa['final_corect'] = test_data.finaltest_correct
    ds_totest = ds_test[np.array(ds_test.sa.final_corect==0)]
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_totest.samples)
    results = np.mean(predictions == ds_totest.sa.targets) 
    
    # NOTE(review): len(predictions) counts the incorrect trials here
    print subj +  'N trials correct: ' + str(len(predictions)) + ' CV acc: ' + str(results) 
    trainstudy_test_incor_accuracy.append(results)
    
    
In [17]:
    
#### accurate and confident 
# Train on the study phase, predict only test trials that are BOTH
# high-confidence (corrected_resp > cutoff) AND correct (final_corect == 1).
trainstudy_test_hicon_cor = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    ### CV TEST
    print 'CV study/n'  # NOTE(review): literal '/n'
    clf = LinearCSVMC(C=-1)
    
    
    # filter twice: first confidence, then correctness
    ds_test.sa['corrected_resp'] = test_data.corrected_resp
    ds_test.sa['final_corect'] = test_data.finaltest_correct
    ds_totest = ds_test[np.array(ds_test.sa.corrected_resp > confidence_cutoff)]
    ds_totest = ds_totest[np.array(ds_totest.sa.final_corect==1)]
#     correct_test_labels = ds_totest.sa.imgType
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_totest.samples)
    results = np.mean(predictions == ds_totest.sa.targets) 
    
    # NOTE(review): len(predictions) is the filtered trial count, not number correct
    print subj +  'N trials correct: ' +  str(len(predictions)) + ' CV acc: ' + str(results) 
    trainstudy_test_hicon_cor.append(results)
    
    
In [18]:
    
# ds_test.sa['corrected_resp'] = test_data.corrected_resp
# ds_test.sa['final_corect'] = test_data.finaltest_correct
# ds_totest = ds_test[np.array(ds_test.sa.corrected_resp > confidence_cutoff)]
# ds_totest = ds_totest[np.array(ds_totest.sa.final_corect==1)]
# ds_totest.sa
    
In [19]:
    
# Train on the study phase, predict ALL test trials (no filtering).
trainstudy_test_all_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    ### CV TEST
    print 'CV study/n'  # NOTE(review): literal '/n'
    clf = LinearCSVMC(C=-1)
    
    
#     correctonly_test = test_data[test_data.finaltest_correct==0]
#     correct_test_labels = correctonly_test.imgType
#     ds_test.sa['final_corect'] = test_data.finaltest_correct
#     ds_totest = ds_test[ds_test.sa.final_corect==0]
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_test.samples)
    results = np.mean(predictions == ds_test.sa.targets) 
    
    # NOTE(review): len(predictions) is the total trial count (96), not number correct
    print subj +  'N trials correct: ' + str(len(predictions)) + ' CV acc: ' + str(results) 
    trainstudy_test_all_accuracy.append(results)
    
    
In [20]:
    
# ds_totest[np.array(ds_totest.sa.final_corect==1)]
    
In [21]:
    
# Leave-one-run-out CV within the test phase, restricted to trials answered
# correctly on the final memory test.
test_cor_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
#    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_test)
    ### CV TEST
#     print 'CV study/n'
#     clf = LinearCSVMC(C=-1)
#     cvte = CrossValidation(clf, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
#     cv_results = cvte(ds_test)
#     print subj + '  ' + str(np.mean(cv_results))
    # NOTE(review): correctonly_test / correct_test_labels are unused below
    correctonly_test = test_data[test_data.finaltest_correct==1]
    correct_test_labels = correctonly_test.imgType
    ds_test.sa['final_corect'] = test_data.finaltest_correct
    ds_totest = ds_test[np.array(ds_test.sa.final_corect==1)]
    
    print 'CV study/n'  # NOTE(review): literal '/n'
    # NOTE(review): after filtering, runs/targets are unbalanced across chunks —
    # presumably NFold CV still runs but folds differ in size; verify this is intended
    clf = LinearCSVMC(C=-1)
    cvte = CrossValidation(clf, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
    cv_results = cvte(ds_totest)
    print subj +  'N trials correct: ' + str(sum(test_data.finaltest_correct)) + ' CV acc: ' + str(np.mean(cv_results)) 
    test_cor_accuracy.append(np.mean(cv_results))
    
    
In [22]:
    
# Train on the study phase, predict LOW-confidence test trials
# (corrected_resp < cutoff).  NOTE(review): trials exactly at the cutoff fall
# in neither this split nor the high-confidence one (which uses '>').
trainstudy_test_lowcon_accuracy = []
for subj_n, subj in enumerate(subjects):
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    ### CV TEST
    print 'CV study/n'  # NOTE(review): literal '/n'
    clf = LinearCSVMC(C=-1)
    
    # NOTE(review): correctonly_test / correct_test_labels are unused below
    correctonly_test = test_data[test_data.corrected_resp < confidence_cutoff]
    correct_test_labels = correctonly_test.imgType
    ds_test.sa['corrected_resp'] = test_data.corrected_resp
    ds_totest = ds_test[np.array(ds_test.sa.corrected_resp < confidence_cutoff)]
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_totest.samples)
    results = np.mean(predictions == ds_totest.sa.targets) 
    
    # NOTE(review): len(predictions) counts low-confidence trials, not number correct
    print subj +  'N trials correct: ' + str(len(predictions))+ ' CV acc: ' + str(results) 
    trainstudy_test_lowcon_accuracy.append(results)
    
    
In [23]:
    
# combine accuracy information: one row per subject, one column per analysis
# (encoding CV, retrieval CV, cross-phase train-study/test-X variants, and the
# behavioral accuracies collected above).
mvpa_data_dict = {'subject' : subjects,
                  'encode_acc' : study_accuracy,
                  'inScan_hiClear_pct': inScan_acc,
                  'recall_cor_acc' : test_cor_accuracy,
                  'ENCrec_all_cor_acc': trainstudy_test_cor_accuracy,
                  'behave_acc' : behave_acc ,
                  'ENCrec_all_incor_acc' : trainstudy_test_incor_accuracy ,
                  'recall_all_acc' : test_accuracy,
                  'ENCrec_all': trainstudy_test_all_accuracy,
                  'hicon_ENCrec' : trainstudy_test_hicon_accuracy,
                  'lowcon_ENCrec' : trainstudy_test_lowcon_accuracy,
                  'ENCrec_hicon3_cor': trainstudy_test_hicon_cor
                  }
mvpa_data = pd.DataFrame(mvpa_data_dict)
mvpa_data
    
    Out[23]:
In [ ]:
    
    
In [63]:
    
# mvpa_data_old = mvpa_data
# mvpa_data_old = mvpa_data_old.append(mvpa_data)
# mvpa_data = mvpa_data_old
    
In [24]:
    
# Tag every row with the parcellation source and the ROI used for this run,
# so CSVs produced from different masks can be concatenated later.
mvpa_data['aparcType'] = ['Marissa']*mvpa_data.shape[0]
mvpa_data['roi'] = [mask_prefix]* mvpa_data.shape[0]
    
In [25]:
    
# csvname = 'mvpa_'+ maskSuffix +'_forSRP_.csv'
# Write the per-subject accuracy table out; this name replaces the csvName set
# in the configuration cell earlier in the notebook.
csvName = 'mvpa_'+ mask_prefix +'_newAlign_fix2736_.csv'
# bound-method call is the idiomatic equivalent of pd.DataFrame.to_csv(mvpa_data, ...)
mvpa_data.to_csv(csvName)
    
In [77]:
    
csvName
    
    Out[77]:
In [26]:
    
%matplotlib inline
import seaborn as sns
    
In [ ]:
    
    
In [28]:
    
# subInfo = pd.read_csv("/Users/Jim/Dropbox/Dissertation/analysis tracking/subInfo.csv")
# mvpa_withSub = mvpa_data.merge(subInfo)
    
In [29]:
    
pal = sns.cubehelix_palette(4, 1.5, .75, light=.6, dark=.2)
g = sns.lmplot("behave_acc", "ENCrec_hicon3_cor", data=mvpa_data,
               palette=pal, size=6)
    
    
In [1]:
    
# NOTE(review): execution count In[1] — this cell was re-run out of order; it
# depends on `pal` and `mvpa_data` defined in earlier cells of the session and
# will fail under Restart & Run All until moved below them.
g = sns.lmplot("behave_acc", "encode_acc", data=mvpa_data,
               palette=pal, size=6)
# that's nice.
    
    
In [31]:
    
g = sns.lmplot("behave_acc","recall_all_acc",  data=mvpa_data,
               palette=pal, size=6)
    
    
This is pretty interesting: it looks like there is above-chance CV classification even for participants who did not do so great a job on the final memory test!
In [32]:
    
# g = sns.lmplot("behave_acc","inScan_hiClear_pct",  data=mvpa_data,
#                palette=pal, size=6)
g = sns.lmplot("inScan_hiClear_pct","recall_all_acc",  data=mvpa_data,
               palette=pal, size=6)
    
    
In [ ]:
    
    
In [89]:
    
# Build a long-format table of per-trial predictions (train on study, predict
# every test trial), one row per trial per subject, for later item-level analysis.
# NOTE(review): growing a DataFrame with .append() inside the loop is quadratic;
# collecting predDf frames in a list and concatenating once would be cleaner.
longForm = pd.DataFrame()
for subj_n, subj in enumerate(subjects):
    predDf = pd.DataFrame()
    
    # load behavioral data
    study_csv_name = subj + '_study_timingdata.csv'
    test_csv_name = subj + '_test_timingdata.csv'
    study_data = pd.read_csv(study_csv_name, sep=',')
    test_data = pd.read_csv(test_csv_name, sep=',')
    
    # make variables to load neural data
    study_labels = list(study_data.imgType)
    test_labels = list(test_data.imgType)
    trials = np.array(range(1,97))  # NOTE(review): unused in this cell
    runs = np.repeat(range(1,7),16, axis= 0)  # 6 runs x 16 trials
    
    # load neural data 
    print 'loading neural data...' 
    study_beta_name = subj + '.' + study_beta_prefix + '.nii'
    mask_name = subj + '.' + mask_prefix + '.nii'
    test_beta_name = subj + '.' + test_beta_prefix + '.nii'
 
    ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
    ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
    zscore(ds_study)
    zscore(ds_test)
    ### CV TEST
    print 'CV study/n'  # NOTE(review): literal '/n'
    clf = LinearCSVMC(C=-1)
    
    
#     correctonly_test = test_data[test_data.finaltest_correct==0]
#     correct_test_labels = correctonly_test.imgType
#     ds_test.sa['final_corect'] = test_data.finaltest_correct
#     ds_totest = ds_test[ds_test.sa.final_corect==0]
    
    clf = LinearCSVMC(C=-1)  # NOTE(review): re-created; the clf built above is unused
    clf.train(ds_study)
    predictions = clf.predict(ds_test.samples)
    results = np.mean(predictions == ds_test.sa.targets)
    
    # assemble the per-trial rows; scalar subj broadcasts down the column
    predDf['predictions'] = np.array(predictions)
    predDf['subID'] = subj
    predDf['imgFile'] = np.array(test_data.imgFile)
    predDf['trueLabel'] = np.array(test_data.imgType)
    
    
    
    # second print is a consistency check: accuracy recomputed from the frame
    print subj +  'N trials correct: ' + str(len(predictions)) + ' CV acc: ' + str(results) 
    print subj +  'N trials correct: (from dF)' + str(sum(predDf.predictions == predDf.trueLabel)) + ' CV acc: ' + str(results) 
    
    longForm = longForm.append(predDf)
    
    
In [93]:
    
pd.DataFrame.to_csv(longForm,"longForm_svm.csv")
    
In [81]:
    
    
    Out[81]:
In [ ]:
    
    
play around w/ pymvpa classifier...
In [94]:
    
clf = LinearCSVMC(C=-1, probability = 1)
    
In [95]:
    
clf.train(ds_study)
    
In [98]:
    
clf.predict(ds_test.samples)
    
    Out[98]:
In [ ]:
    
    
In [118]:
    
### let's try switching the way that the classifier works. try using stuff from sci-kit
subj = 'PARC_sub_2699'
# load behavioral data
study_csv_name = subj + '_study_timingdata.csv'
test_csv_name = subj + '_test_timingdata.csv'
study_data = pd.read_csv(study_csv_name, sep=',')
test_data = pd.read_csv(test_csv_name, sep=',')
# make variables to load neural data
study_labels = list(study_data.imgType)
test_labels = list(test_data.imgType)
trials = np.array(range(1,97))
runs = np.repeat(range(1,7),16, axis= 0)
# load neural data 
print 'loading neural data...' 
study_beta_name = subj + '.' + study_beta_prefix + '.nii'
mask_name = subj + '.' + mask_prefix + '.nii'
test_beta_name = subj + '.' + test_beta_prefix + '.nii'
ds_study = fmri_dataset(samples = study_beta_name, mask = mask_name, chunks= runs, targets=study_labels)
zscore(ds_study)
ds_test = fmri_dataset(samples = test_beta_name, mask = mask_name, chunks= runs, targets=test_labels)
zscore(ds_test)
    
    
In [60]:
    
from sklearn.linear_model import LogisticRegression
from mvpa2.clfs.skl.base import SKLLearnerAdapter
classifier = SKLLearnerAdapter(LogisticRegression(penalty='l2', C=1.))
# ### CV study
# print 'CV study/n'
# clf = LinearCSVMC(C=-1)
# cvte = CrossValidation(clf, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
# cv_results = cvte(ds_study)
# print subj + '  ' + str(np.mean(cv_results))
# study_accuracy.append(np.mean(cv_results))
    
In [115]:
    
# clf = LinearCSVMC(C=-1)
cvte = CrossValidation(classifier, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
cvte(ds_study)
cvte.ca.stats.stats
    
    Out[115]:
In [116]:
    
clf = LinearCSVMC(C=-1)
clf.get_sensitivity_analyzer()
cvte = CrossValidation(clf, NFoldPartitioner(),errorfx=lambda p, t: np.mean(p == t),enable_ca=['stats'])
cvte(ds_study)
cvte.ca.stats.stats
    
    Out[116]: