Establish baseline classification:

  • Classify FTD vs. Neurological, FTD vs. Psychiatry and FTD vs. Neurological vs. Psychiatry
  • Use LOO (just for the baseline)
  • Use (or not) PCA
  • Use smoothed GM maps (we don't currently have the unsmoothed ones)

In [1]:
import nibabel as nib
import numpy as np
from glob import glob
import os.path as osp
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import recall_score
from time import time

In [2]:
PARENT_DIR = '/data/shared/bvFTD/VBM/default/data'
SIZE_VOXELS = 121 * 145 * 121

In [3]:
ftd_files = glob(osp.join(PARENT_DIR, 'bvFTD', '*', 'structural', 'mri', 's*'))
psych_files = glob(osp.join(PARENT_DIR, 'psychiatric', '*', 'structural', 'mri', 's*'))
neurol_files = glob(osp.join(PARENT_DIR, 'neurological', '*', 'structural', 'mri', 's*'))

Potential ToDo: one of the subjects in FTD has a weird/possibly broken GM pattern. Might need to exclude


In [4]:
print '#FTD: {}; #Psychiatry: {}; #Neurological: {}'.format(len(ftd_files), len(psych_files), len(neurol_files))


#FTD: 16; #Psychiatry: 24; #Neurological: 27

In [6]:
def load_data(X, files_to_load, start_id=0):
    for id_file, file_to_load in enumerate(files_to_load):
        X[id_file + start_id] = nib.load(file_to_load).get_data().ravel().astype(np.float64)

In [14]:
def accuracy(y_true, y_pred):
    return (y_true == y_pred).mean()
    
def balanced_accuracy(y_true, y_pred):
    return 0.5 * (((y_true == 1) & (y_pred == 1)).sum()/float(y_true.sum()) + 
                  ((y_true == 0) & (y_pred == 0)).sum()/float(y_true.size - y_true.sum()))

def specificity(y_true, y_pred):
    return np.sum((y_true == 0) & (y_pred == 0)) / float(np.sum(y_true == 0))

def sensitivity(y_true, y_pred):
    return recall_score(y_true, y_pred)

In [15]:
def make_classification(first_class_files, other_files, use_pca=False, additional_files=None):
    if additional_files is None:        
        X = np.zeros((len(first_class_files) + len(other_files), SIZE_VOXELS))
        y = np.concatenate((np.ones(len(first_class_files), dtype=np.int), np.zeros(len(other_files), dtype=np.int)))
        load_data(X, first_class_files)
        load_data(X, other_files, start_id=len(first_class_files))
    else:
        X = np.zeros((len(first_class_files) + len(other_files) + len(additional_files), SIZE_VOXELS))
        y = np.concatenate((np.ones(len(first_class_files) * 2, dtype=np.int), 
                            np.ones(len(other_files), dtype=np.int),
                            np.zeros(len(additional_files), dtype=np.int)))

        load_data(X, first_class_files)
        load_data(X, other_files, start_id=len(ftd_files))
        load_data(X, additional_files, start_id=len(first_class_files) + len(other_files))

    # Since we don't have a mask, we create one for ourselves by checking which voxels are zero across all the subjects
    id_keep = ~np.all(X == 0, axis=0)
    X = X[:, id_keep]
    y_pred = np.zeros_like(y)
    pca = PCA(n_components=0.9)
    
    # other_files_equal_sampled = np.random.choice(other_files, size = len(first_class_files), replace = False)
    loo = LeaveOneOut()
    t1, t2 = 0., 0.
    for id_split, (train_id, test_id) in enumerate(loo.split(X)):
        print '{}/{} {}'.format(id_split + 1, loo.get_n_splits(X), t2 - t1)
        t1 = time()
        X_train, y_train = X[train_id, :], y[train_id]
        X_test, y_test = X[test_id, :], y[test_id]
        
        if use_pca:
            X_train = pca.fit_transform(X_train)
            print 'Number PCA Components: {}'.format(pca.components_.shape[0])
            X_test = pca.transform(X_test)
        
        # for multiclass: one-vs-one classification as done in libsvm
        svm = SVC(kernel='linear', class_weight='balanced', decision_function_shape='ovo')
        svm.fit(X_train, y_train)
        y_pred[test_id] = svm.predict(X_test)
        t2 = time()

    print 'Accuracy: {}, Balanced Accuracy: {}, Sensitivity: {}, Specificity: {}'.format(accuracy(y, y_pred),
                                                                                     balanced_accuracy(y, y_pred),
                                                                                     sensitivity(y, y_pred),
                                                                                     specificity(y, y_pred))

In [8]:
print 'No PCA'

print 'FTD vs. Psychiatry'
make_classification(ftd_files, psych_files)
print 'Ratio: #Psychiatry/#FTD {}'.format(float(len(psych_files))/len(ftd_files))

print 'FTD vs. Neurlogical'
make_classification(ftd_files, neurol_files)
print 'Ratio: #Neurological/#FTD {}'.format(float(len(neurol_files))/len(ftd_files))

print 'Psychiatry vs. Neurlogical'
make_classification(psych_files, neurol_files)
print 'Ratio: #Neurological/#Psychiatry {}'.format(float(len(psych_files))/len(neurol_files))


No PCA
FTD vs. Psychiatry
1/40 0.0
2/40 2.02267384529
3/40 1.9081389904
4/40 1.9214861393
5/40 1.90757894516
6/40 1.88382506371
7/40 1.88168215752
8/40 1.9257619381
9/40 1.87069892883
10/40 1.79424786568
11/40 1.825922966
12/40 1.81512594223
13/40 1.94961190224
14/40 1.82442998886
15/40 1.95304703712
16/40 1.79126000404
17/40 1.86407399178
18/40 1.83250188828
19/40 1.86302995682
20/40 1.96576809883
21/40 1.89206790924
22/40 1.95329618454
23/40 1.93173694611
24/40 1.95956993103
25/40 1.89207100868
26/40 1.79873299599
27/40 1.93377304077
28/40 1.96898412704
29/40 1.92381882668
30/40 1.88555312157
31/40 1.79703783989
32/40 1.9367620945
33/40 1.85819506645
34/40 1.90107607841
35/40 1.89993190765
36/40 1.85769796371
37/40 1.77455186844
38/40 1.85080099106
39/40 1.842263937
40/40 1.90421199799
Accuracy: 0.8, Balanced Accuracy: 0.770833333333, Sensitivity: 0.625, Specificity: 0.916666666667
Ratio: #Psychiatry/#FTD 1.5
FTD vs. Neurlogical
1/43 0.0
2/43 2.16306805611
3/43 2.22825193405
4/43 2.24361515045
5/43 2.23523592949
6/43 2.24146580696
7/43 2.12228393555
8/43 2.14864110947
9/43 2.17312192917
10/43 2.22614097595
11/43 2.09868097305
12/43 2.0620970726
13/43 2.25437903404
14/43 2.16794991493
15/43 2.16725707054
16/43 2.10635519028
17/43 2.08901000023
18/43 2.05450201035
19/43 2.03426599503
20/43 2.18580102921
21/43 2.12143993378
22/43 2.08780908585
23/43 2.13565516472
24/43 2.16643285751
25/43 2.19337701797
26/43 2.1413090229
27/43 2.08619999886
28/43 2.1203019619
29/43 2.03515195847
30/43 2.13993787766
31/43 2.19915890694
32/43 2.1353969574
33/43 2.08333110809
34/43 2.18326306343
35/43 2.20210814476
36/43 2.13316702843
37/43 2.19767999649
38/43 2.10803604126
39/43 2.15233302116
40/43 2.21483683586
41/43 2.15076684952
42/43 2.19320511818
43/43 2.17129898071
Accuracy: 0.767441860465, Balanced Accuracy: 0.712962962963, Sensitivity: 0.5, Specificity: 0.925925925926
Ratio: #Neurological/#FTD 1.6875
Psychiatry vs. Neurlogical
1/51 0.0
2/51 3.38372397423
3/51 3.32818698883
4/51 3.14945793152
5/51 3.36785793304
6/51 3.3279941082
7/51 3.25546598434
8/51 3.22960400581
9/51 3.3197119236
10/51 3.44951701164
11/51 3.40378189087
12/51 3.45493197441
13/51 3.40537595749
14/51 3.33171105385
15/51 3.27423000336
16/51 3.52415895462
17/51 3.59335398674
18/51 3.10892295837
19/51 2.98692202568
20/51 2.96960496902
21/51 2.98174405098
22/51 2.99890112877
23/51 2.9882979393
24/51 2.94967389107
25/51 3.05838799477
26/51 3.00897097588
27/51 2.9977991581
28/51 3.07102513313
29/51 3.06559896469
30/51 3.02115488052
31/51 2.98466491699
32/51 2.96756219864
33/51 3.04000210762
34/51 3.01734399796
35/51 2.9416000843
36/51 3.04789710045
37/51 2.86659693718
38/51 3.0876789093
39/51 3.02964806557
40/51 3.08566999435
41/51 3.05709290504
42/51 3.01273798943
43/51 3.15437722206
44/51 3.05360889435
45/51 3.03302097321
46/51 3.06709194183
47/51 3.04581904411
48/51 3.04916214943
49/51 2.93495512009
50/51 3.09109997749
51/51 3.14973497391
Accuracy: 0.549019607843, Balanced Accuracy: 0.548611111111, Sensitivity: 0.541666666667, Specificity: 0.555555555556
Ratio: #Neurological/#Psychiatry 0.888888888889

In [11]:
print 'Use PCA'

print 'FTD vs. Psychiatry'
make_classification(ftd_files, psych_files, use_pca=True)
print 'Ratio: #Psychiatry/#FTD {}'.format(float(len(psych_files))/len(ftd_files))

print 'FTD vs. Neurlogical'
make_classification(ftd_files, neurol_files, use_pca=True)
print 'Ratio: #Neurological/#FTD {}'.format(float(len(neurol_files))/len(ftd_files))

print 'Psychiatry vs. Neurlogical'
make_classification(psych_files, neurol_files, use_pca=True)
print 'Ratio: #Neurological/#Psychiatry {}'.format(float(len(psych_files))/len(neurol_files))


Use PCA
FTD vs. Psychiatry
1/40 0.0
Number PCA Components: 26
2/40 3.58164596558
Number PCA Components: 26
3/40 3.69754004478
Number PCA Components: 26
4/40 3.60990405083
Number PCA Components: 26
5/40 3.62865495682
Number PCA Components: 26
6/40 3.55486488342
Number PCA Components: 26
7/40 3.58675408363
Number PCA Components: 26
8/40 3.6234960556
Number PCA Components: 26
9/40 3.6045191288
Number PCA Components: 26
10/40 3.68858790398
Number PCA Components: 26
11/40 3.58685803413
Number PCA Components: 26
12/40 3.67755293846
Number PCA Components: 26
13/40 3.61236810684
Number PCA Components: 26
14/40 3.59847903252
Number PCA Components: 26
15/40 3.59794998169
Number PCA Components: 26
16/40 3.61001992226
Number PCA Components: 26
17/40 3.58113098145
Number PCA Components: 26
18/40 3.57027983665
Number PCA Components: 26
19/40 3.59229898453
Number PCA Components: 26
20/40 3.58244895935
Number PCA Components: 26
21/40 3.62461209297
Number PCA Components: 27
22/40 3.55806112289
Number PCA Components: 26
23/40 3.59500002861
Number PCA Components: 26
24/40 3.55844712257
Number PCA Components: 26
25/40 3.55600786209
Number PCA Components: 26
26/40 3.60363101959
Number PCA Components: 26
27/40 3.57327508926
Number PCA Components: 26
28/40 3.584856987
Number PCA Components: 26
29/40 3.57883381844
Number PCA Components: 26
30/40 3.59634590149
Number PCA Components: 26
31/40 3.61911606789
Number PCA Components: 26
32/40 3.61196303368
Number PCA Components: 26
33/40 3.6383690834
Number PCA Components: 26
34/40 3.63907194138
Number PCA Components: 26
35/40 3.59273314476
Number PCA Components: 26
36/40 3.58671593666
Number PCA Components: 26
37/40 3.63680815697
Number PCA Components: 26
38/40 3.61631608009
Number PCA Components: 26
39/40 3.56498503685
Number PCA Components: 26
40/40 3.58737397194
Number PCA Components: 26
Accuracy: 0.825, Balanced Accuracy: 0.802083333333, Sensitivity: 0.6875, Specificity: 0.916666666667
Ratio: #Psychiatry/#FTD 1.5
FTD vs. Neurlogical
1/43 0.0
Number PCA Components: 28
2/43 4.06137394905
Number PCA Components: 28
3/43 4.1737010479
Number PCA Components: 28
4/43 4.06116890907
Number PCA Components: 28
5/43 4.15613508224
Number PCA Components: 28
6/43 4.26965498924
Number PCA Components: 28
7/43 3.89141607285
Number PCA Components: 28
8/43 3.88381314278
Number PCA Components: 28
9/43 3.84913897514
Number PCA Components: 28
10/43 3.90531301498
Number PCA Components: 28
11/43 3.92245316505
Number PCA Components: 28
12/43 4.02594399452
Number PCA Components: 28
13/43 3.91201496124
Number PCA Components: 28
14/43 3.97264790535
Number PCA Components: 28
15/43 3.92466187477
Number PCA Components: 28
16/43 3.84985780716
Number PCA Components: 28
17/43 3.95992493629
Number PCA Components: 28
18/43 3.83355498314
Number PCA Components: 28
19/43 3.90468597412
Number PCA Components: 28
20/43 3.86132907867
Number PCA Components: 28
21/43 3.86356806755
Number PCA Components: 28
22/43 3.92151904106
Number PCA Components: 28
23/43 3.84605789185
Number PCA Components: 28
24/43 3.86678481102
Number PCA Components: 28
25/43 3.86052298546
Number PCA Components: 28
26/43 3.85285401344
Number PCA Components: 28
27/43 3.83577084541
Number PCA Components: 28
28/43 3.84204697609
Number PCA Components: 28
29/43 3.85794997215
Number PCA Components: 28
30/43 3.89976096153
Number PCA Components: 28
31/43 3.85929393768
Number PCA Components: 28
32/43 3.97245693207
Number PCA Components: 28
33/43 3.87211585045
Number PCA Components: 28
34/43 3.85047578812
Number PCA Components: 28
35/43 3.84924793243
Number PCA Components: 28
36/43 5.10115504265
Number PCA Components: 28
37/43 3.93807601929
Number PCA Components: 28
38/43 3.88902306557
Number PCA Components: 28
39/43 3.85082006454
Number PCA Components: 28
40/43 3.86593008041
Number PCA Components: 28
41/43 3.93513393402
Number PCA Components: 28
42/43 3.97589612007
Number PCA Components: 28
43/43 4.0485329628
Number PCA Components: 28
Accuracy: 0.720930232558, Balanced Accuracy: 0.663194444444, Sensitivity: 0.4375, Specificity: 0.888888888889
Ratio: #Neurological/#FTD 1.6875
Psychiatry vs. Neurlogical
1/51 0.0
Number PCA Components: 35
2/51 4.53009200096
Number PCA Components: 35
3/51 4.55834412575
Number PCA Components: 35
4/51 4.55016493797
Number PCA Components: 35
5/51 4.54938101768
Number PCA Components: 35
6/51 4.43118095398
Number PCA Components: 35
7/51 4.38066792488
Number PCA Components: 35
8/51 4.39934015274
Number PCA Components: 35
9/51 4.42524981499
Number PCA Components: 35
10/51 4.43415784836
Number PCA Components: 35
11/51 4.3790371418
Number PCA Components: 35
12/51 4.38320088387
Number PCA Components: 35
13/51 4.39525008202
Number PCA Components: 35
14/51 4.5753698349
Number PCA Components: 35
15/51 4.71367502213
Number PCA Components: 35
16/51 4.58487415314
Number PCA Components: 35
17/51 4.58193492889
Number PCA Components: 35
18/51 4.71763110161
Number PCA Components: 35
19/51 4.42743182182
Number PCA Components: 35
20/51 4.42557501793
Number PCA Components: 35
21/51 4.43146705627
Number PCA Components: 35
22/51 4.42113995552
Number PCA Components: 35
23/51 4.5189409256
Number PCA Components: 35
24/51 4.41784381866
Number PCA Components: 35
25/51 4.4356341362
Number PCA Components: 35
26/51 4.42649888992
Number PCA Components: 35
27/51 4.41428494453
Number PCA Components: 35
28/51 4.40024495125
Number PCA Components: 35
29/51 4.513422966
Number PCA Components: 35
30/51 4.47757792473
Number PCA Components: 35
31/51 4.39108586311
Number PCA Components: 35
32/51 4.39812302589
Number PCA Components: 35
33/51 4.38263607025
Number PCA Components: 35
34/51 4.3926358223
Number PCA Components: 35
35/51 4.47507810593
Number PCA Components: 35
36/51 4.43460893631
Number PCA Components: 35
37/51 4.43138790131
Number PCA Components: 35
38/51 4.36883997917
Number PCA Components: 35
39/51 4.39952588081
Number PCA Components: 35
40/51 4.41220378876
Number PCA Components: 35
41/51 4.38891100883
Number PCA Components: 35
42/51 4.38982701302
Number PCA Components: 35
43/51 4.42238998413
Number PCA Components: 35
44/51 4.46550011635
Number PCA Components: 35
45/51 4.40474486351
Number PCA Components: 35
46/51 4.37475299835
Number PCA Components: 35
47/51 4.41395115852
Number PCA Components: 35
48/51 4.39801287651
Number PCA Components: 35
49/51 4.4129948616
Number PCA Components: 35
50/51 4.41059017181
Number PCA Components: 35
51/51 4.60696792603
Number PCA Components: 35
Accuracy: 0.509803921569, Balanced Accuracy: 0.506944444444, Sensitivity: 0.458333333333, Specificity: 0.555555555556
Ratio: #Neurological/#Psychiatry 0.888888888889

Does not work yet!


In [ ]:
PARENT_DIR = '/data/shared/bvFTD/VBM/default_non_modulated'
ftd_non_mod_files = glob(osp.join(PARENT_DIR, 'bvFTD', '*', 'structural', 'mri', 's*'))
psych_non_mod_files = glob(osp.join(PARENT_DIR, 'psychiatric', '*', 'structural', 'mri', 's*'))
neurol_non_mod_files = glob(osp.join(PARENT_DIR, 'neurological', '*', 'structural', 'mri', 's*'))

print 'No PCA, Non-Modulated'

print 'FTD vs. Psychiatry'
make_classification(ftd_non_mod_files, psych_non_mod_files)
print 'Ratio: #Psychiatry/#FTD {}'.format(float(len(psych_files))/len(ftd_files))

print 'FTD vs. Neurlogical'
make_classification(ftd_non_mod_files, neurol_non_mod_files)
print 'Ratio: #Neurological/#FTD {}'.format(float(len(neurol_files))/len(ftd_files))

print 'Psychiatry vs. Neurlogical'
make_classification(psych_non_mod_files, neurol_non_mod_files)
print 'Ratio: #Neurological/#Psychiatry {}'.format(float(len(psych_files))/len(neurol_files))

In [ ]:
print 'Multiclass: No PCA'
make_classification(ftd_files, psych_files, use_pca=False, additional_files=neurol_files)