Statistical Testing

Here we perform statistical testing using Functional Data Analysis (FDA), as in this paper. This is explained in the MSc report, Section 5.1.


In [1]:
import random
random.seed(20)

In [2]:
from pprint import pprint

In [3]:
import scipy

In [4]:
import pandas as pd

In [5]:
from pandas.tools.plotting import parallel_coordinates

In [6]:
from pandas import concat

In [7]:
import matplotlib
# Set backend to pgf
matplotlib.use('pgf')
import matplotlib.pyplot as plt
import numpy as np

# Some nice default configuration for plots
plt.rcParams['figure.figsize'] = 10, 7.5
plt.rcParams['axes.grid'] = True
plt.rcParams['axes.color_cycle'] = ['g', 'b', 'r']
plt.gray()

In [8]:
%matplotlib inline

In [9]:
from scipy.io import loadmat

In [10]:
import operator

In [11]:
randomSeed = 20;

# TODO: Change accordingly
CONNECTIVITY_MEASURE = 'dWPLI'
DATASETS_FOLDER = '/home/dragos/DTC/MSc/SummerProject/processed_data/features/'
DATASETS_FOLDER = DATASETS_FOLDER + CONNECTIVITY_MEASURE + '/full_graph/datasets/'
nameOfDataFileMat = 'datasetFullGraphMeasures.mat'
nameOfDataFileCSV = 'datasetFullGraphMeasures.csv'

Dataset File Structure

Dataset layout

The order of the features associated with each band is the following:

Alfa Beta Delta Gamma Theta Class

Each band has 5 columns, where each column corresponds to a graph feature, in the following order:

  • average clustering coefficient (C)
  • characteristic path length (L)
  • global efficiency (GE)
  • small-worldness (SW)
  • modularity (Q)

Create dataset mappings used for Functional Data Analysis (FDA).


In [12]:
# store frequencies of interest order as they appear in the dataset table
FoQ_table_order = dict([('delta', 3), ('theta', 5),
                         ('alpha', 1), ('beta', 2),
                         ('gamma', 4)])

# store plot order (in order of frequency values of each bands)
plot_order = dict([ (1, 'delta'), (2, 'theta'), (3, 'alpha'), 
                   (4, 'beta'), (5, 'gamma') ])

# stores class labels
classLabels = dict([ (1, "CS"), (2, "MCI"), (3, "AD") ])

# stores the order in which the measures are specified in the dataset matrix
graphMeasures = dict([('C', 1), ('L', 2), ('GE', 3), ('SW', 4), ('Q', 5)])

# stores the long names of the measures
graphMeasuresLong = dict([(1, 'Clustering Coefficient'), (2, 'Characteristic Path Length'),
                          (3, 'Global Efficiency'), (4, 'Small-Worldness'), (5, 'Modularity')])

In [13]:
thresholdVec = np.array([[0.05], [0.1], [0.15], [0.2], [0.3]])

In [16]:
sorted(graphMeasures.iteritems(), key=operator.itemgetter(1))


Out[16]:
[('C', 1), ('L', 2), ('GE', 3), ('SW', 4), ('Q', 5)]

FDA Analysis


In [16]:
################################################################################
### FDA Analysis between every two group pair for each measure, in all bands ###

## subject numbers: (26 in CS, 18 in MCI, 36 in AD) ##

classPairs = {(1,2), (1,3), (2,3)}

#with open("pValues","w") as f:

CSidxs = range(0,26); MCIidxs = range(26,44); ADidxs = range(44, 80);

classToNumbering = dict()
classToNumbering[1] = CSidxs
classToNumbering[2] = MCIidxs
classToNumbering[3] = ADidxs


# for each graph measure
for currentMeasure, measureOrder in sorted(graphMeasures.iteritems(), key=operator.itemgetter(1)):

    # for each frequency band of interest
    for bandOrderIdx in plot_order.keys():
        bandName = plot_order[bandOrderIdx]

        for pair in classPairs:
            oneClass = pair[0]
            otherClass = pair[1]                

            classIdxs = classToNumbering[oneClass] + classToNumbering[otherClass]

            ### compute empirical A (between the plotted curves)
            A = 0
            for threshold in np.nditer(thresholdVec):        
                A = A + abs( measureToBand[currentMeasure][bandOrderIdx][oneClass][threshold[()]] -
                            measureToBand[currentMeasure][bandOrderIdx][otherClass][threshold[()]] )

            ### permute and compute Astar for pseudo groups
            AstarList = []

            for l in range(0,10000):
                random.shuffle(classIdxs)
                pseudoOneClass = classIdxs[:len(classToNumbering[oneClass])] # assign first X IDs in the first pseudo group
                pseudoOtherClass = classIdxs[len(classToNumbering[oneClass]):] # assign the rest of the IDs to the second pseudo group

                Astar = 0

                for threshold in np.nditer(thresholdVec):
                    data_file_path = DATASETS_FOLDER + str(threshold) + '/' +  nameOfDataFileMat

                    # load dataset 
                    data_dict = loadmat(data_file_path)
                    data = data_dict['dataset']
                    theThreshold = data_dict['threshold']

                    n_samples = data.shape[0]
                    features = data[:, :-1]
                    targets = data[:, -1]

                    measureIndex = len(graphMeasures)*(FoQ_table_order[bandName]-1) + (graphMeasures[currentMeasure]-1)

                    # compute average and mean of all subjects in the pseudoClass
                    meanPseudoOneClass = (features[pseudoOneClass, measureIndex]).mean()
                    meanPseudoOtherClass = (features[pseudoOtherClass, measureIndex]).mean()

                    Astar = Astar + abs(meanPseudoOneClass - meanPseudoOtherClass)

                AstarList.append(Astar)

            Astar_greater_than_A = sum(i > A for i in AstarList)
            pValue = float(Astar_greater_than_A)/10000
            print(currentMeasure + " " + classLabels[oneClass] + " " + classLabels[otherClass] + 
                  " " + bandName + " " + str(pValue))
            #f.write(currentMeasure + " & " + classLabels[oneClass] + " & " + classLabels[otherClass] + 
            #      " & " + bandName + " & " + str(pValue) + r"\\" + "\n" )


C CS MCI delta 0.147
C CS AD delta 0.1941
C MCI AD delta 0.0027
C CS MCI theta 0.2541
C CS AD theta 0.4323
C MCI AD theta 0.0081
C CS MCI alpha 0.2175
C CS AD alpha 0.9422
C MCI AD alpha 0.0876
C CS MCI beta 0.32
C CS AD beta 0.0287
C MCI AD beta 0.192
C CS MCI gamma 0.4006
C CS AD gamma 0.1254
C MCI AD gamma 0.302
L CS MCI delta 0.2922
L CS AD delta 0.0794
L MCI AD delta 0.0222
L CS MCI theta 0.1522
L CS AD theta 0.6006
L MCI AD theta 0.163
L CS MCI alpha 0.1237
L CS AD alpha 0.0963
L MCI AD alpha 0.6627
L CS MCI beta 0.1532
L CS AD beta 0.0013
L MCI AD beta 0.0674
L CS MCI gamma 0.4131
L CS AD gamma 0.0762
L MCI AD gamma 0.1738
GE CS MCI delta 0.1228
GE CS AD delta 0.1194
GE MCI AD delta 0.0045
GE CS MCI theta 0.1772
GE CS AD theta 0.7637
GE MCI AD theta 0.0644
GE CS MCI alpha 0.2632
GE CS AD alpha 0.73
GE MCI AD alpha 0.2193
GE CS MCI beta 0.85
GE CS AD beta 0.5976
GE MCI AD beta 0.2561
GE CS MCI gamma 0.2032
GE CS AD gamma 0.1104
GE MCI AD gamma 0.014
SW CS MCI delta 0.2604
SW CS AD delta 0.0367
SW MCI AD delta 0.0054
SW CS MCI theta 0.1221
SW CS AD theta 0.7625
SW MCI AD theta 0.0148
SW CS MCI alpha 0.103
SW CS AD alpha 0.1784
SW MCI AD alpha 0.7169
SW CS MCI beta 0.3975
SW CS AD beta 0.2107
SW MCI AD beta 0.7289
SW CS MCI gamma 0.347
SW CS AD gamma 0.191
SW MCI AD gamma 0.0362
Q CS MCI delta 0.7647
Q CS AD delta 0.2373
Q MCI AD delta 0.2566
Q CS MCI theta 0.4433
Q CS AD theta 0.9974
Q MCI AD theta 0.2156
Q CS MCI alpha 0.7613
Q CS AD alpha 0.7781
Q MCI AD alpha 0.6965
Q CS MCI beta 0.6563
Q CS AD beta 0.6552
Q MCI AD beta 0.146
Q CS MCI gamma 0.4493
Q CS AD gamma 0.5859
Q MCI AD gamma 0.8743

In [16]:
print(sorted(graphMeasures.iteritems(), key=operator.itemgetter(1)))


[('C', 1), ('L', 2), ('GE', 3), ('SW', 4), ('Q', 5)]

In [19]:
print(plot_order.keys())


[1, 2, 3, 4, 5]