Here we perform statistical testing using Functional Data Analysis (FDA), as in this paper. This is explained in the MSc report, Section 5.1.
In [1]:
import random
random.seed(20)
In [2]:
from pprint import pprint
In [3]:
import scipy
In [4]:
import pandas as pd
In [5]:
from pandas.tools.plotting import parallel_coordinates
In [6]:
from pandas import concat
In [7]:
import matplotlib
# Set backend to pgf
matplotlib.use('pgf')
import matplotlib.pyplot as plt
import numpy as np
# Some nice default configuration for plots
plt.rcParams['figure.figsize'] = 10, 7.5
plt.rcParams['axes.grid'] = True
plt.rcParams['axes.color_cycle'] = ['g', 'b', 'r']
plt.gray()
In [8]:
%matplotlib inline
In [9]:
from scipy.io import loadmat
In [10]:
import operator
In [11]:
randomSeed = 20;
# TODO: Change accordingly
CONNECTIVITY_MEASURE = 'dWPLI'
DATASETS_FOLDER = '/home/dragos/DTC/MSc/SummerProject/processed_data/features/'
DATASETS_FOLDER = DATASETS_FOLDER + CONNECTIVITY_MEASURE + '/full_graph/datasets/'
nameOfDataFileMat = 'datasetFullGraphMeasures.mat'
nameOfDataFileCSV = 'datasetFullGraphMeasures.csv'
Dataset layout
The order of the features associated with each band is the following:
Alfa Beta Delta Gamma Theta Class
Each band has 5 columns, where each column corresponds to a graph feature, in the following order:
Create dataset mappings used for Functional Data Analysis (FDA).
In [12]:
# store frequencies of interest order as they appear in the dataset table
FoQ_table_order = dict([('delta', 3), ('theta', 5),
('alpha', 1), ('beta', 2),
('gamma', 4)])
# store plot order (in order of frequency values of each bands)
plot_order = dict([ (1, 'delta'), (2, 'theta'), (3, 'alpha'),
(4, 'beta'), (5, 'gamma') ])
# stores class labels
classLabels = dict([ (1, "CS"), (2, "MCI"), (3, "AD") ])
# stores the order in which the measures are specified in the dataset matrix
graphMeasures = dict([('C', 1), ('L', 2), ('GE', 3), ('SW', 4), ('Q', 5)])
# stores the long names of the measures
graphMeasuresLong = dict([(1, 'Clustering Coefficient'), (2, 'Characteristic Path Length'),
(3, 'Global Efficiency'), (4, 'Small-Worldness'), (5, 'Modularity')])
In [13]:
thresholdVec = np.array([[0.05], [0.1], [0.15], [0.2], [0.3]])
In [16]:
sorted(graphMeasures.iteritems(), key=operator.itemgetter(1))
Out[16]:
In [16]:
################################################################################
### FDA Analysis between every two group pair for each measure, in all bands ###
## subject numbers: (26 in CS, 18 in MCI, 36 in AD) ##
classPairs = {(1,2), (1,3), (2,3)}
#with open("pValues","w") as f:
CSidxs = range(0,26); MCIidxs = range(26,44); ADidxs = range(44, 80);
classToNumbering = dict()
classToNumbering[1] = CSidxs
classToNumbering[2] = MCIidxs
classToNumbering[3] = ADidxs
# for each graph measure
for currentMeasure, measureOrder in sorted(graphMeasures.iteritems(), key=operator.itemgetter(1)):
# for each frequency band of interest
for bandOrderIdx in plot_order.keys():
bandName = plot_order[bandOrderIdx]
for pair in classPairs:
oneClass = pair[0]
otherClass = pair[1]
classIdxs = classToNumbering[oneClass] + classToNumbering[otherClass]
### compute empirical A (between the plotted curves)
A = 0
for threshold in np.nditer(thresholdVec):
A = A + abs( measureToBand[currentMeasure][bandOrderIdx][oneClass][threshold[()]] -
measureToBand[currentMeasure][bandOrderIdx][otherClass][threshold[()]] )
### permute and compute Astar for pseudo groups
AstarList = []
for l in range(0,10000):
random.shuffle(classIdxs)
pseudoOneClass = classIdxs[:len(classToNumbering[oneClass])] # assign first X IDs in the first pseudo group
pseudoOtherClass = classIdxs[len(classToNumbering[oneClass]):] # assign the rest of the IDs to the second pseudo group
Astar = 0
for threshold in np.nditer(thresholdVec):
data_file_path = DATASETS_FOLDER + str(threshold) + '/' + nameOfDataFileMat
# load dataset
data_dict = loadmat(data_file_path)
data = data_dict['dataset']
theThreshold = data_dict['threshold']
n_samples = data.shape[0]
features = data[:, :-1]
targets = data[:, -1]
measureIndex = len(graphMeasures)*(FoQ_table_order[bandName]-1) + (graphMeasures[currentMeasure]-1)
# compute average and mean of all subjects in the pseudoClass
meanPseudoOneClass = (features[pseudoOneClass, measureIndex]).mean()
meanPseudoOtherClass = (features[pseudoOtherClass, measureIndex]).mean()
Astar = Astar + abs(meanPseudoOneClass - meanPseudoOtherClass)
AstarList.append(Astar)
Astar_greater_than_A = sum(i > A for i in AstarList)
pValue = float(Astar_greater_than_A)/10000
print(currentMeasure + " " + classLabels[oneClass] + " " + classLabels[otherClass] +
" " + bandName + " " + str(pValue))
#f.write(currentMeasure + " & " + classLabels[oneClass] + " & " + classLabels[otherClass] +
# " & " + bandName + " & " + str(pValue) + r"\\" + "\n" )
In [16]:
print(sorted(graphMeasures.iteritems(), key=operator.itemgetter(1)))
In [19]:
print(plot_order.keys())