Connectivity matrices

Extraction and plotting

Here I read the atlas-partitioned datasets and calculate the connectivity matrix for each subject.


In [15]:
%matplotlib inline

import h5py
import os.path           as     op
import numpy             as     np
import matplotlib.pyplot as     plt
import seaborn           as     sns
from   functools         import partial
from   natsort           import natsorted
from   collections       import OrderedDict

from   boyle.storage            import (get_dataset_names, get_group_names, get_datasets, 
                                        save_variables_to_hdf5, extract_datasets)
from   luigi.similarity_measure import SimilarityMeasureFactory
from   luigi.selection          import TimeSeriesSelectorFactory
from   luigi.connectivity       import build_timeseries, transform_timeseries, calculate_connectivity

from   fabfile                  import get_subject_labels, get_filtered_subjects_ids_and_labels

In [16]:
# SETUP LOGGING AND AUTO TRACER
# THIS CELL MUST BE SECOND IN POSITION
import logging
from   autologging import TRACE, traced

logger    = logging.getLogger()
shandler  = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
shandler.setFormatter(formatter)
logger.addHandler(shandler)
logger.setLevel(TRACE)

In [17]:
work_dir              = '/Users/alexandre/Projects/bcc/cobre/'
timeseries_h5path     = op.join(work_dir, 'cobre_partitioned_timeseries.hdf5')

TR                    = 2
build_timeseries      = partial(build_timeseries, sampling_interval=TR, pre_filter=None, normalize=None)

aalts_groupname       = '/pipe_wtemp_noglob_aal_3mm_func_timeseries'
aalconns_groupname    = '/pipe_wtemp_noglob_aal_3mm_connectivities'
subj_groups           = get_group_names(timeseries_h5path, aalts_groupname)

subj_ids, subj_labels = get_filtered_subjects_ids_and_labels()


fabfile.py read_subject_ids_file: DEBUG Reading list of subject ids from file /Users/alexandre/Projects/bcc/cobre/subject_list.
2015-05-26 14:02:05,154 - fabfile - DEBUG - Reading list of subject ids from file /Users/alexandre/Projects/bcc/cobre/subject_list.
fabfile.py get_filtered_subjects_ids_and_labels: DEBUG Filtering list of files using subjects ids from subject ids file.
2015-05-26 14:02:05,158 - fabfile - DEBUG - Filtering list of files using subjects ids from subject ids file.
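
As a quick sanity check (a minimal sketch, nothing below depends on it), the configured timeseries group should contain one subgroup per subject, and the filtered subject ids should line up with their labels:

In [ ]:
# quick sanity check of the HDF5 layout and the filtered subject lists
with h5py.File(timeseries_h5path, mode='r') as f:
    print(len(f[aalts_groupname]))
print(len(subj_groups), len(subj_ids), len(subj_labels))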

In [18]:
@traced
def get_subject_timeseries(h5file_path, subj_path, sampling_interval=TR):
    """Return the timeseries of one subject in a HDF5 file.
    
    Parameters
    ----------
    h5file_path: str
        Path to the hdf5 file with the subject timeseries.
    
    subj_path: str
        HDF5 internal path to the subject.

    sampling_interval: int or float
        Timeseries sampling interval in seconds.

    Returns
    -------
    timeseries: OrderedDict of nitime.timeseries.TimeSeries
        A dictionary with all the partition timeseries of the subject.
    """
    timeseries = OrderedDict()
    with h5py.File(h5file_path, mode='r') as timeseries_file:
        dspaths    = natsorted(get_dataset_names(timeseries_file, subj_path))
        for dspath in dspaths:
            timeseries[dspath.split('/')[-1]] = build_timeseries(timeseries_file[dspath][:], 
                                                                 sampling_interval=sampling_interval)

    return timeseries


@traced
def get_connectivity_matrix(timeseries, selection_method, similarity_measure):
    """
    Parameters
    ----------
    timeseries: dict or list of (nitime.timeseries.TimeSeries or numpy.ndarray)
        The N sets of timeseries of one subject.
        
    selection_method: str
        The name of the timeseries set transformation method.
        See `luigi.selection.TimeSeriesSelectorFactory.create_method` for more information and the possible choices.

    similarity_measure: str
        The name of the similarity measure.
        See `luigi.similarity_measure.SimilarityMeasureFactory.create_method` for more information and
        the possible choices.
    
    Returns
    -------
    connectivity: numpy.ndarray
        Matrix of shape (N, N)
    """
    selection  = TimeSeriesSelectorFactory.create_method(selection_method)
    similarity = SimilarityMeasureFactory.create_method(similarity_measure)

    # transform_timeseries(timeseries, selection_method, **kwargs)
    transformed_timeseries = transform_timeseries(timeseries, selection)

    # calculate_connectivity(timeseries_set, measure, sampling_interval, lb=0, ub=None, **kwargs):
    return calculate_connectivity(transformed_timeseries, similarity, sampling_interval=TR)


@traced
def create_group_connectivities(timeseries_h5path, subj_groups, selection_method, similarity_measure):
    """
    Parameters
    ----------
    timeseries_h5path: str

    subj_groups: list of str

    selection_method: str
        The name of the timeseries set transformation method.
        See `luigi.selection.TimeSeriesSelectorFactory.create_method` for more information and the possible choices.

    similarity_measure: str
        The name of the similarity measure.
        See `luigi.similarity_measure.SimilarityMeasureFactory.create_method` for more information and
        the possible choices.
    
    Returns
    -------
    connectivities: dict
        Dictionary with subj_id -> connectivity_matrix
    """
    connectivities = OrderedDict()

    for subj_path in subj_groups:
        timeseries   = get_subject_timeseries  (timeseries_h5path, subj_path)
        connectivity = get_connectivity_matrix (timeseries, selection_method, similarity_measure)
        connectivities[subj_path.split('/')[-1]] = connectivity

    return connectivities
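
For a quick check before running the whole cohort, a minimal sketch of how these helpers compose for a single subject ('eigen' and 'correlation' are just example choices from the factories listed further below):

In [ ]:
# single-subject sketch: load one subject's partition timeseries and build its
# connectivity matrix with example selection/similarity choices
example_subj_path = subj_groups[0]
example_ts        = get_subject_timeseries(timeseries_h5path, example_subj_path)
example_conn      = get_connectivity_matrix(example_ts, 'eigen', 'correlation')
print(example_subj_path, example_conn.shape)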

In [19]:
def plot_connectivity_matrix(x, title=None, show_ticklabels=False):
    sns.set(context="paper", font="monospace")
    #sns.set(style="darkgrid")
    #sns.set(rc={"figure.figsize": (6, 6)})
    fig, ax = plt.subplots()
    if title:
        plt.title(title)

    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    #sns.corrplot(x, annot=False, sig_stars=False, diag_names=False, cmap=cmap, ax=ax)
    sns.heatmap(x, linewidths=0, square=True, cmap=cmap)

    #plt.imshow(x, cmap='jet', interpolation='nearest')
    plt.setp(ax.get_yticklabels(), visible=show_ticklabels)
    plt.setp(ax.get_xticklabels(), visible=show_ticklabels)
    #if not show_ticklabels:
        #ax.set_xticklabels([])
        #ax.set_yticklabels([])

    fig.tight_layout()

#plot_connectivity_matrix(connectivity, 'test')
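
Continuing the single-subject sketch above (example_conn is the hypothetical matrix computed there), the plotting helper can be exercised on its own:

In [ ]:
# plot the example connectivity matrix from the single-subject sketch above
plot_connectivity_matrix(example_conn, title=example_subj_path.split('/')[-1])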

In [20]:
@traced
def get_connectivity_matrices(timeseries_h5path, connmats_grouppath, selection_method, similarity_measure, 
                              save_in_hdf=True):
    """
    Parameters
    ----------
    timeseries_h5path: str
        Path to the timeseries hdf5 file
    
    connmats_grouppath: str
        HDF5 group path to the group where the connectivity matrices for
        the given selection_method and similarity_measure are stored
    
    selection_method: str
        Choices:
        methods = OrderedDict([ ('mean',          MeanTimeSeries),
                                ('eigen',         EigenTimeSeries),
                                ('ilsia',         ILSIATimeSeries),
                                ('cca',           CCATimeSeries),
                                ('filtered',      FilteredTimeSeries),
                                ('filtered_mean', MeanFilteredTimeSeries),
                                ('filered_eigen', EigenFilteredTimeSeries)])
    similarity_measure: str
        Choices:
        methods = OrderedDict([ ('correlation',          CorrelationMeasure),
                                ('coherence',            NiCoherenceMeasure),
                                ('grangercausality',     NiGrangerCausalityMeasure),
                                ('nicorrelation',        NiCorrelationMeasure),
                                ('seedcorrelation',      SeedCorrelationMeasure),
                                ('seedcoherence',        SeedCoherenceMeasure),
                                ('mean_coherence',       MeanCoherenceMeasure),
                                ('mean_seedcoherence',   MeanSeedCoherenceMeasure),
                                ('mean_seedcorrelation', MeanSeedCorrelationMeasure)])

    Returns
    -------
    connectivities: dict
        Dictionary with subj_id -> connectivity_matrix

    """
    #from IPython.core.debugger import Tracer
    #Tracer()()
    
    file_groups = get_group_names(timeseries_h5path, aalconns_groupname)
   
    if connmats_grouppath in file_groups:
        
        connectivities = extract_datasets(timeseries_h5path, connmats_grouppath)
    else:
        try:
            print('Calculating connectivity matrices using {} and {}.'.format(selection_method, similarity_measure))
            connectivities = create_group_connectivities(timeseries_h5path, subj_groups,
                                                          selection_method, similarity_measure)
        except:
            print('Error calculating connectivity matrices using {} and {}.'.format(selection_method, 
                                                                                    similarity_measure))
            raise
        else:
            # save the connectivity matrices into the hdf file
            save_variables_to_hdf5(timeseries_h5path, 
                                   {'{}-{}'.format(selection_method, similarity_measure): connectivities},
                                   mode='a', 
                                   h5path=connmats_grouppath)

    return connectivities

# Alternative: compute the connectivity matrices in parallel across subjects.
from functools import partial
from time import time

from boyle.parallel import parallel_function

selection_method   = 'eigen'
similarity_measure = 'mean_coherence'

def get_connectivity(timeseries_h5path, subj_path, selection_method, similarity_measure):
    timeseries = get_subject_timeseries(timeseries_h5path, subj_path)
    return get_connectivity_matrix(timeseries, selection_method, similarity_measure)

get_my_connectivities = partial(get_connectivity, timeseries_h5path,
                                selection_method=selection_method,
                                similarity_measure=similarity_measure)

get_my_connectivities.parallel = parallel_function(get_my_connectivities, n_cpus=3)

start = time()
conns = get_my_connectivities.parallel(subj_groups)

connectivities = OrderedDict()
for subj_path, conn in zip(subj_groups, conns):
    connectivities[subj_path.split('/')[-1]] = conn

parallel_time = time() - start
print('parallel_time: {}'.format(parallel_time))


In [21]:
#for conn in connectivities:
#    plot_connectivity_matrix(connectivities[conn], conn)

In [22]:
#from nitime.viz import drawmatrix_channels

#for conn in connectivities:
#    drawmatrix_channels(connectivities[conn], [], size=[10., 10.], color_anchor=0)

Classification


In [23]:
def extract_lower_triangular_matrix(x, k=0):
    """Return the lower triangular values of x without the main diagonal.
    
    Parameters
    ----------
    x: numpy.ndarray
        2D square matrix

    k : int, optional
        Diagonal offset, as in numpy.tril_indices_from.
        k = 0 (the default) includes the main diagonal,
        k = -1 excludes it.

    Returns
    -------
    features: numpy.ndarray
        vector
    """
    return x[np.tril_indices_from(x, k=k)]


def number_of_triangular_elements(x, k=0):
    """Return the number of elements that the lower triangular matrix of x has.
    
    Parameters
    ----------
    x: numpy.ndarray
        2D square matrix

    k : int, optional
        Diagonal offset, as in numpy.tril.
        k = 0 (the default) includes the main diagonal,
        k = -1 excludes it.

    Returns
    -------
    n_elems: int
        number of elements in the triangular matrix
    """
    if not isinstance(x, np.ndarray):
        raise TypeError('Expected a numpy.ndarray, got a {}.'.format(type(x)))

    if x.ndim != 2:
        raise TypeError('Expected a 2D matrix, got a matrix with {} dimensions.'.format(x.ndim))

    if x.shape[0] != x.shape[1]:
        raise TypeError('Expected a square matrix, got a matrix with shape {}'.format(x.shape))

    if k == 0:
        rows    = x.shape[1]
        n_elems = 0.5 * ((rows + 1) * rows)
    else:
        ones    = np.ones_like(x)
        n_elems = np.sum(np.tril(ones, k=k)) 
    
    return int(n_elems)
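
A tiny illustrative check of both helpers on a 3x3 matrix; with k=-1 the main diagonal is dropped, leaving N*(N-1)/2 = 3 values:

In [ ]:
# illustrative check of the triangular helpers on a small matrix
toy = np.arange(9, dtype=float).reshape(3, 3)
print(extract_lower_triangular_matrix(toy, k=-1))  # the values below the diagonal: 3, 6, 7
print(number_of_triangular_elements(toy, k=-1))    # 3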

In [24]:
@traced
def create_cobre_connmat_featuresets(timeseries_h5path, connmats_grouppath, selection_method, similarity_measure):
    # keep only the strictly lower triangular values (k=-1 excludes the main diagonal)
    k = -1

    # get the data
    connectivities = get_connectivity_matrices(timeseries_h5path, connmats_grouppath, 
                                               selection_method, similarity_measure)

    # create the samples matrix
    sample     = next (iter (connectivities.values()))
    n_subjects = len(connectivities)
    n_features = number_of_triangular_elements(sample, k=k)

    feature_matrix = np.zeros((n_subjects, n_features), dtype=sample.dtype)

    # fill the feature matrix
    for idx, conn in enumerate(connectivities):
        feature_matrix[idx, :] = extract_lower_triangular_matrix(connectivities[conn], k=k)

    return feature_matrix
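
As a rough sanity check (left commented because it triggers the full computation; the 'eigen'/'correlation' combination is only an example), the feature matrix should have one row per subject and N*(N-1)/2 columns for an N-region atlas, since k=-1 keeps only the strictly lower triangle:

In [ ]:
#features = create_cobre_connmat_featuresets(timeseries_h5path,
#                                            '{}/{}_{}'.format(aalconns_groupname, 'eigen', 'correlation'),
#                                            'eigen', 'correlation')
#print(features.shape)  # (n_subjects, n_regions * (n_regions - 1) / 2)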

Parameter grid


In [25]:
from sklearn.grid_search import ParameterGrid

def build_param_grid(*variable_names):
    # look up each variable by its name in the notebook's global namespace
    adict = {var: eval(var) for var in variable_names}
    return ParameterGrid(adict)

In [26]:
# set up the parameter values grid
selection_methods   = ['eigen', 'mean'] #'ilsia', 'cca']
similarity_measures = ['correlation', 'coherence', 'grangercausality']
classifiers_names   = ['RandomForestClassifier', 'RBFSVC', 'LinearSVC', 'GMM']
cvmethod            = 'loo'

# similarity measures value ranges
sm_value_ranges = {'correlation':      (-1, 1  ),
                   'coherence':        (-1, 1  ),
                   'grangercausality': ( 0, 0.5),}

In [27]:
param_grid = build_param_grid('selection_methods', 'similarity_measures', 'classifiers_names')
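
ParameterGrid expands these lists into their Cartesian product, so the grid above holds 2 selection methods x 3 similarity measures x 4 classifiers = 24 combinations; a small illustration of the expansion:

In [ ]:
# ParameterGrid yields one dict per combination of the given value lists
print(len(param_grid))  # 2 * 3 * 4 = 24
for params in ParameterGrid({'a': [1, 2], 'b': ['x']}):
    print(params)       # two dicts: a=1/b='x' and a=2/b='x'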

Experiment


In [ ]:
def classification_experiments():
    from darwin.pipeline import ClassificationPipeline

    logger.setLevel(logging.INFO)
    
    results = {}
    metrics = {}

    for params in param_grid:

        selection_method   = params['selection_methods']
        similarity_measure = params['similarity_measures']
        classifier_name    = params['classifiers_names']

        connmats_grouppath = '{}/{}_{}'.format(aalconns_groupname, selection_method, similarity_measure)

        # get features
        print('Getting functional matrices data for {}.'.format(str(params)))
        features = create_cobre_connmat_featuresets(timeseries_h5path, connmats_grouppath, 
                                                    selection_method, similarity_measure)

        # -- test with darwin
        pipe = ClassificationPipeline(clfmethod=classifier_name, cvmethod=cvmethod)

        print('Classifying with settings: {}.'.format(str(params)))
        results[str(params)], metrics[str(params)] = pipe.cross_validation(features, np.array(subj_labels))

    return results, metrics


import shelve
results, metrics = classification_experiments()
d = shelve.open(op.join(work_dir, 'classification_results_metrics.shelve'))
d['results'] = results
d['metrics'] = metrics
d.close()


Getting functional matrices data for {'similarity_measures': 'correlation', 'classifiers_names': 'RandomForestClassifier', 'selection_methods': 'eigen'}.
Classifying with settings: {'similarity_measures': 'correlation', 'classifiers_names': 'RandomForestClassifier', 'selection_methods': 'eigen'}.
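
The shelve file written above can be reopened later to inspect the stored results without rerunning the experiments; a minimal sketch:

In [ ]:
# reopen the shelve file and list which parameter combinations were stored
d = shelve.open(op.join(work_dir, 'classification_results_metrics.shelve'))
print(list(d['metrics'].keys()))
d.close()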

Plot connectivity matrices


In [ ]:
from nitime.viz import make_axes_locatable
import matplotlib.ticker as ticker

def drawmatrix_channels(in_m, channel_names=None, fig=None, x_tick_rot=0,
                        size=None, cmap=plt.cm.RdBu_r, colorbar=True,
                        color_anchor=None, title=None):
    """Creates a lower-triangle of the matrix of an nxn set of values. This is
    the typical format to show a symmetrical bivariate quantity (such as
    correlation or coherence between two different ROIs).

    Parameters
    ----------

    in_m: nxn array with values of relationships between two sets of rois or
    channels

    channel_names (optional): list of strings with the labels to be applied to
    the channels in the input. Defaults to '0','1','2', etc.

    fig (optional): a matplotlib figure

    cmap (optional): a matplotlib colormap to be used for displaying the values
    of the connections on the graph

    title (optional): string to title the figure (can be like '$\alpha$')

    color_anchor (optional): determine the mapping from values to colormap
        if None, min and max of colormap correspond to min and max of in_m
        if 0, min and max of colormap correspond to max of abs(in_m)
        if (a,b), min and max of colormap correspond to (a,b)

    Returns
    -------

    fig: a figure object

    """
    N = in_m.shape[0]
    ind = np.arange(N)  # the evenly spaced plot indices

    def channel_formatter(x, pos=None):
        thisind = np.clip(int(x), 0, N - 1)
        return channel_names[thisind]

    if fig is None:
        fig = plt.figure()

    if size is not None:

        fig.set_figwidth(size[0])
        fig.set_figheight(size[1])

    w = fig.get_figwidth()
    h = fig.get_figheight()

    ax_im = fig.add_subplot(1, 1, 1)

    #If you want to draw the colorbar:
    if colorbar:
        divider = make_axes_locatable(ax_im)
        ax_cb = divider.new_vertical(size="10%", pad=0.1, pack_start=True)
        fig.add_axes(ax_cb)

    #Make a copy of the input, so that you don't make changes to the original
    #data provided
    m = in_m.copy()

    #Null the upper triangle, so that you don't get the redundant and the
    #diagonal values:
    idx_null = np.triu_indices(m.shape[0])
    m[idx_null] = np.nan

    #Extract the minimum and maximum values for scaling of the
    #colormap/colorbar:
    max_val = np.nanmax(m)
    min_val = np.nanmin(m)

    if color_anchor is None:
        color_min = min_val
        color_max = max_val
    elif color_anchor == 0:
        bound = max(abs(max_val), abs(min_val))
        color_min = -bound
        color_max = bound
    else:
        color_min = color_anchor[0]
        color_max = color_anchor[1]

    #The call to imshow produces the matrix plot:
    im = ax_im.imshow(m, origin='upper', interpolation='nearest',
       vmin=color_min, vmax=color_max, cmap=cmap)

    #Formatting:
    ax = ax_im
    ax.grid(True)
    #Label each of the cells with the row and the column:
    if channel_names is not None:
        for i in range(0, m.shape[0]):
            if i < (m.shape[0] - 1):
                ax.text(i - 0.3, i, channel_names[i], rotation=x_tick_rot)
            if i > 0:
                ax.text(-1, i + 0.3, channel_names[i],
                        horizontalalignment='right')

        ax.set_axis_off()
        ax.set_xticks(np.arange(N))
        ax.xaxis.set_major_formatter(ticker.FuncFormatter(channel_formatter))
        fig.autofmt_xdate(rotation=x_tick_rot)
        ax.set_yticks(np.arange(N))
        ax.set_yticklabels(channel_names)
        ax.set_ybound([-0.5, N - 0.5])
        ax.set_xbound([-0.5, N - 1.5])

    #Make the tick-marks invisible:
    for line in ax.xaxis.get_ticklines():
        line.set_markeredgewidth(0)

    for line in ax.yaxis.get_ticklines():
        line.set_markeredgewidth(0)

    ax.set_axis_off()

    if title is not None:
        ax.set_title(title)

    #The following produces the colorbar and sets the ticks
    if colorbar:
        #Set the ticks - if 0 is in the interval of values, set that, as well
        #as the maximal and minimal values:
        if min_val < 0:
            ticks = [color_min, min_val, 0, max_val, color_max]
        #Otherwise - only set the minimal and maximal value:
        else:
            ticks = [color_min, min_val, max_val, color_max]

        #This makes the colorbar:
        cb = fig.colorbar(im, cax=ax_cb, orientation='horizontal',
                          cmap=cmap,
                          norm=im.norm,
                          boundaries=np.linspace(color_min, color_max, 256),
                          ticks=ticks,
                          format='%.2f')

    # Set the current figure active axis to be the top-one, which is the one
    # most likely to be operated on by users later on
    fig.sca(ax)

    return fig

In [ ]:
def plot_all_connectivity_matrices():
    from darwin.pipeline import ClassificationPipeline

    logger.setLevel(logging.INFO)

    results = {}
    metrics = {}

    param_grid = build_param_grid('selection_methods', 'similarity_measures')

    n_combinations = len(selection_methods) * len(similarity_measures)

    plot_count = 1
    for paramidx, params in enumerate(param_grid):

        selection_method   = params['selection_methods']
        similarity_measure = params['similarity_measures']
        value_range        = sm_value_ranges[similarity_measure]

        connmats_grouppath = '{}/{}_{}'.format(aalconns_groupname, selection_method, similarity_measure)

        print('Getting functional matrices data for {}.'.format(str(params)))
        connectivities = get_connectivity_matrices(timeseries_h5path, connmats_grouppath, 
                                                   selection_method, similarity_measure)

        n_subjects = len(connectivities)
        for connidx, conn in enumerate(connectivities):
            #plot_idx = plot_count + paramidx * n_subjects
            ax = plt.subplot(n_subjects, n_combinations, plot_count)
            drawmatrix_channels(connectivities[conn], title=conn)#, color_anchor=value_range)
            plot_count += 1
            plt.show()
            #print(plot_idx)


#plot_all_connectivity_matrices()

In [ ]:
%debug

In [ ]:
!h5ls /Users/alexandre/Projects/bcc/cobre/cobre_partitioned_timeseries.hdf5/pipe_wtemp_noglob_aal_3mm_connectivities

In [ ]: