In [1]:
# Import Necessary Libraries
import os
import numpy as np
import scipy.io
import scipy.stats as stats
import matplotlib
from matplotlib import pyplot as plt
import itertools
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.decomposition import PCA
from scipy.spatial import distance as Distance
# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')
%matplotlib inline
In [3]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]  # skip non-session entries at the top of the listing
print "Blocks are: \n", os.listdir(filedir + sessions[0])
# functions for finding the different groups of word pairings
def find_same(wordpair, groups):
    # look for the identical word pair in the other block's groups
    try:
        sameword_index = groups.index(wordpair)
    except ValueError:
        sameword_index = -1
    return sameword_index
def find_reverse(wordpair, groups):
    # split wordpair and reverse the order of its two words
    wordsplit = wordpair.split('_')
    wordsplit.reverse()
    reverseword = '_'.join(wordsplit)
    # find index of the reversed word pair
    try:
        reverseword_index = groups.index(reverseword)
    except ValueError:
        reverseword_index = -1
    return reverseword_index
def find_different(wordpair, groups):
    # a "different" pair shares no words with wordpair
    wordsplit = wordpair.split('_')
    differentword_index = []
    for idx, group in enumerate(groups):
        groupsplit = group.split('_')
        if not any(x in groupsplit for x in wordsplit):
            differentword_index.append(idx)
    # convert to a single number if there is only one match
    if len(differentword_index) == 1:
        differentword_index = differentword_index[0]
    return differentword_index
def find_probe(wordpair, groups):
    # a "probe" pair shares the first (probe) word but not the second
    wordsplit = wordpair.split('_')
    probeword_index = []
    # loop through groups to check each word pair
    for idx, group in enumerate(groups):
        groupsplit = group.split('_')
        # check if only the probe word overlaps
        if wordsplit[0] == groupsplit[0] and wordsplit[1] != groupsplit[1]:
            probeword_index.append(idx)
    # convert to a single number, or -1 if there is no match
    if len(probeword_index) != 1 and probeword_index:
        print probeword_index
        print "problem in find probe"
    elif not probeword_index:  # if no probe words overlap
        probeword_index = -1
    else:
        probeword_index = probeword_index[0]
    return probeword_index
def find_target(wordpair, groups):
    # a "target" pair shares the second (target) word but not the first
    wordsplit = wordpair.split('_')
    targetword_index = []
    # loop through groups to check each word pair
    for idx, group in enumerate(groups):
        groupsplit = group.split('_')
        # check if only the target word overlaps
        if wordsplit[1] == groupsplit[1] and wordsplit[0] != groupsplit[0]:
            targetword_index.append(idx)
    # convert to a single number, or -1 if there is no match
    if len(targetword_index) != 1 and targetword_index:
        print targetword_index
        print "problem in find target"
    elif not targetword_index:  # if no target words overlap
        targetword_index = -1
    else:
        targetword_index = targetword_index[0]
    return targetword_index
# check if a candidate pairing is already in the list of groupings
def inGroup(group, names):
    for i in range(0, len(group)):
        if cmpT(group[i], names):
            return True
    return False

# order-insensitive comparison of two pairings
def cmpT(t1, t2):
    return sorted(t1) == sorted(t2)
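In [ ]:
# Sanity check of the matching helpers on a made-up word-pair list
# (hypothetical pairs for illustration, not the experiment's word pool):
toy_groups = ['juice_glass', 'glass_juice', 'pants_brick', 'juice_brick', 'pants_glass']
print(find_same('juice_glass', toy_groups))       # -> 0 : identical pair
print(find_reverse('juice_glass', toy_groups))    # -> 1 : order flipped
print(find_different('juice_glass', toy_groups))  # -> 2 : shares no words
print(find_probe('juice_glass', toy_groups))      # -> 3 : same probe (first) word
print(find_target('juice_glass', toy_groups))     # -> 4 : same target (second) word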
In [2]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]  # skip non-session entries at the top of the listing
session_pval_dict = {}
# look at the first block of the first session to get the channel ordering
sessiondir = filedir + sessions[0]
# get all blocks for this session
blocks = os.listdir(sessiondir)
block = blocks[0]
block_dir = sessiondir + '/' + block
# in each block, get list of word pairs from first and second block
wordpairs = os.listdir(block_dir)
channels = os.listdir(block_dir + '/' + wordpairs[0])
chan_order = []
for chan in sorted(channels):  # iterate channels in sorted name order
    chan_order.append(chan)
chan_order = np.array(chan_order)
print len(chan_order)
def binarize_pval_mat(pval_mat):
    # use 0.5 as a temporary sentinel so the two threshold assignments don't collide
    pval_mat[pval_mat > 0.05] = 0.5
    pval_mat[pval_mat <= 0.05] = 1
    pval_mat[pval_mat == 0.5] = 0
    return pval_mat
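In [ ]:
# Quick check of binarize_pval_mat on a toy p-value matrix (hypothetical
# numbers): entries <= 0.05 map to 1 (significant), everything else to 0.
toy_pvals = np.array([[0.01, 0.20], [0.05, 0.80]])
print(binarize_pval_mat(toy_pvals))  # -> [[ 1.  0.]
                                     #     [ 1.  0.]]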
In [4]:
### Functions to help extract features and plot histograms of distances
# loops through each word-pairing group and extracts features; relies on the
# module-level firstblock_dir, secondblock_dir, freq_bands, and distances,
# which are defined in the cells below
def extractFeatures(wordgroup, session, blockone, blocktwo):
    fig = plt.figure(figsize=(7.5 * len(wordgroup), 3))
    for idx, pair in enumerate(wordgroup):
        # directories for the two word pairs being compared
        first_wordpair_dir = firstblock_dir + '/' + pair[0]
        second_wordpair_dir = secondblock_dir + '/' + pair[1]
        # initialize lists for holding feature vectors for each event
        first_pair_features = []
        second_pair_features = []
        # load in channels
        first_channels = os.listdir(first_wordpair_dir)
        second_channels = os.listdir(second_wordpair_dir)
        # loop through channels
        for jdx, chans in enumerate(first_channels):
            first_chan_file = first_wordpair_dir + '/' + chans
            second_chan_file = second_wordpair_dir + '/' + chans
            # load in data
            data_first = scipy.io.loadmat(first_chan_file)
            data_first = data_first['data']
            data_second = scipy.io.loadmat(second_chan_file)
            data_second = data_second['data']
            ## 06: get the time point for probeword on
            first_timeZero = data_first['timeZero'][0][0][0]
            second_timeZero = data_second['timeZero'][0][0][0]
            ## 07: get the time point of vocalization
            first_vocalization = data_first['vocalization'][0][0][0]
            second_vocalization = data_second['vocalization'][0][0][0]
            ## Power Matrix (events x frequency bands x time)
            first_matrix = data_first['powerMatZ'][0][0]
            second_matrix = data_second['powerMatZ'][0][0]
            first_matrix = first_matrix[:, freq_bands, :]
            second_matrix = second_matrix[:, freq_bands, :]
            ### 02: average power over the window before vocalization
            first_mean = []
            second_mean = []
            for i in range(0, len(first_vocalization)):
                # either go from timeZero -> vocalization, or some other time window
                first_mean.append(np.mean(first_matrix[i, :, first_timeZero:first_vocalization[i]], axis=1))
                # first_mean.append(np.ndarray.flatten(first_matrix[i, :, first_vocalization[i]-num_time_windows:first_vocalization[i]]))
            for i in range(0, len(second_vocalization)):
                second_mean.append(np.mean(second_matrix[i, :, second_timeZero:second_vocalization[i]], axis=1))
                # second_mean.append(np.ndarray.flatten(second_matrix[i, :, second_vocalization[i]-num_time_windows:second_vocalization[i]]))
            # build the feature matrix for each event, concatenating channels column-wise
            if jdx == 0:
                first_pair_features = np.squeeze(np.array(first_mean))
                second_pair_features = np.squeeze(np.array(second_mean))
            else:
                first_pair_features = np.concatenate((first_pair_features, first_mean), axis=1)
                second_pair_features = np.concatenate((second_pair_features, second_mean), axis=1)
        # end of loop through channels
        # compute all pairwise distances between the two feature matrices
        pair_distances = computePairDistances(first_pair_features, second_pair_features)
        ## Plotting Paired Distances
        plt.subplot(1, len(wordgroup), idx + 1)
        plt.hist(pair_distances, label=pair[0] + ' vs ' + pair[1], lw=3, alpha=0.75)
        plt.ylim([0.0, 6.0])
        plt.xlim([0.0, 1.6])
        plt.legend()
        plt.title(session + ' comparing ' + pair[0] + ' (' + str(len(first_pair_features)) + ') vs ' + pair[1] + ' (' + str(len(second_pair_features)) + ')')
# Compute all pairwise distances between the rows (events) of first_mat and second_mat
def computePairDistances(first_mat, second_mat):
    distance_list = []
    for idx in range(0, first_mat.shape[0]):
        # distance from this first-block event to every second-block event
        distance_list.append([distances(x, first_mat[idx, :]) for x in second_mat])
    # flatten into a single 1-D array of n_first * n_second distances
    return np.array(distance_list).flatten()
In [5]:
################################### HYPER-PARAMETERS TO TUNE #######################################################
np.random.seed(123456789)    # for reproducibility, set random seed
anova_threshold = 90         # how many channels we want to keep
distances = Distance.cosine  # define distance metric to use
num_time_windows = 5
low_freq_bands = [0, 1, 2]
high_freq_bands = [3, 4, 5, 6]
freq_bands = np.arange(0, 7, 1)
freq_labels = ['delta', 'theta', 'alpha', 'beta', 'low gamma', 'high gamma', 'HFO']
print 'low bands: ', [freq_labels[i] for i in low_freq_bands]
print 'high bands: ', [freq_labels[i] for i in high_freq_bands]
print "The length of the feature vector for each channel will be: ", \
    num_time_windows * len(freq_bands), \
    ' total =', 96 * num_time_windows * len(freq_bands)
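In [ ]:
# Toy check of the distance pipeline on random features (hypothetical shapes:
# 4 and 5 events, 7 frequency-band features each). With distances set to
# Distance.cosine above, computePairDistances returns a flat array of
# 4 * 5 = 20 cosine distances -- the values histogrammed by extractFeatures.
toy_first = np.random.rand(4, 7)
toy_second = np.random.rand(5, 7)
print(computePairDistances(toy_first, toy_second).shape)  # -> (20,)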
In [ ]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]  # skip non-session entries at the top of the listing
# loop through each session
for session in sessions:
    print "Analyzing session ", session
    sessiondir = filedir + session
    # get all blocks for this session
    blocks = os.listdir(sessiondir)
    if len(blocks) != 6:  # error check on the directories
        print blocks
        print "Error in the # of blocks. There should be 6."
        break
    # loop through consecutive pairs of blocks, one pair at a time
    for i in range(0, 5):
        print "Analyzing block ", blocks[i], ' and ', blocks[i+1]
        firstblock = blocks[i]
        secondblock = blocks[i+1]
        firstblock_dir = sessiondir + '/' + firstblock
        secondblock_dir = sessiondir + '/' + secondblock
        # in each block, get list of word pairs from first and second block
        first_wordpairs = os.listdir(firstblock_dir)
        second_wordpairs = os.listdir(secondblock_dir)
        diff_word_group = []
        reverse_word_group = []
        probe_word_group = []
        target_word_group = []
        same_word_group = []
        print first_wordpairs
        print second_wordpairs
        #### plot meta information about which session and blocks we're analyzing
        fig = plt.figure()
        axes = plt.gca()
        ymin, ymax = axes.get_ylim()
        xmin, xmax = axes.get_xlim()
        plt.text((xmax - xmin) / 4.5, (ymax - ymin) / 2,
                 'Session %s\ncomparing %s vs. %s' % (session, firstblock, secondblock),
                 fontsize=20)
        plt.title(session + ' comparing blocks: ' + firstblock + ' vs. ' + secondblock)
        plt.grid(False)
        # go through first block and assign pairs to the different groups
        for idx, pair in enumerate(first_wordpairs):
            # obtain indices of: sameword, reverseword, differentwords, probeoverlap, targetoverlap
            same_word_index = find_same(pair, second_wordpairs)
            reverse_word_index = find_reverse(pair, second_wordpairs)
            diff_word_index = find_different(pair, second_wordpairs)
            probe_word_index = find_probe(pair, second_wordpairs)
            target_word_index = find_target(pair, second_wordpairs)
            # append each pairing to its word group, skipping duplicates
            if same_word_index != -1 and not inGroup(same_word_group, [pair, second_wordpairs[same_word_index]]):
                same_word_group.append([pair, second_wordpairs[same_word_index]])
            if reverse_word_index != -1 and not inGroup(reverse_word_group, [pair, second_wordpairs[reverse_word_index]]):
                reverse_word_group.append([pair, second_wordpairs[reverse_word_index]])
            if diff_word_index != -1:
                if isinstance(diff_word_index, list):  # if a list, add one pairing at a time
                    for diffI in diff_word_index:
                        if not inGroup(diff_word_group, [pair, second_wordpairs[diffI]]):
                            diff_word_group.append([pair, second_wordpairs[diffI]])
                else:
                    diff_word_group.append([pair, second_wordpairs[diff_word_index]])
            if probe_word_index != -1 and not inGroup(probe_word_group, [pair, second_wordpairs[probe_word_index]]):
                probe_word_group.append([pair, second_wordpairs[probe_word_index]])
            if target_word_index != -1 and not inGroup(target_word_group, [pair, second_wordpairs[target_word_index]]):
                target_word_group.append([pair, second_wordpairs[target_word_index]])
        # end of loop through word pairs
        print same_word_group
        print reverse_word_group
        print probe_word_group
        print target_word_group
        print diff_word_group[0:2]
        #### Go through each group and extract the feature data per event
        extractFeatures(same_word_group, session, firstblock, secondblock)
        extractFeatures(reverse_word_group, session, firstblock, secondblock)
        extractFeatures(probe_word_group, session, firstblock, secondblock)
        extractFeatures(target_word_group, session, firstblock, secondblock)
        extractFeatures(diff_word_group[0:2], session, firstblock, secondblock)
        break  # only analyze the first pair of blocks for now