In [1]:
# Import Necessary Libraries
import os
import numpy as np
import scipy.io
import scipy.stats as stats
import matplotlib
from matplotlib import pyplot as plt
import itertools
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.decomposition import PCA
from scipy.spatial import distance as Distance
# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')
%matplotlib inline
In [3]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]  # skip non-session entries at the top of the listing
print "Blocks are: \n", os.listdir(filedir + sessions[0])
# functions for finding the different groups of word pairings
def find_same(wordpair, groups):
    # look for the identical word pair in the other block's groups
    try:
        sameword_index = groups.index(wordpair)
    except ValueError:
        sameword_index = -1
    return sameword_index
def find_reverse(wordpair, groups):
    # split wordpair and reverse the order of its two words
    wordsplit = wordpair.split('_')
    wordsplit.reverse()
    reverseword = '_'.join(wordsplit)
    # find index of the reversed word pair
    try:
        reverseword_index = groups.index(reverseword)
    except ValueError:
        reverseword_index = -1
    return reverseword_index
def find_different(wordpair, groups):
    # a "different" pair shares no words with wordpair
    wordsplit = wordpair.split('_')
    differentword_index = []
    for idx, group in enumerate(groups):
        groupsplit = group.split('_')
        if not any(x in groupsplit for x in wordsplit):
            differentword_index.append(idx)
    # convert to a single number if there is only one match
    if len(differentword_index) == 1:
        differentword_index = differentword_index[0]
    return differentword_index
def find_probe(wordpair, groups):
    # a "probe" pair shares the first (probe) word but not the second
    wordsplit = wordpair.split('_')
    probeword_index = []
    # loop through groups to check each word pair
    for idx, group in enumerate(groups):
        groupsplit = group.split('_')
        # check if only the probe word overlaps
        if wordsplit[0] == groupsplit[0] and wordsplit[1] != groupsplit[1]:
            probeword_index.append(idx)
    # convert to a single number, or -1 if there is no match
    if len(probeword_index) != 1 and probeword_index:
        print probeword_index
        print "problem in find probe"
    elif not probeword_index:  # if no probe words overlap
        probeword_index = -1
    else:
        probeword_index = probeword_index[0]
    return probeword_index
def find_target(wordpair, groups):
    # a "target" pair shares the second (target) word but not the first
    wordsplit = wordpair.split('_')
    targetword_index = []
    # loop through groups to check each word pair
    for idx, group in enumerate(groups):
        groupsplit = group.split('_')
        # check if only the target word overlaps
        if wordsplit[1] == groupsplit[1] and wordsplit[0] != groupsplit[0]:
            targetword_index.append(idx)
    # convert to a single number, or -1 if there is no match
    if len(targetword_index) != 1 and targetword_index:
        print targetword_index
        print "problem in find target"
    elif not targetword_index:  # if no target words overlap
        targetword_index = -1
    else:
        targetword_index = targetword_index[0]
    return targetword_index
# check if a candidate pairing is already in the list of groupings
def inGroup(group, names):
    for i in range(0, len(group)):
        if cmpT(group[i], names):
            return True
    return False

# order-insensitive comparison of two pairings
def cmpT(t1, t2):
    return sorted(t1) == sorted(t2)
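In [ ]:
# Sanity check of the matching helpers on a made-up word-pair list
# (hypothetical pairs for illustration, not the experiment's word pool):
toy_groups = ['juice_glass', 'glass_juice', 'pants_brick', 'juice_brick', 'pants_glass']
print(find_same('juice_glass', toy_groups))       # -> 0 : identical pair
print(find_reverse('juice_glass', toy_groups))    # -> 1 : order flipped
print(find_different('juice_glass', toy_groups))  # -> 2 : shares no words
print(find_probe('juice_glass', toy_groups))      # -> 3 : same probe (first) word
print(find_target('juice_glass', toy_groups))     # -> 4 : same target (second) word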
In [2]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]  # skip non-session entries at the top of the listing
session_pval_dict = {}
# look at the first block of the first session to get the channel ordering
sessiondir = filedir + sessions[0]
# get all blocks for this session
blocks = os.listdir(sessiondir)
block = blocks[0]
block_dir = sessiondir + '/' + block
# in each block, get list of word pairs from first and second block
wordpairs = os.listdir(block_dir)
channels = os.listdir(block_dir + '/' + wordpairs[0])
chan_order = []
for chan in sorted(channels):  # iterate channels in sorted name order
    chan_order.append(chan)
chan_order = np.array(chan_order)
print len(chan_order)
def binarize_pval_mat(pval_mat):
    # use 0.5 as a temporary sentinel so the two threshold assignments don't collide
    pval_mat[pval_mat > 0.05] = 0.5
    pval_mat[pval_mat <= 0.05] = 1
    pval_mat[pval_mat == 0.5] = 0
    return pval_mat
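In [ ]:
# Quick check of binarize_pval_mat on a toy p-value matrix (hypothetical
# numbers): entries <= 0.05 map to 1 (significant), everything else to 0.
toy_pvals = np.array([[0.01, 0.20], [0.05, 0.80]])
print(binarize_pval_mat(toy_pvals))  # -> [[ 1.  0.]
                                     #     [ 1.  0.]]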
In [4]:
### Functions to help extract features and plot histograms of distances
# loops through each word-pairing group and extracts features; relies on the
# module-level firstblock_dir, secondblock_dir, freq_bands, and distances,
# which are defined in the cells below
def extractFeatures(wordgroup, session, blockone, blocktwo):
    fig = plt.figure(figsize=(7.5 * len(wordgroup), 3))
    for idx, pair in enumerate(wordgroup):
        # directories for the two word pairs being compared
        first_wordpair_dir = firstblock_dir + '/' + pair[0]
        second_wordpair_dir = secondblock_dir + '/' + pair[1]
        # initialize lists for holding feature vectors for each event
        first_pair_features = []
        second_pair_features = []
        # load in channels
        first_channels = os.listdir(first_wordpair_dir)
        second_channels = os.listdir(second_wordpair_dir)
        # loop through channels
        for jdx, chans in enumerate(first_channels):
            first_chan_file = first_wordpair_dir + '/' + chans
            second_chan_file = second_wordpair_dir + '/' + chans
            # load in data
            data_first = scipy.io.loadmat(first_chan_file)
            data_first = data_first['data']
            data_second = scipy.io.loadmat(second_chan_file)
            data_second = data_second['data']
            ## 06: get the time point for probeword on
            first_timeZero = data_first['timeZero'][0][0][0]
            second_timeZero = data_second['timeZero'][0][0][0]
            ## 07: get the time point of vocalization
            first_vocalization = data_first['vocalization'][0][0][0]
            second_vocalization = data_second['vocalization'][0][0][0]
            ## Power Matrix (events x frequency bands x time)
            first_matrix = data_first['powerMatZ'][0][0]
            second_matrix = data_second['powerMatZ'][0][0]
            first_matrix = first_matrix[:, freq_bands, :]
            second_matrix = second_matrix[:, freq_bands, :]
            ### 02: average power over the window before vocalization
            first_mean = []
            second_mean = []
            for i in range(0, len(first_vocalization)):
                # either go from timeZero -> vocalization, or some other time window
                first_mean.append(np.mean(first_matrix[i, :, first_timeZero:first_vocalization[i]], axis=1))
                # first_mean.append(np.ndarray.flatten(first_matrix[i, :, first_vocalization[i]-num_time_windows:first_vocalization[i]]))
            for i in range(0, len(second_vocalization)):
                second_mean.append(np.mean(second_matrix[i, :, second_timeZero:second_vocalization[i]], axis=1))
                # second_mean.append(np.ndarray.flatten(second_matrix[i, :, second_vocalization[i]-num_time_windows:second_vocalization[i]]))
            # build the feature matrix for each event, concatenating channels column-wise
            if jdx == 0:
                first_pair_features = np.squeeze(np.array(first_mean))
                second_pair_features = np.squeeze(np.array(second_mean))
            else:
                first_pair_features = np.concatenate((first_pair_features, first_mean), axis=1)
                second_pair_features = np.concatenate((second_pair_features, second_mean), axis=1)
        # end of loop through channels
        # compute all pairwise distances between the two feature matrices
        pair_distances = computePairDistances(first_pair_features, second_pair_features)
        ## Plotting Paired Distances
        plt.subplot(1, len(wordgroup), idx + 1)
        plt.hist(pair_distances, label=pair[0] + ' vs ' + pair[1], lw=3, alpha=0.75)
        plt.ylim([0.0, 6.0])
        plt.xlim([0.0, 1.6])
        plt.legend()
        plt.title(session + ' comparing ' + pair[0] + ' (' + str(len(first_pair_features)) + ') vs ' + pair[1] + ' (' + str(len(second_pair_features)) + ')')
# Compute all pairwise distances between the rows (events) of first_mat and second_mat
def computePairDistances(first_mat, second_mat):
    distance_list = []
    for idx in range(0, first_mat.shape[0]):
        # distance from this first-block event to every second-block event
        distance_list.append([distances(x, first_mat[idx, :]) for x in second_mat])
    # flatten into a single 1-D array of n_first * n_second distances
    return np.array(distance_list).flatten()
In [5]:
################################### HYPER-PARAMETERS TO TUNE #######################################################
np.random.seed(123456789)    # for reproducibility, set random seed
anova_threshold = 90         # how many channels we want to keep
distances = Distance.cosine  # define distance metric to use
num_time_windows = 5
low_freq_bands = [0, 1, 2]
high_freq_bands = [3, 4, 5, 6]
freq_bands = np.arange(0, 7, 1)
freq_labels = ['delta', 'theta', 'alpha', 'beta', 'low gamma', 'high gamma', 'HFO']
print 'low bands: ', [freq_labels[i] for i in low_freq_bands]
print 'high bands: ', [freq_labels[i] for i in high_freq_bands]
print "The length of the feature vector for each channel will be: ", \
    num_time_windows * len(freq_bands), \
    ' total =', 96 * num_time_windows * len(freq_bands)
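In [ ]:
# Toy check of the distance pipeline on random features (hypothetical shapes:
# 4 and 5 events, 7 frequency-band features each). With distances set to
# Distance.cosine above, computePairDistances returns a flat array of
# 4 * 5 = 20 cosine distances -- the values histogrammed by extractFeatures.
toy_first = np.random.rand(4, 7)
toy_second = np.random.rand(5, 7)
print(computePairDistances(toy_first, toy_second).shape)  # -> (20,)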
In [ ]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]  # skip non-session entries at the top of the listing
# loop through each session
for session in sessions:
    print "Analyzing session ", session
    sessiondir = filedir + session
    # get all blocks for this session
    blocks = os.listdir(sessiondir)
    if len(blocks) != 6:  # error check on the directories
        print blocks
        print "Error in the # of blocks. There should be 6."
        break
    # loop through consecutive pairs of blocks, one pair at a time
    for i in range(0, 5):
        print "Analyzing block ", blocks[i], ' and ', blocks[i+1]
        firstblock = blocks[i]
        secondblock = blocks[i+1]
        firstblock_dir = sessiondir + '/' + firstblock
        secondblock_dir = sessiondir + '/' + secondblock
        # in each block, get list of word pairs from first and second block
        first_wordpairs = os.listdir(firstblock_dir)
        second_wordpairs = os.listdir(secondblock_dir)
        diff_word_group = []
        reverse_word_group = []
        probe_word_group = []
        target_word_group = []
        same_word_group = []
        print first_wordpairs
        print second_wordpairs
        #### plot meta information about which session and blocks we're analyzing
        fig = plt.figure()
        axes = plt.gca()
        ymin, ymax = axes.get_ylim()
        xmin, xmax = axes.get_xlim()
        plt.text((xmax - xmin) / 4.5, (ymax - ymin) / 2,
                 'Session %s\ncomparing %s vs. %s' % (session, firstblock, secondblock),
                 fontsize=20)
        plt.title(session + ' comparing blocks: ' + firstblock + ' vs. ' + secondblock)
        plt.grid(False)
        # go through first block and assign pairs to the different groups
        for idx, pair in enumerate(first_wordpairs):
            # obtain indices of: sameword, reverseword, differentwords, probeoverlap, targetoverlap
            same_word_index = find_same(pair, second_wordpairs)
            reverse_word_index = find_reverse(pair, second_wordpairs)
            diff_word_index = find_different(pair, second_wordpairs)
            probe_word_index = find_probe(pair, second_wordpairs)
            target_word_index = find_target(pair, second_wordpairs)
            # append each pairing to its word group, skipping duplicates
            if same_word_index != -1 and not inGroup(same_word_group, [pair, second_wordpairs[same_word_index]]):
                same_word_group.append([pair, second_wordpairs[same_word_index]])
            if reverse_word_index != -1 and not inGroup(reverse_word_group, [pair, second_wordpairs[reverse_word_index]]):
                reverse_word_group.append([pair, second_wordpairs[reverse_word_index]])
            if diff_word_index != -1:
                if isinstance(diff_word_index, list):  # if a list, add one pairing at a time
                    for diffI in diff_word_index:
                        if not inGroup(diff_word_group, [pair, second_wordpairs[diffI]]):
                            diff_word_group.append([pair, second_wordpairs[diffI]])
                else:
                    diff_word_group.append([pair, second_wordpairs[diff_word_index]])
            if probe_word_index != -1 and not inGroup(probe_word_group, [pair, second_wordpairs[probe_word_index]]):
                probe_word_group.append([pair, second_wordpairs[probe_word_index]])
            if target_word_index != -1 and not inGroup(target_word_group, [pair, second_wordpairs[target_word_index]]):
                target_word_group.append([pair, second_wordpairs[target_word_index]])
        # end of loop through word pairs
        print same_word_group
        print reverse_word_group
        print probe_word_group
        print target_word_group
        print diff_word_group[0:2]
        #### Go through each group and extract the feature data per event
        extractFeatures(same_word_group, session, firstblock, secondblock)
        extractFeatures(reverse_word_group, session, firstblock, secondblock)
        extractFeatures(probe_word_group, session, firstblock, secondblock)
        extractFeatures(target_word_group, session, firstblock, secondblock)
        extractFeatures(diff_word_group[0:2], session, firstblock, secondblock)
        break  # only analyze the first pair of blocks for now