In [2]:
# Import Necessary Libraries
import numpy as np
import scipy.io
import matplotlib
from matplotlib import *
from matplotlib import pyplot as plt
import os
import itertools
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.decomposition import PCA
import scipy.stats as stats
from scipy.spatial import distance as Distance
# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')
%matplotlib inline
In [3]:
#### Extract wordpairs data into a dictionary for a subject/session/block
#### dictionary{wordpair:{channels}}
def extractSubjSessionBlockData(subj, session, block):
    # file directory for a subj/session/block
    filedir = '../../condensed_data_' + subj + '/blocks/' + session + '/' + block
    wordpairs = os.listdir(filedir)

    # initialize data dictionary with meta data
    data_dict = {}
    data_dict['meta'] = {'subject': subj,
                         'session': session,
                         'block': block}

    for wordpair in wordpairs:    # loop thru all wordpairs
        wordpair_dir = filedir + '/' + wordpair
        all_channel_mats = os.listdir(wordpair_dir)
        data_dict[wordpair] = {}

        for channel in all_channel_mats:    # loop thru all channels
            chan_file = wordpair_dir + '/' + channel

            ## 00: load in data
            data = scipy.io.loadmat(chan_file)
            data = data['data']

            ## 01: get the time point for probeword on
            timeZero = data['timeZero'][0][0][0]

            ## 02: get the time point of vocalization
            vocalization = data['vocalization'][0][0][0]

            ## 03: get the power matrix
            power_matrix = data['powerMatZ'][0][0]

            chan = channel.split('_')[0]

            # store the channel data in a nested dict
            data_dict[wordpair][chan] = {'timeZero': timeZero,
                                         'timeVocalization': vocalization,
                                         'powerMat': power_matrix}
    return data_dict
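The repeated `[0][0]` indexing above is a consequence of how scipy.io.loadmat represents MATLAB structs: each field comes back wrapped in nested 1x1 object arrays. A minimal sketch for inspecting one channel file (the path below is a placeholder, not a real file in this repo):

import scipy.io
mat = scipy.io.loadmat('path/to/example_channel.mat')   # placeholder path; swap in a real channel .mat
data = mat['data']
print data.dtype.names         # struct field names, e.g. 'timeZero', 'vocalization', 'powerMatZ'
print data['timeZero'].shape   # typically (1, 1), which is why the [0][0] unwrapping is needed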
In [4]:
######## Get list of files (.mat) we want to work with ########
subj = 'NIH034'                 # change the subject/directories as needed
filedir = '../../condensed_data_' + subj + '/blocks/'
sessions = os.listdir(filedir)
sessions = sessions[2:]         # change which sessions we want

print "Analyzing subject: ", subj
print "The sessions: ", sessions

# loop through each session
for idx, session in enumerate(sessions):
    # the session directory
    sessiondir = filedir + sessions[idx]

    # get all blocks for this session
    blocks = os.listdir(sessiondir)
    print "The blocks are: \n", blocks, ' \n'
    if len(blocks) != 6:        # error check on the directories
        print blocks
        print("Error in the # of blocks. There should be 6.")
        break

    # loop through each block one at a time, analyze
    for i in range(0, 1):
        block = blocks[i]
        block_dir = sessiondir + '/' + block

        # get the list of word pairs in this block
        wordpairs = os.listdir(block_dir)
        print "The word pairs for this session/block: \n", wordpairs
        a_wordpair = wordpairs[0]

        print 'Subject: ', subj
        print 'Session: ', session
        print 'Block: ', block

        block_data = extractSubjSessionBlockData(subj, session, block)
        print block_data.keys()
        print block_data['meta'], '\n\n'
        print "The type of each power matrix is: ", type(block_data['BRICK_CLOCK']['1']['powerMat'])
        print "The shape of each power matrix is: ", block_data['BRICK_CLOCK']['1']['powerMat'].shape
        print "The range of data goes from: ", np.amin(block_data['BRICK_CLOCK']['1']['powerMat']), " to ", np.amax(block_data['BRICK_CLOCK']['1']['powerMat'])
        break
    break
In [6]:
def extractSubjSessionBlockData(subj, session, block):
    # file directory for a subj/session/block
    filedir = '../../condensed_data_' + subj + '/sessions/' + session + '/' + block
    wordpairs = os.listdir(filedir)

    # initialize data dictionary with meta data
    data_dict = {}
    data_dict['meta'] = {'subject': subj,
                         'session': session,
                         'block': block}
    data_dict['data'] = {}

    for wordpair in wordpairs:    # loop thru all wordpairs
        wordpair_dir = filedir + '/' + wordpair
        all_channel_mats = os.listdir(wordpair_dir)
        data_dict['data'][wordpair] = {}

        for channel in all_channel_mats:    # loop thru all channels
            chan_file = wordpair_dir + '/' + channel

            ## 00: load in data
            data = scipy.io.loadmat(chan_file)
            data = data['data']

            ## 01: get the time point for probeword on
            timeZero = data['timeZero'][0][0][0]

            ## 02: get the time point of vocalization
            vocalization = data['vocalization'][0][0][0]

            ## 03: get the power matrix
            power_matrix = data['powerMatZ'][0][0]

            ## 04: get absolute response times for plotting
            responseTimes = data['originalResponseTimes'][0][0][0]

            ## 05: get probe word and target word
            probeWord = data['probeWord'][0][0][0]
            targetWord = data['targetWord'][0][0][0]

            chan = channel.split('_')[0]

            # store the channel data in a nested dict
            data_dict['data'][wordpair][chan] = {'timeZero': timeZero,
                                                 'timeVocalization': vocalization,
                                                 'powerMat': power_matrix,
                                                 'responseTimes': responseTimes,
                                                 'probeWord': probeWord,
                                                 'targetWord': targetWord}
            data_dict['meta']['description'] = data['description'][0][0][0]
    return data_dict
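An alternative to the manual unwrapping, if the struct fields are simple, is to let loadmat squeeze the singleton dimensions and expose the fields as attributes. A sketch under that assumption (placeholder path again):

import scipy.io
mat = scipy.io.loadmat('path/to/example_channel.mat',   # placeholder path
                       squeeze_me=True, struct_as_record=False)
data = mat['data']
print data.timeZero            # scalar fields come back squeezed
print data.powerMatZ.shape     # the power matrix keeps its full array shape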
In [6]:
######## Get list of files (.mat) we want to work with ########
subj = 'NIH039'                 # change the subject/directories as needed
filedir = '../../condensed_data_' + subj + '/sessions/'
sessions = os.listdir(filedir)
sessions = sessions[2:]         # change which sessions we want

print "Analyzing subject: ", subj
print "The sessions: ", sessions

# loop through each session
for idx, session in enumerate(sessions):
    # the session directory
    sessiondir = filedir + sessions[idx]

    # get all blocks for this session
    blocks = os.listdir(sessiondir)
    print "The blocks are: \n", blocks, ' \n'
    if len(blocks) != 6:        # error check on the directories
        print blocks
        print("Error in the # of blocks. There should be 6.")
        break

    # loop through each block one at a time, analyze
    for i in range(0, 1):
        block = blocks[i]
        block_dir = sessiondir + '/' + block

        # get the list of word pairs in this block
        wordpairs = os.listdir(block_dir)
        print "The word pairs for this session/block: \n", wordpairs
        a_wordpair = wordpairs[0]

        print 'Subject: ', subj
        print 'Session: ', session
        print 'Block: ', block

        block_data = extractSubjSessionBlockData(subj, session, block)
        print block_data.keys()
        print block_data['meta'], '\n\n'
        print "The type of each power matrix is: ", type(block_data['data']['BRICK_CLOCK']['1']['powerMat'])
        print "The shape of each power matrix is: ", block_data['data']['BRICK_CLOCK']['1']['powerMat'].shape
        print "The range of data goes from: ", np.amin(block_data['data']['BRICK_CLOCK']['1']['powerMat']), " to ", np.amax(block_data['data']['BRICK_CLOCK']['1']['powerMat'])
        print "The number of channels is: ", len(block_data['data']['BRICK_CLOCK'].keys())
        break
    break
In [7]:
responseTimes = block_data['data']['BRICK_CLOCK']['1']['responseTimes']/1000.
fig = plt.figure()
plt.hist(responseTimes)
Out[7]:
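For readability, the histogram can be labeled explicitly; a minor sketch (the bin count is an arbitrary choice, and the title assumes the BRICK_CLOCK / channel 1 selection from the cell above):

fig = plt.figure()
plt.hist(responseTimes, bins=20)          # bins=20 is arbitrary
plt.xlabel('Response time (seconds)')
plt.ylabel('Number of trials')
plt.title('Response times (BRICK_CLOCK, channel 1)')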
In [4]:
def createWordGroups(wordpairs):
    same_word_group = []
    reverse_word_group = []
    diff_word_group = []

    # create same-pair groups (each wordpair with itself)
    for pair in wordpairs:
        same_word_group.append([pair, pair])

    # create reverse and different groups
    for pairs in itertools.combinations(wordpairs, 2):
        if isReverse(pairs[0], pairs[1]):
            reverse_word_group.append([pairs[0], pairs[1]])
        else:
            diff_word_group.append([pairs[0], pairs[1]])

    return same_word_group, reverse_word_group, diff_word_group

def isReverse(pair1, pair2):
    # two wordpairs are "reversed" if their words match in the opposite order
    pair1split = pair1.split('_')
    pair2split = pair2.split('_')
    return pair1split[0] == pair2split[1] and pair1split[1] == pair2split[0]
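A quick check of the grouping logic on a toy list of wordpairs (the pair names here are purely illustrative):

toy_pairs = ['BRICK_CLOCK', 'CLOCK_BRICK', 'GLASS_JUICE']
same, reverse, diff = createWordGroups(toy_pairs)
print same      # [['BRICK_CLOCK', 'BRICK_CLOCK'], ['CLOCK_BRICK', 'CLOCK_BRICK'], ['GLASS_JUICE', 'GLASS_JUICE']]
print reverse   # [['BRICK_CLOCK', 'CLOCK_BRICK']]
print diff      # the two remaining cross-pair combinations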
In [14]:
######## Get list of files (.mat) we want to work with ########
subj = 'NIH039'
filedir = '../../condensed_data_' + subj + '/sessions/'
sessions = os.listdir(filedir)
# sessions = sessions[2:]

session_pval_dict = {}
debug_on = 1
responseTimes = {}

# loop through each session
for session in sessions:
    print "Analyzing session ", session
    sessiondir = filedir + session

    session_pval_diff_mat = np.array(())
    session_pval_same_mat = np.array(())
    session_pval_reverse_mat = np.array(())

    # get all blocks for this session
    blocks = os.listdir(sessiondir)
    if len(blocks) != 6:        # error check on the directories
        print blocks
        print("Error in the # of blocks. There should be 6.")
        break

    responseTimes[str(session)] = np.array(())

    # loop through each block one at a time, analyze
    for i in range(0, 6):
        # var for block and directory
        block = blocks[i]
        block_dir = sessiondir + '/' + block

        # get the list of word pairs in this block
        wordpairs = os.listdir(block_dir)

        # within-groups analysis only has: SAME, REVERSE, DIFFERENT
        diff_word_group = []
        reverse_word_group = []
        same_word_group = []

        ## 01: Create WordPair Groups
        same_word_group, reverse_word_group, diff_word_group = createWordGroups(wordpairs)

        # extract the session/block data dictionary
        block_data = extractSubjSessionBlockData(subj, session, block)

        # accumulate response times (in seconds) across blocks for this session
        for words in block_data['data'].keys():
            responseTimes[str(session)] = np.append(responseTimes[str(session)],
                                                    block_data['data'][words]['1']['responseTimes'] / 1000.,
                                                    axis=0)
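A quick sanity check on the aggregated response times (this assumes the loop above completed; the session names depend on the local directory layout):

for sess in sorted(responseTimes.keys()):
    rts = responseTimes[sess]
    print "%s: %d responses, mean = %.2f s, sem = %.3f s" % (sess, len(rts), np.mean(rts), stats.sem(rts))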
In [39]:
print responseTimes.keys()

def plotResponseTimes(subj, responseTimes):
    responseTimeAverage = {}
    responseTimeSem = {}

    fig = plt.figure()
    for idx, key in enumerate(responseTimes.keys()):
        responseTimeAverage[key] = np.mean(responseTimes[key])
        responseTimeSem[key] = scipy.stats.sem(responseTimes[key])
        plt.bar(idx + 0.5, responseTimeAverage[key], yerr=responseTimeSem[key])
    plt.title('Response Times with SEM For ' + subj)
    plt.ylabel('Response Time (seconds)')
    plt.xticks(range(1, len(responseTimes.keys()) + 1), responseTimes.keys())
    plt.xlim([0, len(responseTimes.keys()) + 1])
    plt.tight_layout()
In [40]:
plotResponseTimes(subj, responseTimes)