In [1]:
#Pull numpy/matplotlib into the namespace and enable inline plotting
%pylab inline
#Do basic imports
from __future__ import division
import sys, os
import scipy.stats #needed explicitly; "import scipy" alone does not load the stats submodule
import pylab as pl
import numpy as np
import numpy.lib.recfunctions as recfunctions
#from ptsa.data.bvwrapper import BVWrapper
#from ptsa.plotting.topo import topoplot
#change to the scripts directory to permit importing from there
%cd ../scripts
import protoReadExtractEEGlab
In [2]:
# Definitions
def adjustR2(R2, numFeatures, numSamples):
    #adjusted R^2: penalise R^2 for the number of features used
    return R2 - (1-R2)*(float(numFeatures)/(numSamples-numFeatures-1))

def mynormalise(A):
    #z-score, mapping any NaNs (e.g. from zero-variance input) to 0
    A = scipy.stats.zscore(A)
    A[np.isnan(A)] = 0
    return A
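As a quick sanity check of these helpers (the numbers are made up, not from the MEG data): with R2 = 0.5, 10 features, and 100 samples, the adjustment gives 0.5 - 0.5*(10/89) ≈ 0.444.
In [ ]:
#Sanity check with made-up values, not from the MEG data
print(adjustR2(0.5, 10, 100))  #expect ~0.4438
print(mynormalise(np.array([1.0, 2.0, 3.0, 3.0])))  #plain z-scores
print(mynormalise(np.ones(4)))  #zero variance -> zscore yields NaNs -> mapped to 0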
In [3]:
#change to the data directory to load in the data
%cd ../MEG_data
#load MEG data
(rawSignalData, metadata, trials, epochedSignalData, epochSliceTimepoints) = \
protoReadExtractEEGlab.load('aud_hofd_a_allRuns_tsss_audiobookPrepro_stPad1_lp50_resamp125_frac10ICAed.set',[])
In [4]:
#load alignment data
wordPropsFile = 'hod_JoeTimes_LoadsaFeaturesV3.tab'
wordProps = np.genfromtxt(wordPropsFile,delimiter='\t',names=True,\
                          dtype="i4,f4,f4,S50,S50,i2,i2,i2,S10,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4")
print(wordProps.shape)
At this point, the following data structures are active:
wordProps: ndarray of word-level properties
rawSignalData: ndarray with one row per channel and one column per timeslice
metadata: a scipy.io-style MATLAB struct; use metadata._fieldnames to list its field names
trials: empty
epochedSignalData: empty
epochSliceTimepoints: empty
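A quick inspection cell to confirm those structures (assuming the load above succeeded):
In [ ]:
#Quick look at the structures listed above
print(wordProps.shape, wordProps.dtype.names[0:5])
print(rawSignalData.shape)  #channels x timeslices
print(metadata._fieldnames)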
wordProps exploration
In [5]:
print(wordProps.dtype.names)
print(wordProps['asrToken'][0:50])
In [6]:
print(wordProps['onTime'])
print(mean(wordProps['onTime']))
print(wordProps['track'])
print(len(wordProps[wordProps['track']==5]))
In [7]:
#Alignment option 1: add a cumulative modifier to the onset and offset times
# to put them in the same time domain as the raw data
prevt = 0.0
timemod = 0
newWordProps = np.copy(wordProps)
for ti,t in enumerate(wordProps['onTime']):
    if t < prevt:
        #the onset time went backwards, so a new session started here;
        #assume each session ends at its final word's offTime (no trailing samples beyond the final wordProps row)
        timemod += wordProps['offTime'][ti-1]
    newWordProps['onTime'][ti] += timemod
    newWordProps['offTime'][ti] += timemod
    prevt = t
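If the accumulation worked, the adjusted onsets should be non-decreasing over the whole recording; a minimal check:
In [ ]:
#After adjustment, onsets should never move backwards in time
print(np.all(np.diff(newWordProps['onTime']) >= 0))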
In [8]:
print(newWordProps['onTime'])
print(mean(newWordProps['onTime']))
print(newWordProps['track'])
print(len(newWordProps[newWordProps['track']==5]))
prevt = 1
for ti,t in enumerate(wordProps['track']):
    if t != prevt:
        #boundary-zone differences: offsets and onsets aren't flush because the first
        # word of each session has some lead-in; note too that these boundaries don't
        # line up with the metadata.event.latency list below
        print(newWordProps['offTime'][ti-1], newWordProps['onTime'][ti])
    prevt = t
metadata exploration
In [9]:
print(metadata._fieldnames)
In [10]:
print(metadata.chanlocs[0]._fieldnames)
print(metadata.chanlocs[0].labels)
print(rawSignalData.shape[1])
In [11]:
print(metadata.pnts)
print(metadata.srate)
print(metadata.times/1000)
print(mean(metadata.times)/1000)
In [12]:
for event in metadata.event:
    print(event.type, event.latency)
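event.latency is in samples, so dividing by metadata.srate puts the triggers in seconds, the same domain as wordProps' onTime/offTime; a sketch:
In [ ]:
#Trigger latencies converted from samples to seconds
for event in metadata.event:
    print(event.type, event.latency / metadata.srate)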
rawSignalData exploration
In [13]:
print(rawSignalData.shape)
print(rawSignalData.dtype)
print(epochedSignalData.shape)
print(epochedSignalData.dtype)
Get session-level stretches
In [22]:
#adapted from protoReadExtractEEGlab.py
triggersOfInterest = []
for n in range(1,9):
    triggersOfInterest += ['s'+str(n), 'e'+str(n)]
eLDS = metadata
sessionStartSec = 0  #seconds to include before each session (lead-in for early word epochs)
sessionEndSec = 0  #seconds to include after each session (lead-out for late word epochs); was 1
sessionSliceTimepoints = []  #embedded lists containing the sample indices for each session
sessionStartTimepoints = round(sessionStartSec*eLDS.srate)  #timepoints (aka samples) to include before the trigger
sessionEndTimepoints = round(sessionEndSec*eLDS.srate)  #... ditto after the trigger
mystart = 0.0
for event in eLDS.event:
    #to see the structure of one trigger: [(a, eval("eLDS.event[0].%s" % a)) for a in dir(eLDS.event[0]) if not a.startswith('_')]
    if event.type in triggersOfInterest:
        if event.type[0] == 's':
            #start event: remember where this session begins
            mystart = event.latency + sessionStartTimepoints
        else:
            #end event: record the sample indices spanning this session
            sessionSliceTimepoints.append(range(int(round(mystart)), int(round(event.latency+sessionEndTimepoints))))
for l in sessionSliceTimepoints:
    print(len(l))
sessionedSignalData = []
for sitting in sessionSliceTimepoints[:-1]:
    sessionedSignalData.append(list(rawSignalData[:,sitting]))
if sessionSliceTimepoints[-1][-1] >= len(rawSignalData[0]):
    #the final slice ends after the last sample, so truncate it at the end of the recording
    print('Trimming the final session')
    print('Final slice: ', sessionSliceTimepoints[-1][-1], '/', len(rawSignalData[0]))
    sessionedSignalData.append(list(rawSignalData[:,sessionSliceTimepoints[-1][0]:]))
else:
    sessionedSignalData.append(list(rawSignalData[:,sessionSliceTimepoints[-1]]))
print('----')
for sit in sessionedSignalData:
    print(len(sit), len(sit[0]))
#current view is session x channel x sample
#pick a channel, say 113, then epoch just that channel for this preliminary work
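A rough consistency check on the extraction: the summed session lengths should not exceed the length of the raw recording.
In [ ]:
#Summed session lengths vs. total recording length, in samples
print(sum(len(sit[0]) for sit in sessionedSignalData), '/', rawSignalData.shape[1])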
In [23]:
for sit in sessionedSignalData:
    print(sit[0][0:10])
Find the desired channel
In [24]:
#The Broca-ish sensors are 0211, 0212, and 0213:
# 0211: magnetometer
# 0212: rostral gradiometer
# 0213: caudal gradiometer
chanids = [i for i in range(len(metadata.chanlocs)) if metadata.chanlocs[i].labels in \
           ('MEG0211','MEG0212','MEG0213')]
print(chanids)
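To confirm the lookup caught the right sensors, map the indices back to their labels:
In [ ]:
#Cross-check: channel indices back to their labels
for i in chanids:
    print(i, metadata.chanlocs[i].labels)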
Align session samples to word-level epochs
In [41]:
for s in range(1,9):
    print(len(wordProps[wordProps['track']==s]))
In [43]:
epochedWordSamples = [] #list rather than np.array because sessions have different numbers of words (ragged)
prevtrack = -1
previx = 0 #marks the first sample not yet grabbed within the current session
sampsum = 0
for i,track in enumerate(wordProps['track']):
    if track != prevtrack:
        #we're in a new session;
        #presume the new session started immediately after the old session ended
        print('i-1:', track-1,
              ' ix0:', previx, ' ix1:', previx+int(round((wordProps['offTime'][i]-wordProps['onTime'][i]) * metadata.srate)))
        sampsum += previx
        previx = 0
        prevtrack = track
    duration = int(round((wordProps['offTime'][i]-wordProps['onTime'][i]) * metadata.srate)) #number of samples for this word
    chanEpoch = [] #list rather than np.array because words have different numbers of samples (ragged)
    for chanid in chanids:
        chanEpoch.append(sessionedSignalData[track-1][chanid][previx:previx+duration])
    epochedWordSamples.append(chanEpoch) #add the channel-based info to the epoch info
    previx += duration #update the sample-use history
sampsum += previx
#current view is word-epoch x channel x sample
#epochs at session boundaries can be found via wordProps['track']
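A word-level sanity check on the result: there should be one epoch per wordProps row, each with one sample list per channel in chanids.
In [ ]:
#One epoch per word, one row per channel
print(len(epochedWordSamples) == len(wordProps))
print(all(len(ep) == len(chanids) for ep in epochedWordSamples))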
In [44]:
print((sum(len(t[0]) for t in sessionedSignalData)-sampsum)/metadata.srate) #seconds of session signal not claimed by any word epoch
In [ ]: