MEG Pilot

The project should be in a directory with the following subdirectories:

- scripts/: importable scripts
- MEG_data/: MEG data and epoch alignment files
- notebooks/: this file and other associated IPython notebooks

Imports


In [1]:
#Enable inline plotting and pull numpy/matplotlib into the interactive namespace
%pylab inline

#Do basic imports
from __future__ import division
import sys, os
import scipy
import scipy.stats #needed by mynormalise below; not pulled in by 'import scipy' alone
import pylab as pl
import numpy as np
import numpy.lib.recfunctions as recfunctions
#from ptsa.data.bvwrapper import BVWrapper
#from ptsa.plotting.topo import topoplot

#change to the scripts directory to permit importing from there
%cd ../scripts
import protoReadExtractEEGlab


Populating the interactive namespace from numpy and matplotlib
/media/webley/Stacked Data/Documents/Work/meg_playground/scripts

Definitions


In [2]:
# Definitions
def adjustR2(R2, numFeatures, numSamples):
    #adjusted R^2; algebraically equivalent to 1 - (1-R^2)(n-1)/(n-p-1)
    return R2-(1-R2)*(float(numFeatures)/(numSamples-numFeatures-1))

def mynormalise(A):
    #z-score each column, then zero out any NaNs (e.g. from constant columns)
    A = scipy.stats.zscore(A)
    A[np.isnan(A)] = 0
    return A
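
A quick check of these definitions on synthetic numbers (not project data): adjustR2 is the textbook adjusted R-squared, 1 - (1-R2)*(n-1)/(n-p-1), rewritten as R2 - (1-R2)*p/(n-p-1), and mynormalise z-scores column-wise, zeroing the NaNs a constant column produces.

R2, p, n = 0.30, 10, 500
print(adjustR2(R2, p, n))             #penalised value, slightly below 0.30
print(1 - (1-R2)*(n-1.0)/(n-p-1))     #textbook form gives the same number

A = np.array([[1.0, 5.0], [2.0, 5.0], [3.0, 5.0]])
print(mynormalise(A))                 #second column is constant -> NaN -> 0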

Load Data


In [3]:
#change to the data directory to load in the data
%cd ../MEG_data

#load MEG data
(rawSignalData, metadata, trials, epochedSignalData, epochSliceTimepoints) = \
    protoReadExtractEEGlab.load('aud_hofd_a_allRuns_tsss_audiobookPrepro_stPad1_lp50_resamp125_frac10ICAed.set',[])


/media/webley/Stacked Data/Documents/Work/meg_playground/MEG_data

In [4]:
#load alignment data
wordPropsFile = 'hod_JoeTimes_LoadsaFeaturesV3.tab'
wordProps = np.genfromtxt(wordPropsFile,delimiter='\t',names=True,\
                          dtype="i4,f4,f4,S50,S50,i2,i2,i2,S10,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4,f4")
print(wordProps.shape)


(14595,)

Play

At this point, the following data structures are active:

- wordProps: ndarray of word-level properties
- rawSignalData: ndarray with one row per channel and one column per timeslice
- metadata: scipy.io matlab-style struct; use metadata._fieldnames to list the field names
- trials: empty
- epochedSignalData: empty
- epochSliceTimepoints: empty

wordProps exploration


In [5]:
print(wordProps.dtype.names)
print(wordProps['asrToken'][0:50])


('track', 'onTime', 'offTime', 'asrToken', 'annotToken', 'sentenceSerial', 'runSerial', 'storySerial', 'stanfPOS', 'lenLett', 'lenPhonCMU', 'lenSyllCMU', 'logFreq_ANC', 'bigramLogProbBack_COCA', 'trigramLogProbBack_COCA', 'surprisal2back_COCA', 'tokenPrimed', 'tokenPosPrimed', 'bigramEntropy_COCA_previous', 'bigramEntropy_COCA_here')
['<SENT-END>' 'ONE' 'EVENING' 'AS' 'I' 'WAS' 'LYING' 'FLAT' 'ON' 'THE'
 'DECK' 'OF' 'MY' 'STEAMBOAT' '<SPACE>' 'I' 'HEARD' 'VOICES' 'APPROACHING'
 '<SPACE>' 'AND' 'THERE' 'WERE' 'THE' 'NEPHEW' 'AND' 'THE' 'UNCLE'
 'STROLLING' 'ALONG' 'THE' 'BANK' '<SENT-END>' 'I' 'LAID' 'MY' 'HEAD' 'ON'
 'MY' 'ARM' 'AGAIN' '<SPACE>' 'AND' 'HAD' 'NEARLY' 'LOST' 'MYSELF' 'IN' 'A'
 'DOZE']

In [6]:
print(wordProps['onTime'])
print(mean(wordProps['onTime']))
print(wordProps['track'])
print(len(wordProps[wordProps['track']==5]))


[  3.40000018e-02   1.05400002e+00   1.32400000e+00 ...,   6.76174011e+02
   6.76283997e+02   6.76963989e+02]
299.449
[1 1 1 ..., 8 8 8]
1781

In [7]:
#Alignment option 1: add a cumulative time offset to the onset and offset times
#  to put them in the same domain as the raw (concatenated) data

prevt = 0.0
timemod = 0

newWordProps = np.copy(wordProps)

for ti,t in enumerate(wordProps['onTime']):
    if t < prevt:
        #onTime decreased, so a new session started; assume sessions are
        #concatenated back to back and grow the offset by the previous word's offTime
        timemod += wordProps['offTime'][ti-1]
    newWordProps['onTime'][ti] += timemod
    newWordProps['offTime'][ti] += timemod
    prevt = t
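
The same offset accumulation can be done with array operations instead of the per-word loop; a sketch under the same back-to-back-sessions assumption (starts, offsets, and vecWordProps are illustrative names; this is not used below):

#session starts are the points where onTime drops relative to the previous word
starts = np.where(np.diff(wordProps['onTime']) < 0)[0] + 1

offsets = np.zeros(len(wordProps))
for s in starts:
    offsets[s:] += wordProps['offTime'][s-1] #offset grows by the last offTime of each earlier session

vecWordProps = np.copy(wordProps)
vecWordProps['onTime'] += offsets
vecWordProps['offTime'] += offsets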

In [8]:
print(newWordProps['onTime'])
print(mean(newWordProps['onTime']))
print(newWordProps['track'])
print(len(newWordProps[newWordProps['track']==5]))

prevt = 1
for ti,t in enumerate(wordProps['track']):
    if t != prevt:
        #boundary-zone differences: offsets and onsets aren't flush because the first
        #word of each session has some lead-in; note that these boundaries also
        #don't line up with the metadata.event latency list below
        print newWordProps['offTime'][ti-1], newWordProps['onTime'][ti]
    prevt = t


[  3.40000018e-02   1.05400002e+00   1.32400000e+00 ...,   6.89766406e+03
   6.89777393e+03   6.89845361e+03]
3736.34
[1 1 1 ..., 8 8 8]
1781
445.404 445.438
2687.61 2687.65
3251.72 3251.75
3777.42 3777.45
4433.91 4433.94
5322.31 5322.34
6221.49 6221.52

metadata exploration


In [9]:
print(metadata._fieldnames)


['setname', 'filename', 'filepath', 'subject', 'group', 'condition', 'session', 'comments', 'nbchan', 'trials', 'pnts', 'srate', 'xmin', 'xmax', 'times', 'data', 'icaact', 'icawinv', 'icasphere', 'icaweights', 'icachansind', 'chanlocs', 'urchanlocs', 'chaninfo', 'ref', 'event', 'urevent', 'eventdescription', 'epoch', 'epochdescription', 'reject', 'stats', 'specdata', 'specicaact', 'splinefile', 'icasplinefile', 'dipfit', 'history', 'saved', 'etc', 'datfile']

In [10]:
print(metadata.chanlocs[0]._fieldnames)
print(metadata.chanlocs[0].labels)
print(rawSignalData.shape[1])


['labels', 'ref', 'theta', 'radius', 'X', 'Y', 'Z', 'sph_theta', 'sph_phi', 'sph_radius', 'type', 'urchan']
MEG0113
588038

In [11]:
print(metadata.pnts)
print(metadata.srate)
print(metadata.times/1000)
print(mean(metadata.times)/1000)


588038
125
[ -0.00000000e+00   8.00000000e-03   1.60000000e-02 ...,   4.70428000e+03
   4.70428800e+03   4.70429600e+03]
2352.148

In [12]:
for event in metadata.event:
    print(event.type, event.latency)


(u'boundary', 0.5)
(u's1', 125.99999999999977)
(u'e1', 55865)
(u'boundary', 55990.5)
(u's2', 56115.99999999999)
(u'e2', 150082.875)
(u'boundary', 150208.5)
(u's3', 150333.99999999997)
(u'e3', 220933.87499999997)
(u'boundary', 221059.06249999997)
(u's4', 221184.99999999994)
(u'e4', 286949.125)
(u'boundary', 287074.5)
(u's5', 287200)
(u'e5', 356377.625)
(u'boundary', 356502.8125000001)
(u's6', 356628.0000000001)
(u'e6', 425840.6250000001)
(u'boundary', 425965.8125000001)
(u's7', 426091.0000000001)
(u'e7', 502837.5000000001)
(u'boundary', 502962.68750000035)
(u's8', 503088.00000000035)
(u'e8', 587913.6250000002)
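
The latencies above are in samples; dividing by metadata.srate (125 Hz here) converts them to seconds for comparison against the wordProps times. A quick sketch:

for event in metadata.event:
    if event.type[0] in (u's', u'e'): #session start/end triggers only
        print(event.type, event.latency/float(metadata.srate))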

rawSignalData exploration


In [13]:
print(rawSignalData.shape)
print(rawSignalData.dtype)
print(epochedSignalData.shape)
print(epochedSignalData.dtype)


(307, 588038)
float32
(0, 307)
float32
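
At 307 channels x 588038 float32 samples, the raw array occupies about 689 MiB, which is worth keeping in mind before making copies; nbytes gives the exact footprint:

print('%.1f MiB' % (rawSignalData.nbytes/2.0**20))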

Work

Get session-level stretches


In [22]:
#adapted from protoReadExtractEEGlab.py

triggersOfInterest = []
for n in range(1,9):
    triggersOfInterest += ['s'+str(n),'e'+str(n)]
eLDS = metadata
sessionStartSec = 0 #number of seconds to include before each session (needed to get lead-in for early word epochs)
sessionEndSec = 0#1 #number of seconds to include after each session (needed to get lead-out for late word epochs)

trialTriggers = []; # list of triggers
trialLatencies = []; # ... and their latencies in timepoints (aka samples) to make record array of trials
sessionSliceTimepoints = []; # embedded lists containing indices for each epoch
sessionStartTimepoints = round(sessionStartSec*eLDS.srate) # number of timepoints (aka samples) before trigger to include in session
sessionEndTimepoints = round(sessionEndSec*eLDS.srate) # ... ditto after the trigger
mystart = 0.0
myend = 0.0
for event in eLDS.event:        
# how to see stucture of one trigger: [(a, eval("eLDS.event[0].%s" % a)) for a in dir(eLDS.event[0]) if not a.startswith('_')]
    if event.type in triggersOfInterest:
        if event.type[0] == 's':
            #start event
            mystart = event.latency + sessionStartTimepoints
        else:
            sessionSliceTimepoints.append(range(int(round(mystart)),int(round(event.latency+sessionEndTimepoints))))
        #trialTriggers.append(event.type)
        #trialLatencies.append(event.latency)
        #sessionSliceTimepoints.append(range(int(round(prevlatency)),int(round(event.latency))))

# create record array of trial IDs and their position in samples of the recording, allows field and entry-wise referencing: trials.timepoint, trials[5], trials[5].trigger, trials.trigger[5]
#trials = np.core.records.fromarrays([trialTriggers, trialLatencies],names='trigger,timepoint')
#L.info('Extracted %d trials of interest, with epochs running from %-.2fs to %+.2fs relative to triggers' % (len(trials), sessionStartSec, sessionEndSec))
# create session-based view on data, using sessions of particular type

#sessionedSignalData = rawSignalData[:,sessionSliceTimepoints[0]]#.swapaxes(0,1) # sessions x channels x timepoints
#print(sessionSliceTimepoints[-1][-1])
for l in sessionSliceTimepoints:
    print(len(l))
    
sessionedSignalData = []#list(rawSignalData[:,sessionSliceTimepoints[0]])
for sitting in sessionSliceTimepoints[:-1]: #range(1,len(sessionSliceTimepoints)-1):
    #sessionedSignalData.append(list(rawSignalData[:,sessionSliceTimepoints[sitting]]))
    sessionedSignalData.append(list(rawSignalData[:,sitting]))


if sessionSliceTimepoints[-1][-1] >= len(rawSignalData[0]):
    #the final timeslice ends after the last sample, so omit it
    #sessionedSignalData.append(list(rawSignalData[:,sessionSliceTimepoints[-1][:-1]]))
    print('Trimming the final session')
    print 'Final slice: ',sessionSliceTimepoints[-1][-1], '/', len(rawSignalData[0])
    sessionedSignalData.append(list(rawSignalData[:,sessionSliceTimepoints[-1][0]:]))
else:
    sessionedSignalData.append(list(rawSignalData[:,sessionSliceTimepoints[-1]]))
#sessionedSignalData = rawSignalData[:,sessionSliceTimepoints]#.swapaxes(0,1)

print('----')
for sit in sessionedSignalData:
    print(len(sit),len(sit[0]))

#current view is session x channel x sample
#pick a channel, say 113, then epoch just that channel for this preliminary stuff


55739
93967
70600
65764
69178
69213
76747
84826
----
(307, 55739)
(307, 93967)
(307, 70600)
(307, 65764)
(307, 69178)
(307, 69213)
(307, 76747)
(307, 84826)
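
Since each session is a contiguous run of samples, the same channels x timepoints blocks can also be taken as NumPy slices, which are views rather than copies of the ~689 MiB array. A sketch of that alternative (sessionBounds is an illustrative name; not a drop-in replacement for the list-based structures above):

sessionBounds = [] #(start, stop) sample indices per session
mystart = 0
for event in metadata.event:
    if event.type in triggersOfInterest:
        if event.type[0] == 's':
            mystart = int(round(event.latency))
        else:
            sessionBounds.append((mystart, min(int(round(event.latency)), rawSignalData.shape[1])))

for start, stop in sessionBounds:
    view = rawSignalData[:, start:stop] #a view into the raw array: no copy
    print(view.shape)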

In [23]:
for sit in sessionedSignalData:
    print(sit[0][0:10])


[  6.17254945e-12   6.11931296e-12   1.36693781e-12  -3.08979686e-12
  -2.50620661e-12  -2.73018413e-12  -5.97545451e-12  -6.97526759e-12
  -3.75410814e-12  -3.13188928e-12]
[ -1.62532227e-11  -1.34842163e-11  -1.12523159e-11  -1.34263043e-11
  -1.93062979e-11  -1.85832304e-11  -1.37902805e-11  -1.27745705e-11
  -1.07362010e-11  -7.75561646e-12]
[ -1.07671511e-11  -1.06840257e-11  -1.52687637e-11  -1.65780133e-11
  -1.47737308e-11  -1.33857586e-11  -1.46486052e-11  -1.51999021e-11
  -1.47199995e-11  -1.48809923e-11]
[  6.65713058e-12   3.93415248e-12  -4.56732829e-12  -1.32479574e-11
  -1.42655610e-11  -6.42588803e-12   4.15774143e-12   8.18270712e-12
  -3.35470746e-13  -1.01489954e-11]
[  8.20967860e-12   7.01449359e-12   4.91032345e-12   1.31045663e-12
   1.35004594e-12   5.95377220e-12   7.52772844e-12   7.67418681e-12
   1.01015576e-11   8.65426567e-12]
[  3.53339190e-12   4.48985640e-12   3.22289222e-12   4.25144555e-12
   7.92016713e-12   1.28682230e-11   1.15218798e-11   5.51289006e-12
   3.43023028e-12   1.73977065e-12]
[  4.44983980e-12   5.34656954e-12   7.31456042e-12   8.35117305e-12
   1.10943164e-11   1.46087534e-11   1.35914847e-11   6.89525433e-12
   2.01742269e-13   1.35568889e-12]
[ -7.34207226e-12   5.29593838e-13   4.36068499e-12   8.54629464e-13
  -2.71335016e-12  -3.60461595e-12  -4.96962367e-12  -5.29613593e-12
  -3.48688743e-12  -6.12990398e-13]

Find the desired channel


In [24]:
#The broca-ish sensors are 0211,0212,0213
# 0211: magnetometer
# 0212: rostral gradiometer
# 0213: caudal gradiometer
chanids = [i for i in range(len(metadata.chanlocs)) if metadata.chanlocs[i].labels in \
           ('MEG0211','MEG0212','MEG0213')]
print(chanids)


[12, 13, 14]
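
For later reuse, a label-to-index map saves rescanning chanlocs each time (a minimal sketch; chanIndex is an illustrative name):

chanIndex = dict((metadata.chanlocs[i].labels, i) for i in range(len(metadata.chanlocs)))
print([chanIndex[lab] for lab in ('MEG0211','MEG0212','MEG0213')])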

Align session samples to word-level epochs


In [41]:
for s in range(1,9):
  print(len(wordProps[wordProps['track']==s]))


1414
2429
1704
1693
1781
1604
1827
2143

In [43]:
epochedWordSamples = [] #has to be a list rather than a np.array because sessions have diff numbers of words; ragged
prevtrack = -1
previx = 0 #a marker that notes the last grabbed sample
sampsum = 0
for i,track in enumerate(wordProps['track']):
    if track != prevtrack:
        #we're in a new session: report the boundary and reset the sample cursor,
        #since word samples restart at the beginning of each session's data
        print('session:',track-1,
            ' ix0:',previx,' ix1:',previx+int(round((wordProps['offTime'][i]-wordProps['onTime'][i]) * metadata.srate)))

        sampsum += previx
        previx = 0
    prevtrack = track
    duration = int(round((wordProps['offTime'][i]-wordProps['onTime'][i]) * metadata.srate)) #number of samples for this word

    chanEpoch = [] #has to be a list rather than a np.array because words have diff numbers of samples; ragged
    for chanid in chanids:
        chanEpoch.append(sessionedSignalData[track-1][chanid][previx:previx+duration])
    epochedWordSamples.append(chanEpoch) #add the channel-based info to the epoch info
    previx += duration #update the sample use history

sampsum += previx
#current view is word-epoch x channel x sample
#can find out which epochs are at the session boundaries using wordProps['track']


('session:', 0, ' ix0:', 0, ' ix1:', 127)
('session:', 1, ' ix0:', 55683, ' ix1:', 55808)
('session:', 2, ' ix0:', 93784, ' ix1:', 93910)
('session:', 3, ' ix0:', 70497, ' ix1:', 70632)
('session:', 4, ' ix0:', 65707, ' ix1:', 65833)
('session:', 5, ' ix0:', 69199, ' ix1:', 69333)
('session:', 6, ' ix0:', 69245, ' ix1:', 69365)
('session:', 7, ' ix0:', 76651, ' ix1:', 76815)
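
With epochedWordSamples as a ragged word-epoch x channel x sample structure, a typical next step is a per-epoch summary statistic. A sketch computing RMS amplitude per word and channel, skipping zero-length epochs (epochRMS is an illustrative name):

epochRMS = []
for epoch in epochedWordSamples:
    epochRMS.append([np.sqrt(np.mean(np.square(ch))) if len(ch) > 0 else np.nan for ch in epoch])
epochRMS = np.array(epochRMS) #words x channels
print(epochRMS.shape)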

In [44]:
print((sum(len(t[0]) for t in sessionedSignalData)-sampsum)/metadata.srate) #seconds of session signal not covered by word epochs


4.144
