ERP statistics using beamforming and linear mixed effects models


In [8]:
import psychic
import numpy as np
import progressbar
import scipy.stats
%matplotlib inline

Some parameters


In [20]:
# Regularization parameter for covariance matrix inversion.
reg = 0.2

# Various covariance estimators. Give them a try.
# cov = lambda X: X.dot(X.T)  # plain unnormalized scatter matrix
# cov = np.cov                # numpy's sample covariance
cov = psychic.lw_cov  # Ledoit-Wolf shrinkage estimator

# Resample to this rate (in Hz) for dimensionality reduction
sample_rate = 50.0

# Different priors about N400 timing (the time axis is in seconds)
# No knowledge (flat prior)
# coi_prior = lambda x: 1.0

# Slight nudge towards 0.4 s (400 ms, the canonical N400 latency)
coi_prior = scipy.stats.norm(0.4, 0.1).pdf

# Strong nudge towards 0.4 s
# coi_prior = scipy.stats.norm(0.4, 0.05).pdf

Load the data


In [10]:
subjects = ['anne', 'cedric', 'daniel', 'dieter', 'heleen', 'me', 'ph', 'tom', 'zander', 'zeger']

# Per-subject preprocessing: load, resample, crop and keep two classes.
all_trials = []
bar = progressbar.ProgressBar(maxval=len(subjects)).start()
for i, subject in enumerate(subjects, 1):
    fname = '/home/marijn/data/dedeyne_levels_norm_delayed_button/%s-dedeyne_levels_norm_delayed_button-trials.dat' % subject
    trials = psychic.DataSet.load(fname)

    # Subsample to 50 Hz
    trials = psychic.nodes.Resample(sample_rate).train_apply(trials)

    # Limit data to EEG channels only, -0.1 to 1.0 seconds in time
    trials = trials.lix[:'Cz', -0.1:1.0, :]
    # Make sure the signal is 54 samples in length (this should also be the
    # length of the temporal template)
    trials = trials.ix[:, :54, :]

    # Original data has 3 classes. Only keep two classes (related vs unrelated)
    trials = trials.get_class(['related', 'unrelated'], drop_others=True)

    all_trials.append(trials)
    bar.update(i)
bar.finish()

channel_names = all_trials[0].feat_lab[0]
time = all_trials[0].feat_lab[1]


100% |########################################################################|

In [11]:
# Sanity check: one DataSet per subject, each containing the two classes of interest.
print 'There is data for %d subjects' % len(all_trials)
print 'For each subject, there is a DataSet:'
print all_trials[0]
print 'Class labels:', all_trials[0].cl_lab


There is data for 10 subjects
For each subject, there is a DataSet:
DataSet with 200 instances, 1728 features [32x54], 2 classes: [100, 100], extras: []
Class labels: ['related', 'unrelated']

ERP plot for all subjects


In [12]:
# Plot the ERPs with all subjects' trials concatenated into a single DataSet.
f = plt.figure(figsize=(8,8))
psychic.plot_erp(psychic.concatenate(all_trials, ignore_index=True), fig=f, vspace=8, ncols=3);


There is some N400 component in that data. In order to do statistics on it, we need to estimate its amplitude for each trial.

Loading the spatial and temporal templates for the N400 component


In [23]:
# Spatial template: one weight per EEG channel (32 channels).
spatial_template = psychic.DataSet.load('../../prior_knowledge/results/n400_spatial_template.dat')
spatial_template = spatial_template.data.flatten()
print 'Shape of spatial_template', spatial_template.shape

# Temporal template: one weight per time sample (54 samples after resampling).
temporal_template = psychic.DataSet.load('../../prior_knowledge/results/n400_temporal_template.dat')
# Re-sample the temporal template to match the sample rate of our data
temporal_template = psychic.nodes.Resample(sample_rate).train_apply(temporal_template)
temporal_template = temporal_template.data.flatten()
print 'Shape of temporal_template', temporal_template.shape


Shape of spatial_template (32,)
Shape of temporal_template (54,)

Plotting the spatial and temporal templates


In [24]:
plt.figure()
plt.subplot(211)
# Symmetric color limits, so zero amplitude maps to the center of the colormap.
# (The original called plot_scalp twice; the first call was immediately
# overdrawn by the second, so it has been removed.)
cm = np.max(np.abs(spatial_template))
psychic.scalpplot.plot_scalp(spatial_template, channel_names,
                             cmap=plt.get_cmap('RdBu_r'))
plt.clim(-cm, cm)
plt.title('spatial pattern')
plt.subplot(212)
plt.plot(time, temporal_template, '-k')
plt.xlabel('time (s)')
plt.title('temporal pattern')
plt.tight_layout()


We could fix the pattern up a bit by applying some prior:


In [25]:
# Weight the temporal template by the timing prior.
# NOTE(review): this is an in-place multiplication — re-running this cell
# applies the prior again. Re-run the template-loading cell first to reset.
temporal_template *= coi_prior(time)

f = plt.figure(figsize=(7,2))
plt.plot(time, temporal_template, '-k')
plt.xlabel('time (s)')
plt.title('temporal pattern with prior')


Out[25]:
<matplotlib.text.Text at 0x7f8d3491f790>

Applying the beamformer filter to all subjects


In [26]:
# Chain a spatial and a temporal beamformer: first reduce each trial to a
# single channel, then reduce that channel to a single amplitude estimate.
beamformer = psychic.nodes.Chain([
    psychic.nodes.SpatialBeamformer(spatial_template, cov_func=cov, reg=reg),
    psychic.nodes.TemplateBeamformer(temporal_template, cov_func=cov, reg=reg),
])

# Train and apply per subject, so each subject gets its own covariance estimate.
results = [beamformer.train_apply(d) for d in all_trials]

print 'Results obtained for %d subjects' % len(results)
print 'Each result is a DataSet:'
print results[0]


Results obtained for 10 subjects
Each result is a DataSet:
DataSet with 200 instances, 1 features [1], 2 classes: [100, 100], extras: []

Each resulting DataSet has only 1 feature per instance: an estimate of the N400 amplitude:


In [29]:
# First 5 single-trial N400 amplitude estimates for the first subject.
print results[0].data[:, :5]


[[ 0.3355669  -0.23624597  0.33971349 -0.36654966 -0.21764235]]

Let's do a quick t-test between the 'related' and 'unrelated' cases for each subject:


In [30]:
for num, s in enumerate(subjects):
    r = results[num]
    t, p = scipy.stats.ttest_ind(r.get_class('related').data[0,:], r.get_class('unrelated').data[0,:])
    print '{subject:<10s} t({df:d})={t:.3f}, p={p:.5f}'.format(subject=s, df=len(r)/2-1, t=float(t), p=float(p))


anne       t(99)=-2.253, p=0.02534
cedric     t(99)=-3.911, p=0.00013
daniel     t(99)=-3.540, p=0.00050
dieter     t(99)=-4.354, p=0.00002
heleen     t(99)=-2.279, p=0.02370
me         t(99)=-3.036, p=0.00272
ph         t(99)=-3.094, p=0.00226
tom        t(99)=-1.350, p=0.17867
zander     t(99)=-3.563, p=0.00046
zeger      t(99)=-5.224, p=0.00000

Creating a Pandas dataframe for better analysis


In [32]:
import pandas

# Collect all subjects' results into one long-format frame:
# one row per trial, columns N400 (amplitude), class, subject, stimulus.
dfs = []
for s, result in zip(subjects, results):
    subj_df = pandas.DataFrame({'N400': result.data[0, :],
                                'class': result.y})
    subj_df['subject'] = s
    subj_df['stimulus'] = range(len(subj_df))
    dfs.append(subj_df)

df = pandas.concat(dfs)
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# `print` only appended a spurious 'None' to the output.
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 2001 entries, 0 to 199
Data columns (total 4 columns):
N400        2001 non-null float64
class       2001 non-null int64
subject     2001 non-null object
stimulus    2001 non-null int64
dtypes: float64(1), int64(2), object(1)None

In [33]:
df.head(10)


Out[33]:
N400 class subject stimulus
0 0.335567 1 anne 0
1 -0.236246 1 anne 1
2 0.339713 1 anne 2
3 -0.366550 1 anne 3
4 -0.217642 1 anne 4
5 -0.191852 1 anne 5
6 0.403107 1 anne 6
7 -0.106438 1 anne 7
8 -0.064486 1 anne 8
9 0.241951 1 anne 9

In [34]:
df.tail(10)


Out[34]:
N400 class subject stimulus
190 0.813513 0 zeger 190
191 -0.105086 0 zeger 191
192 0.633405 0 zeger 192
193 -0.166775 0 zeger 193
194 -0.090670 0 zeger 194
195 -0.718325 0 zeger 195
196 -0.009582 0 zeger 196
197 0.397734 0 zeger 197
198 -0.176660 0 zeger 198
199 -0.149326 0 zeger 199

Moving everything to R in order to fit a mixed effects model for a proper statistical evaluation


In [35]:
%load_ext rmagic

In [39]:
%%R -i df

# lme4 fits the model; lmerTest augments summary() with Satterthwaite
# degrees of freedom and p-values (needed for coefficient column 5 below).
# If missing, install it first: install.packages('lmerTest')
library('lme4')
library('lmerTest')

# LME model with subjects (slopes + intercepts) and items (intercepts only) as random effects
m = lmer(N400 ~ class + (class | subject) + (1 | stimulus), data=df)
s = summary(m)
print(s)

# Fixed effect of 'class' (row 2): t-value is column 4, p-value is column 5
t = s$coefficients[2,4]
p = s$coefficients[2,5]


Error in library("lmerTest") : there is no package called ‘lmerTest’

In [41]:
# t and p were exported back to the Python kernel by the %%R cell above.
print 't=%f, p=%f' %(t, p)
print 'Is it significant?', 'YES!' if p < 0.05 else 'no :('


t=-5.223702, p=0.000000
Is it significant? YES!

In [ ]: