In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import progressbar
import psychic
import scipy.stats
In [20]:
# Regularization parameter for covariance matrix inversion.
reg = 0.2
# Various covariance estimators. Give them a try.
# cov = lambda X: X.dot(X.T)
# cov = np.cov
cov = psychic.lw_cov  # presumably Ledoit-Wolf shrinkage -- see psychic docs
# Resample for dimensionality reduction
sample_rate = 50.0
# Different priors about N400 timing. The time axis is in seconds, so the
# N400 peak is expected around 0.4 s (400 ms).
# No knowledge
# coi_prior = lambda x: 1.0
# Slight nudge towards 0.4 s (400 ms)
coi_prior = scipy.stats.norm(0.4, 0.1).pdf
# Strong nudge towards 0.4 s (400 ms)
# coi_prior = scipy.stats.norm(0.4, 0.05).pdf
In [10]:
# Load per-subject trial data: resample, crop to EEG channels and the
# -0.1..1.0 s window, and keep only the two classes of interest.
subjects = ['anne', 'cedric', 'daniel', 'dieter', 'heleen', 'me', 'ph', 'tom', 'zander', 'zeger']
all_trials = []
bar = progressbar.ProgressBar(maxval=len(subjects)).start()
for i, subject in enumerate(subjects, 1):
    # NOTE(review): hardcoded absolute local path -- consider a configurable data dir.
    fname = '/home/marijn/data/dedeyne_levels_norm_delayed_button/%s-dedeyne_levels_norm_delayed_button-trials.dat' % subject
    trials = psychic.DataSet.load(fname)
    # Subsample to 50 Hz
    trials = psychic.nodes.Resample(sample_rate).train_apply(trials)
    # Limit data to EEG channels only, -0.1 to 1.0 seconds in time
    trials = trials.lix[:'Cz', -0.1:1.0, :]
    # Make sure the signal is 54 samples long (this should also be the
    # length of the temporal template)
    trials = trials.ix[:, :54, :]
    # Original data has 3 classes; only keep related vs. unrelated
    trials = trials.get_class(['related', 'unrelated'], drop_others=True)
    all_trials.append(trials)
    bar.update(i)
bar.finish()

# Channel names and time axis, taken from the first subject's data.
channel_names = all_trials[0].feat_lab[0]
time = all_trials[0].feat_lab[1]
In [11]:
# Quick sanity check on what was loaded.
print('There is data for %d subjects' % len(all_trials))
print('For each subject, there is a DataSet:')
print(all_trials[0])
print('Class labels: ' + str(all_trials[0].cl_lab))
In [12]:
# Grand-average ERP across all subjects, one column of channels per panel.
fig = plt.figure(figsize=(8, 8))
psychic.plot_erp(psychic.concatenate(all_trials, ignore_index=True),
                 fig=fig, vspace=8, ncols=3);
There is some N400 component in that data. In order to do statistics on it, we need to estimate its amplitude for each trial.
In [23]:
# Spatial template: one weight per EEG channel.
spatial_template = psychic.DataSet.load('../../prior_knowledge/results/n400_spatial_template.dat').data.flatten()
print('Shape of spatial_template ' + str(spatial_template.shape))

# Temporal template: re-sample it so it matches the sample rate of our data.
temporal_template = psychic.DataSet.load('../../prior_knowledge/results/n400_temporal_template.dat')
temporal_template = psychic.nodes.Resample(sample_rate).train_apply(temporal_template)
temporal_template = temporal_template.data.flatten()
print('Shape of temporal_template ' + str(temporal_template.shape))
In [24]:
plt.figure()

# Top panel: spatial pattern on the scalp.
# (The original drew the scalp twice into the same axes -- once without a
# colormap and once with -- the first draw was redundant and is removed.)
plt.subplot(211)
cm = np.max(np.abs(spatial_template))  # symmetric color limit around zero
psychic.scalpplot.plot_scalp(spatial_template, channel_names,
                             cmap=plt.get_cmap('RdBu_r'))
plt.clim(-cm, cm)
plt.title('spatial pattern')

# Bottom panel: temporal pattern over time.
plt.subplot(212)
plt.plot(time, temporal_template, '-k')
plt.xlabel('time (s)')
plt.title('temporal pattern')
plt.tight_layout()
We can sharpen the temporal pattern by applying a prior on the expected N400 latency:
In [25]:
# Weight the temporal template by the latency prior.
# NOTE(review): in-place update -- re-running this cell applies the prior
# twice; restart & run all (or reload the template) before re-running.
# Assumes the resampled template has the same length as `time` (54 samples)
# -- TODO confirm.
temporal_template *= coi_prior(time)
f = plt.figure(figsize=(7,2))
plt.plot(time, temporal_template, '-k')
plt.xlabel('time (s)')
plt.title('temporal pattern with prior')
Out[25]:
In [26]:
# Two-stage beamformer: spatial filtering with the spatial template,
# followed by a template projection along the time axis.
spatial_node = psychic.nodes.SpatialBeamformer(spatial_template, cov_func=cov, reg=reg)
temporal_node = psychic.nodes.TemplateBeamformer(temporal_template, cov_func=cov, reg=reg)
beamformer = psychic.nodes.Chain([spatial_node, temporal_node])

# Train and apply per subject.
results = [beamformer.train_apply(d) for d in all_trials]
print('Results obtained for %d subjects' % len(results))
print('Each result is a DataSet:')
print(results[0])
Each resulting DataSet has only 1 feature per instance: an estimate of the N400 amplitude:
In [29]:
# Peek at the first 5 single-trial N400 amplitude estimates.
print results[0].data[:, :5]
Let's do a quick t-test between the 'related' and 'unrelated' cases for each subject:
In [30]:
for num, s in enumerate(subjects):
r = results[num]
t, p = scipy.stats.ttest_ind(r.get_class('related').data[0,:], r.get_class('unrelated').data[0,:])
print '{subject:<10s} t({df:d})={t:.3f}, p={p:.5f}'.format(subject=s, df=len(r)/2-1, t=float(t), p=float(p))
In [32]:
import pandas

# Build one long-format table: one row per trial, across all subjects.
frames = []
for num, s in enumerate(subjects):
    frame = pandas.DataFrame({'N400': results[num].data[0,:],
                              'class': results[num].y})
    frame['subject'] = s
    frame['stimulus'] = range(len(frame))
    frames.append(frame)
df = pandas.concat(frames)
print(df.info())
In [33]:
# First 10 rows: sanity-check the long-format table.
df.head(10)
Out[33]:
In [34]:
# Last 10 rows: confirm the final subject's data made it in.
df.tail(10)
Out[34]:
In [35]:
# NOTE(review): 'rmagic' is deprecated in newer IPython; the modern
# equivalent is `%load_ext rpy2.ipython` -- confirm the installed version.
%load_ext rmagic
In [39]:
%%R -i df -o t,p
library('lme4')
library('lmerTest')
# LME model with subjects (slopes + intercepts) and items (intercepts only) as random effects
m = lmer(N400 ~ class + (class | subject) + (1 | stimulus), data=df)
s = summary(m)
print(s)
# lmerTest summary coefficient columns: Estimate, Std. Error, df, t value, Pr(>|t|)
t = s$coefficients[2,4]
p = s$coefficients[2,5]
# `-o t,p` above exports t and p back to Python; without it the next cell
# would print leftover values from the earlier per-subject t-test loop.
In [41]:
# NOTE(review): t and p are only the LME results if the %%R cell exports
# them (e.g. `%%R -i df -o t,p`); otherwise these names still hold the
# last per-subject t-test values from the loop above -- verify.
print 't=%f, p=%f' %(t, p)
print 'Is it significant?', 'YES!' if p < 0.05 else 'no :('
In [ ]: