Introduction to visualizing data in the eeghdf files


In [1]:
# %load explore-eeghdf-files-basics.py
# This notebook is a basic example of exploring the contents of an eeghdf file: it shows how to discover the fields in the file and how to plot them.
#
# The stacklineplot module is copied from my python-edf/examples code to help with display. It may later be turned into a helper or released as a utility package to make it easier to install.

from __future__ import print_function, division, unicode_literals
%matplotlib inline
# %matplotlib notebook

import matplotlib
import matplotlib.pyplot as plt
#import seaborn
import pandas as pd
import numpy as np
import h5py
from pprint import pprint

import stacklineplot


# Default figure size for every plot in this notebook; the larger
# alternative is kept commented out for wide displays.
# matplotlib.rcParams['figure.figsize'] = (18.0, 12.0)
matplotlib.rcParams['figure.figsize'] = (12.0, 8.0)

In [2]:
# Open the recording read-only: this notebook only inspects the file, and an
# explicit mode avoids depending on h5py's version-specific default
# (releases before 3.0 defaulted to a writable mode).
hdf = h5py.File('./archive/DA05505C_1-1+.eeghdf', 'r')

In [3]:
# List the top-level members of the file, then the attributes stored on the
# 'patient' group.
pprint(list(hdf.items()))
pprint(list(hdf['patient'].attrs.items()))


[('patient', <HDF5 group "/patient" (0 members)>),
 ('record-0', <HDF5 group "/record-0" (10 members)>)]
[('patient_name', '77, Subject'),
 ('patientcode', '383f6cc99c7652bf96d9be9ea44606a8'),
 ('gender', 'Male'),
 ('birthdate', '1990-01-01'),
 ('patient_additional', ''),
 ('gestatational_age_at_birth_days', -1.0),
 ('born_premature', 'unknown')]

In [4]:
# Take the 'record-0' group and list its members and its attributes.
rec = hdf['record-0']
pprint(list(rec.items()))
pprint(list(rec.attrs.items()))
# Approximate age in years: divides the age in days by 365, so leap days are ignored.
years_old = rec.attrs['patient_age_days']/365
# pprint of a str shows its repr, which is why the output below is quoted.
pprint("age in years: %s" % years_old)


[('edf_annotations', <HDF5 group "/record-0/edf_annotations" (3 members)>),
 ('physical_dimensions',
  <HDF5 dataset "physical_dimensions": shape (36,), type "|O">),
 ('prefilters', <HDF5 dataset "prefilters": shape (36,), type "|O">),
 ('signal_digital_maxs',
  <HDF5 dataset "signal_digital_maxs": shape (36,), type "<i4">),
 ('signal_digital_mins',
  <HDF5 dataset "signal_digital_mins": shape (36,), type "<i4">),
 ('signal_labels', <HDF5 dataset "signal_labels": shape (36,), type "|O">),
 ('signal_physical_maxs',
  <HDF5 dataset "signal_physical_maxs": shape (36,), type "<f8">),
 ('signal_physical_mins',
  <HDF5 dataset "signal_physical_mins": shape (36,), type "<f8">),
 ('signals', <HDF5 dataset "signals": shape (36, 609200), type "<i2">),
 ('transducers', <HDF5 dataset "transducers": shape (36,), type "|O">)]
[('start_isodatetime', '2006-08-10 18:50:55'),
 ('end_isodatetime', '2006-08-10 19:41:41'),
 ('number_channels', 36),
 ('number_samples_per_channel', 609200),
 ('sample_frequency', 200.00000000000003),
 ('bits_per_sample', 16),
 ('technician', ''),
 ('patient_age_days', 6065.7853587962964)]
'age in years: 16.6185900241'

In [5]:
# Handles to the sample dataset and the per-channel label dataset of the record.
signals = rec['signals']
labels = rec['signal_labels']
# Decode each stored label to str exactly once.
electrode_labels = [str(s,'ascii') for s in labels]
# "<channel index>:<label>" strings; built from the already-decoded labels so the
# HDF5 dataset is not indexed (and each label not decoded) a second time per channel.
numbered_electrode_labels = ["%d:%s" % (ii, name) for ii, name in enumerate(electrode_labels)]

Simple visualization of EEG (left temporal seizure pattern)

Note the rhythmic theta activity (4-7 Hz), which shows some evolution in frequency and location.


In [16]:
# plot 10s epochs (multiples in DE)
ch0, ch1 = (0,19)  # channel range [ch0, ch1) to display
DE = 2 # how many 10s epochs to display
epoch = 53  # index of the first 10s epoch to show
# Number of samples in one 10s epoch. sample_frequency is stored as a float
# (200.00000000000003 in this file), so round rather than truncate: a value
# just below an integer would otherwise drop one sample per second.
ptepoch = int(round(10*rec.attrs['sample_frequency']))
dp = int(0.5*ptepoch)  # shift the window start by half an epoch
stacklineplot.stackplot(signals[ch0:ch1,epoch*ptepoch+dp:(epoch+DE)*ptepoch+dp],seconds=DE*10.0, ylabels=electrode_labels[ch0:ch1], yscale=3.0)
print("epoch:", epoch)


epoch: 53

In [7]:
# perhaps a slightly easier representation of the plot:
# one call that receives the whole signals dataset together with the channel
# range and the sampling rate, instead of slicing sample indices by hand.
# NOTE(review): 545 is presumably the center of the window in seconds -- confirm
# against stacklineplot.show_epoch_centered.
stacklineplot.show_epoch_centered(signals, 545,
                        epoch_width_sec=20,
                        chstart=0, chstop=19, fs=rec.attrs['sample_frequency'],
                        ylabels=electrode_labels, yscale=3.0)



In [8]:
# Group holding the annotations of this record.
annot = rec['edf_annotations']
# Optional quick looks at the group members and the raw annotation texts:
#print(list(annot.items()))
#annot['texts'][:]

In [9]:
# Read both annotation datasets fully: decode each text entry from UTF-8 and
# collect the start values into a plain list.
antext = [raw.decode('utf-8') for raw in annot['texts'][:]]
starts100ns = list(annot['starts_100ns'][:])
# Sanity check: both lists should have the same length.
(len(starts100ns), len(antext))


Out[9]:
(38, 38)

In [10]:
import pandas as pd  # NOTE: redundant -- pandas is already imported as pd in the first cell

In [11]:
# One row per annotation: its text, its start in 100 ns ticks, and the same
# start converted to seconds (1 s = 10**7 ticks of 100 ns).
df = (
    pd.DataFrame(data=antext, columns=['text'])
    .assign(starts100ns=starts100ns)
    .assign(starts_sec=lambda frame: frame['starts100ns'] / 10**7)
)

In [12]:
df # look at the annotations (displays the whole table)


Out[12]:
text starts100ns starts_sec
0 REC START A_DB EEG 0 0.000
1 A1+A2 OFF 0 0.000
2 texting 5197550000 519.755
3 *****SEIZURE 1 5332390000 533.239
4 first change 5415300000 541.530
5 stare vs watching tv 5437730000 543.773
6 turns and looks to r 5497230000 549.723
7 playing with phone ? 5626540000 562.654
8 clear staring off 5791480000 579.148
9 turns around all the 5947660000 594.766
10 MARK ON 5949200000 594.920
11 MARK OFF 5970800000 597.080
12 MARK ON 5971900000 597.190
13 MARK ON 5981700000 598.170
14 MARK OFF 6013500000 601.350
15 MARK ON 6014400000 601.440
16 tonic to clonic 6026740000 602.674
17 MARK OFF 6090200000 609.020
18 MARK ON 6091100000 609.110
19 MARK ON 6094500000 609.450
20 MARK ON 6098700000 609.870
21 MARK OFF 6192800000 619.280
22 MARK ON 6194900000 619.490
23 great RN 6307320000 630.732
24 MARK OFF 6360200000 636.020
25 MARK ON 6643400000 664.340
26 MARK OFF 6655700000 665.570
27 off 6968070000 696.807
28 starts back up 7170090000 717.009
29 shaking stopped 7579280000 757.928
30 nodding to answer qu 8544350000 854.435
31 left slowing 11475780000 1147.578
32 arouses, picks at he 20714970000 2071.497
33 accidental? no chang 21372400000 2137.240
34 MARK ON 21394800000 2139.480
35 MARK OFF 21485000000 2148.500
36 MARK ON 21520300000 2152.030
37 MARK OFF 21730100000 2173.010

In [13]:
# Case-insensitive search for the abbreviation 'sz' in the annotation text.
df.loc[df['text'].str.contains('sz', case=False)]


Out[13]:
text starts100ns starts_sec

In [14]:
# find the seizure: case-insensitive match on the annotation text
df.loc[df['text'].str.contains('seizure', case=False)]


Out[14]:
text starts100ns starts_sec
3 *****SEIZURE 1 5332390000 533.239

In [15]:
# Show the members of the annotations group and their shapes/dtypes.
list(annot.items())


Out[15]:
[('durations_char16',
  <HDF5 dataset "durations_char16": shape (38,), type "|S16">),
 ('starts_100ns', <HDF5 dataset "starts_100ns": shape (38,), type "<i8">),
 ('texts', <HDF5 dataset "texts": shape (38,), type "|O">)]

In [ ]: