Introduction to visualizing data in the eeghdf files


In [1]:
# %load explore-eeghdf-files-basics.py
# Here is an example of how to do basic exploration of what is in the eeghdf file. I show how to discover the fields in the file and to plot them.
# 
# I have copied the stacklineplot module from my python-edf/examples code to help with display. I may later turn it into a helper or release it as a utility package to make it easier to install.

from __future__ import print_function, division, unicode_literals
%matplotlib inline
# %matplotlib notebook

import matplotlib
import matplotlib.pyplot as plt
#import seaborn
import pandas as pd
import numpy as np
import h5py
from pprint import pprint

import stacklineplot


# matplotlib.rcParams['figure.figsize'] = (18.0, 12.0)
# Default figure size as (width, height) in inches for the plots below;
# the larger alternative above is kept for big-screen viewing.
matplotlib.rcParams['figure.figsize'] = (12.0, 8.0)

In [2]:
# Open the recording read-only. Without an explicit mode, older h5py releases
# open in append mode, which can modify the archive or silently create an
# empty file when the path is mistyped.
hdf = h5py.File('./archive/YA2741BS_1-1+.eeghdf', 'r') # 5mo boy

In [3]:
# Show the top-level groups of the file, then the attributes stored on the
# 'patient' group.
for section in (hdf, hdf['patient'].attrs):
    pprint(list(section.items()))


[('patient', <HDF5 group "/patient" (0 members)>),
 ('record-0', <HDF5 group "/record-0" (10 members)>)]
[('patient_name', '906, Subject'),
 ('patientcode', '0bf1acf3d68c50938e6f9dd40906f3ad'),
 ('gender', 'Male'),
 ('birthdate', '1990-01-01'),
 ('patient_additional', ''),
 ('gestatational_age_at_birth_days', -1.0),
 ('born_premature', 'unknown')]

In [4]:
# 'record-0' holds the signals plus per-record metadata.
rec = hdf['record-0']
rec_attrs = rec.attrs

# List the datasets/groups in the record, followed by its attributes.
pprint(list(rec.items()))
pprint(list(rec_attrs.items()))

# Convert the stored age in days to (approximate) years.
years_old = rec_attrs['patient_age_days'] / 365
pprint("age in years: %s" % years_old)


[('edf_annotations', <HDF5 group "/record-0/edf_annotations" (3 members)>),
 ('physical_dimensions',
  <HDF5 dataset "physical_dimensions": shape (36,), type "|O">),
 ('prefilters', <HDF5 dataset "prefilters": shape (36,), type "|O">),
 ('signal_digital_maxs',
  <HDF5 dataset "signal_digital_maxs": shape (36,), type "<i4">),
 ('signal_digital_mins',
  <HDF5 dataset "signal_digital_mins": shape (36,), type "<i4">),
 ('signal_labels', <HDF5 dataset "signal_labels": shape (36,), type "|O">),
 ('signal_physical_maxs',
  <HDF5 dataset "signal_physical_maxs": shape (36,), type "<f8">),
 ('signal_physical_mins',
  <HDF5 dataset "signal_physical_mins": shape (36,), type "<f8">),
 ('signals', <HDF5 dataset "signals": shape (36, 446000), type "<i2">),
 ('transducers', <HDF5 dataset "transducers": shape (36,), type "|O">)]
[('start_isodatetime', '1990-06-17 13:20:56'),
 ('end_isodatetime', '1990-06-17 13:58:06'),
 ('number_channels', 36),
 ('number_samples_per_channel', 446000),
 ('sample_frequency', 200.00000000000003),
 ('bits_per_sample', 16),
 ('technician', ''),
 ('patient_age_days', 167.55620370370372)]
'age in years: 0.459058092339'

In [5]:
# Handle to the signal dataset (left on disk; slices are read on demand).
signals = rec['signals']
labels = rec['signal_labels']

# Decode each stored label from ASCII bytes to text once...
electrode_labels = [str(s, 'ascii') for s in labels]
# ...and reuse the decoded names for the "index:name" variants instead of
# re-reading and re-decoding every entry of the HDF5 dataset by index.
numbered_electrode_labels = ["%d:%s" % (ii, name) for ii, name in enumerate(electrode_labels)]

Simple visualization of EEG (electrodecremental seizure pattern)


In [6]:
# plot 10s epochs (multiples in DE)
ch0, ch1 = (0, 19)  # channel slice [ch0:ch1] used by the stackplot call below
DE = 2 # how many 10s epochs to display
epoch = 53  # index of the 10s epoch to start from

# Number of samples in one 10s epoch. The stored sampling rate is a float
# (e.g. 200.00000000000003), so round instead of truncating: int() alone would
# turn 199.99999999999997 into 199 and drop the fraction of non-integer rates.
ptepoch = int(round(10 * rec.attrs['sample_frequency']))
dp = int(0.5*ptepoch)  # half-epoch offset in samples
# stacklineplot.stackplot(signals[ch0:ch1,epoch*ptepoch+dp:(epoch+DE)*ptepoch+dp],seconds=DE*10.0, ylabels=electrode_labels[ch0:ch1], yscale=0.3)
print("epoch:", epoch)


epoch: 53

In [18]:
# The annotation search identified spasms at 1836, 1871, 1901, 1939; display the first one.
# NOTE(review): the second argument is presumably the centre time in seconds -- confirm in stacklineplot.
stacklineplot.show_epoch_centered(
    signals,
    1836,
    epoch_width_sec=15,
    chstart=0,
    chstop=19,
    fs=rec.attrs['sample_frequency'],
    ylabels=electrode_labels,
    yscale=3.0,
)



In [8]:
# Group holding the EDF annotations of this record; its members are listed
# and read in the cells below.
annot = rec['edf_annotations']
#print(list(annot.items()))
#annot['texts'][:]

In [9]:
# Dimensions of the signal dataset; compare with number_channels and
# number_samples_per_channel in the record attributes.
signals.shape


Out[9]:
(36, 446000)

In [10]:
# Read the annotation texts (decoded from UTF-8 bytes) and their start times
# into plain Python lists.
antext = [raw.decode('utf-8') for raw in annot['texts'][:]]
starts100ns = list(annot['starts_100ns'][:])

# Sanity check: both lists should have the same length.
(len(starts100ns), len(antext))


Out[10]:
(66, 66)

In [11]:
import pandas as pd

In [12]:
# Annotation table: the text, its start time in 100 ns ticks, and the same
# start time converted to seconds (10**7 ticks per second).
df = (
    pd.DataFrame(data=antext, columns=['text'])
    .assign(starts100ns=starts100ns)
    .assign(starts_sec=lambda frame: frame['starts100ns'] / 10**7)
)

In [13]:
# Display the whole annotation table (66 rows; pandas elides the middle rows in the output).
df # look at the annotations


Out[13]:
text starts100ns starts_sec
0 REC START Biocal CAL 0 0.000
1 A1+A2 OFF 11400000 1.140
2 PAT Biocal EEG 61900000 6.190
3 PAT A_DB EEG 114200000 11.420
4 IMP CHECK ON 564500000 56.450
5 IMP CHECK OFF 593500000 59.350
6 resting on right sid 731150000 73.115
7 head on right side 772180000 77.218
8 head on neckroll 809810000 80.981
9 nursnig quietly 856350000 85.635
10 01 spike wave 1661970000 166.197
11 nursing quietly 2262760000 226.276
12 head movement 2429420000 242.942
13 whimpering 2543050000 254.305
14 nursing again 2668330000 266.833
15 PAT B_IPSIEARS EEG 3041200000 304.120
16 PAT G_CIRCLE EEG 3682900000 368.290
17 PAT J_T1/T2 EEG 4924600000 492.460
18 sleeping quietly 4969760000 496.976
19 done nursing 5303080000 530.308
20 PAT A_DB EEG 6245300000 624.530
21 quietly sleeping 6387370000 638.737
22 R spindles 6948450000 694.845
23 R spindles 7391120000 739.112
24 R sleep spindles 7637360000 763.736
25 sleeping quietly 8069910000 806.991
26 R sleep spindles 9620170000 962.017
27 sleeping quietly 10266330000 1026.633
28 awakening pt 12977750000 1297.775
29 mom talking to baby 13062420000 1306.242
... ... ... ...
36 holding rattle w/ bo 14787490000 1478.749
37 smiling 15278720000 1527.872
38 stretching 15416540000 1541.654
39 laying happily on be 15769740000 1576.974
40 smiling 16119960000 1611.996
41 PHOTO 2Hz 16724300000 1672.430
42 PHOTO 4Hz 16826500000 1682.650
43 staring right at lig 16856500000 1685.650
44 PHOTO 6Hz 16925800000 1692.580
45 PHOTO 8Hz 17026000000 1702.600
46 PHOTO 10Hz 17126000000 1712.600
47 PHOTO 12Hz 17225900000 1722.590
48 PHOTO 14Hz 17326100000 1732.610
49 PHOTO 16Hz 17425900000 1742.590
50 PHOTO 18Hz 17526000000 1752.600
51 PHOTO 20Hz 17626000000 1762.600
52 eyes held closed by 18038310000 1803.831
53 eyes closed by tech 18232610000 1823.261
54 SPASM? 18367010000 1836.701
55 SPASM? 18710300000 1871.030
56 SPASM 19014460000 1901.446
57 SPASM 19388010000 1938.801
58 removed neckroll 19857900000 1985.790
59 fussing 19928480000 1992.848
60 whimpering 20161040000 2016.104
61 sneezing 20695670000 2069.567
62 sneezing 21130030000 2113.003
63 whimpering 21685980000 2168.598
64 quiet 22029380000 2202.938
65 quiet 22168930000 2216.893

66 rows × 3 columns


In [14]:
# Case-insensitive search of the annotation text for the abbreviation 'sz'.
df.loc[df['text'].str.contains('sz', case=False)]


Out[14]:
text starts100ns starts_sec

In [15]:
# Case-insensitive search of the annotation text for 'seizure'.
df.loc[df['text'].str.contains('seizure', case=False)]


Out[15]:
text starts100ns starts_sec

In [16]:
# Case-insensitive search of the annotation text for 'spasm' -- this is how the
# events are labelled in this recording.
df.loc[df['text'].str.contains('spasm', case=False)]


Out[16]:
text starts100ns starts_sec
54 SPASM? 18367010000 1836.701
55 SPASM? 18710300000 1871.030
56 SPASM 19014460000 1901.446
57 SPASM 19388010000 1938.801

In [17]:
# List the members of the annotation group (name, HDF5 object) to see how the
# annotations are stored.
list(annot.items())


Out[17]:
[('durations_char16',
  <HDF5 dataset "durations_char16": shape (66,), type "|S16">),
 ('starts_100ns', <HDF5 dataset "starts_100ns": shape (66,), type "<i8">),
 ('texts', <HDF5 dataset "texts": shape (66,), type "|O">)]

In [ ]:

# Scratch arithmetic left over from exploration.
# NOTE(review): the purpose of this value is not evident from the notebook -- confirm or remove.
2.6*10**12 /10


In [ ]:


In [ ]:


In [ ]: