Introduction to visualizing data in eeghdf files



In [1]:

    
# %load explore-eeghdf-files-basics.py
# This example shows how to do basic exploration of an eeghdf file: how to discover the fields stored in the file and how to plot them.
# 
# I have copied the stacklineplot module from my python-edf/examples code to help with display. Eventually I may turn it into a helper or release it as a utility package to make it easier to install.

from __future__ import print_function, division, unicode_literals
%matplotlib inline
# %matplotlib notebook

import matplotlib
import matplotlib.pyplot as plt
#import seaborn
import pandas as pd
import numpy as np
import h5py
from pprint import pprint

import stacklineplot


# Default figure size in inches for every plot in this notebook.
# Larger alternative: matplotlib.rc('figure', figsize=(18.0, 12.0))
matplotlib.rc('figure', figsize=(12.0, 8.0))



In [2]:

    
# Open the recording read-only: this notebook only inspects the file.
# Without an explicit mode, h5py releases before 3.0 default to 'a', which opens
# the file read/write and creates a new empty file if the path does not exist.
hdf = h5py.File('./archive/DA05505C_1-1+.eeghdf', 'r')



In [3]:

    
# Top-level members of the file, followed by the attributes stored on the
# 'patient' group.
pprint([member for member in hdf.items()])
pprint([attribute for attribute in hdf['patient'].attrs.items()])









    



[('patient', <HDF5 group "/patient" (0 members)>),
 ('record-0', <HDF5 group "/record-0" (10 members)>)]
[('patient_name', '77, Subject'),
 ('patientcode', '383f6cc99c7652bf96d9be9ea44606a8'),
 ('gender', 'Male'),
 ('birthdate', '1990-01-01'),
 ('patient_additional', ''),
 ('gestatational_age_at_birth_days', -1.0),
 ('born_premature', 'unknown')]



In [4]:

    
# Work with the first record in the file.
rec = hdf['record-0']

# Members of the record, followed by the record-level attributes.
pprint([member for member in rec.items()])
pprint([attribute for attribute in rec.attrs.items()])

# Approximate age in years (365-day years, leap days ignored).
years_old = rec.attrs['patient_age_days'] / 365
pprint("age in years: %s" % years_old)









    



[('edf_annotations', <HDF5 group "/record-0/edf_annotations" (3 members)>),
 ('physical_dimensions',
  <HDF5 dataset "physical_dimensions": shape (36,), type "|O">),
 ('prefilters', <HDF5 dataset "prefilters": shape (36,), type "|O">),
 ('signal_digital_maxs',
  <HDF5 dataset "signal_digital_maxs": shape (36,), type "<i4">),
 ('signal_digital_mins',
  <HDF5 dataset "signal_digital_mins": shape (36,), type "<i4">),
 ('signal_labels', <HDF5 dataset "signal_labels": shape (36,), type "|O">),
 ('signal_physical_maxs',
  <HDF5 dataset "signal_physical_maxs": shape (36,), type "<f8">),
 ('signal_physical_mins',
  <HDF5 dataset "signal_physical_mins": shape (36,), type "<f8">),
 ('signals', <HDF5 dataset "signals": shape (36, 609200), type "<i2">),
 ('transducers', <HDF5 dataset "transducers": shape (36,), type "|O">)]
[('start_isodatetime', '2006-08-10 18:50:55'),
 ('end_isodatetime', '2006-08-10 19:41:41'),
 ('number_channels', 36),
 ('number_samples_per_channel', 609200),
 ('sample_frequency', 200.00000000000003),
 ('bits_per_sample', 16),
 ('technician', ''),
 ('patient_age_days', 6065.7853587962964)]
'age in years: 16.6185900241'



In [5]:

    
# Handles to the signal data and the per-channel label dataset of the record.
signals = rec['signals']
labels = rec['signal_labels']

# Decode each stored label to text once ...
electrode_labels = [str(s, 'ascii') for s in labels]
# ... and reuse the decoded labels for the "index:label" variant instead of
# reading and decoding every entry of the HDF5 dataset a second time.
numbered_electrode_labels = ["%d:%s" % (ii, label) for ii, label in enumerate(electrode_labels)]

Simple visualization of EEG (left temporal seizure pattern)

Note the rhythmic theta activity (4-7 Hz), which shows some evolution in frequency and location.



In [16]:

    
# plot 10s epochs (multiples in DE)
ch0, ch1 = (0, 19)  # channel slice passed to the plot: signals[ch0:ch1]
DE = 2  # how many 10s epochs to display
epoch = 53  # index of the 10s epoch where the display starts

# Number of samples in one 10s epoch. The stored sample frequency is a float
# that can carry rounding noise (this file holds 200.00000000000003), so round
# to the nearest integer rather than truncating: truncation would turn
# 199.99999999999997 into 199 and shift every epoch boundary.
ptepoch = int(round(10 * rec.attrs['sample_frequency']))
dp = int(0.5 * ptepoch)  # shift the window by half an epoch, in samples

stacklineplot.stackplot(
    signals[ch0:ch1, epoch * ptepoch + dp:(epoch + DE) * ptepoch + dp],
    seconds=DE * 10.0,
    ylabels=electrode_labels[ch0:ch1],
    yscale=3.0,
)
print("epoch:", epoch)









    



epoch: 53



In [7]:

    
# The same kind of display through the helper, which takes the window width in
# seconds and the channel range directly.
# NOTE(review): 545 is presumably the center of the window in seconds -- confirm
# against stacklineplot.show_epoch_centered.
stacklineplot.show_epoch_centered(
    signals,
    545,
    fs=rec.attrs['sample_frequency'],
    epoch_width_sec=20,
    chstart=0,
    chstop=19,
    ylabels=electrode_labels,
    yscale=3.0,
)



In [8]:

    
# Group holding the EDF annotations stored with this record.
annot = rec['edf_annotations']
# Uncomment to list the members of the group or to dump the raw annotation texts:
#print(list(annot.items()))
#annot['texts'][:]



In [9]:

    
# Annotation texts decoded from UTF-8 bytes, and their start values taken from
# the 'starts_100ns' dataset.
antext = [raw_text.decode('utf-8') for raw_text in annot['texts'][:]]
starts100ns = list(annot['starts_100ns'][:])

# Both lists should have the same length: one start per annotation text.
(len(starts100ns), len(antext))









    Out[9]:





(38, 38)



In [10]:

    
import pandas as pd



In [11]:

    
# One row per annotation: the text, its start in the file's 100 ns units, and
# the same start converted to seconds (1 s = 10**7 units of 100 ns).
df = pd.DataFrame(
    {'text': antext, 'starts100ns': starts100ns},
    columns=['text', 'starts100ns'],
)
df['starts_sec'] = df['starts100ns'] / 10**7



In [12]:

    
df # display the annotation table: text, start in 100 ns units, start in seconds









    Out[12]:






  
    
      
      text
      starts100ns
      starts_sec
    
  
  
    
      0
      REC START A_DB EEG
      0
      0.000
    
    
      1
      A1+A2 OFF
      0
      0.000
    
    
      2
      texting
      5197550000
      519.755
    
    
      3
      *****SEIZURE 1
      5332390000
      533.239
    
    
      4
      first change
      5415300000
      541.530
    
    
      5
      stare vs watching tv
      5437730000
      543.773
    
    
      6
      turns and looks to r
      5497230000
      549.723
    
    
      7
      playing with phone ?
      5626540000
      562.654
    
    
      8
      clear staring off
      5791480000
      579.148
    
    
      9
      turns around all the
      5947660000
      594.766
    
    
      10
      MARK ON
      5949200000
      594.920
    
    
      11
      MARK OFF
      5970800000
      597.080
    
    
      12
      MARK ON
      5971900000
      597.190
    
    
      13
      MARK ON
      5981700000
      598.170
    
    
      14
      MARK OFF
      6013500000
      601.350
    
    
      15
      MARK ON
      6014400000
      601.440
    
    
      16
      tonic to clonic
      6026740000
      602.674
    
    
      17
      MARK OFF
      6090200000
      609.020
    
    
      18
      MARK ON
      6091100000
      609.110
    
    
      19
      MARK ON
      6094500000
      609.450
    
    
      20
      MARK ON
      6098700000
      609.870
    
    
      21
      MARK OFF
      6192800000
      619.280
    
    
      22
      MARK ON
      6194900000
      619.490
    
    
      23
      great RN
      6307320000
      630.732
    
    
      24
      MARK OFF
      6360200000
      636.020
    
    
      25
      MARK ON
      6643400000
      664.340
    
    
      26
      MARK OFF
      6655700000
      665.570
    
    
      27
      off
      6968070000
      696.807
    
    
      28
      starts back up
      7170090000
      717.009
    
    
      29
      shaking stopped
      7579280000
      757.928
    
    
      30
      nodding to answer qu
      8544350000
      854.435
    
    
      31
      left slowing
      11475780000
      1147.578
    
    
      32
      arouses, picks at he
      20714970000
      2071.497
    
    
      33
      accidental? no chang
      21372400000
      2137.240
    
    
      34
      MARK ON
      21394800000
      2139.480
    
    
      35
      MARK OFF
      21485000000
      2148.500
    
    
      36
      MARK ON
      21520300000
      2152.030
    
    
      37
      MARK OFF
      21730100000
      2173.010



In [13]:

    
# Case-insensitive search of the annotation text for the abbreviation 'sz'.
df.loc[df['text'].str.contains('sz', case=False)]









    Out[13]:






  
    
      
      text
      starts100ns
      starts_sec



In [14]:

    
# find the seizure: case-insensitive search of the annotation text
df.loc[df['text'].str.contains('seizure', case=False)]









    Out[14]:






  
    
      
      text
      starts100ns
      starts_sec
    
  
  
    
      3
      *****SEIZURE 1
      5332390000
      533.239



In [15]:

    
# Members of the annotation group as (name, object) pairs.
[member for member in annot.items()]









    Out[15]:





[('durations_char16',
  <HDF5 dataset "durations_char16": shape (38,), type "|S16">),
 ('starts_100ns', <HDF5 dataset "starts_100ns": shape (38,), type "<i8">),
 ('texts', <HDF5 dataset "texts": shape (38,), type "|O">)]



In [ ]:

	text	starts100ns	starts_sec
0	REC START A_DB EEG	0	0.000
1	A1+A2 OFF	0	0.000
2	texting	5197550000	519.755
3	*****SEIZURE 1	5332390000	533.239
4	first change	5415300000	541.530
5	stare vs watching tv	5437730000	543.773
6	turns and looks to r	5497230000	549.723
7	playing with phone ?	5626540000	562.654
8	clear staring off	5791480000	579.148
9	turns around all the	5947660000	594.766
10	MARK ON	5949200000	594.920
11	MARK OFF	5970800000	597.080
12	MARK ON	5971900000	597.190
13	MARK ON	5981700000	598.170
14	MARK OFF	6013500000	601.350
15	MARK ON	6014400000	601.440
16	tonic to clonic	6026740000	602.674
17	MARK OFF	6090200000	609.020
18	MARK ON	6091100000	609.110
19	MARK ON	6094500000	609.450
20	MARK ON	6098700000	609.870
21	MARK OFF	6192800000	619.280
22	MARK ON	6194900000	619.490
23	great RN	6307320000	630.732
24	MARK OFF	6360200000	636.020
25	MARK ON	6643400000	664.340
26	MARK OFF	6655700000	665.570
27	off	6968070000	696.807
28	starts back up	7170090000	717.009
29	shaking stopped	7579280000	757.928
30	nodding to answer qu	8544350000	854.435
31	left slowing	11475780000	1147.578
32	arouses, picks at he	20714970000	2071.497
33	accidental? no chang	21372400000	2137.240
34	MARK ON	21394800000	2139.480
35	MARK OFF	21485000000	2148.500
36	MARK ON	21520300000	2152.030
37	MARK OFF	21730100000	2173.010