In [1]:
# import libraries
from __future__ import print_function, division, unicode_literals
%matplotlib inline
# %matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import h5py
from pprint import pprint
import stacklineplot # local copy
# matplotlib.rcParams['figure.figsize'] = (18.0, 12.0)
matplotlib.rcParams['figure.figsize'] = (12.0, 8.0)
In [2]:
# Open the recording explicitly read-only. h5py releases before 3.0 defaulted
# to append mode when no mode was given, which could modify the archive or
# silently create an empty file if the path was wrong.
hdf = h5py.File('./archive/YA2741G2_1-1+.eeghdf', 'r')
The data is stored hierarchically in an HDF5 file as a tree of keys and values. It is possible to inspect the file using standard HDF5 tools. Below we show the keys and values associated with the root of the tree. This shows that there is a "patient" group and a group "record-0".
In [3]:
# Display the (name, object) pairs found at the root of the file.
list(hdf.items())
Out[3]:
We can focus on the patient group and access it via hdf['patient'] as if it were a Python dictionary. Here are the key-value pairs stored as attributes of that group. Note that the patient information has been anonymized. Everyone is given the same set of birthdays. This shows that this file is for Subject 2619, who is male.
In [4]:
# Display the attributes attached to the 'patient' group as (name, value) pairs.
list(hdf['patient'].attrs.items())
Out[4]:
Now we look at how the waveform data is stored. By convention, the first record is called "record-0". It contains the waveform data, the approximate time (relative to the birthdate) at which the study was done, and technical information such as the number of channels, electrode names and sample rate.
In [9]:
# Keep a handle to the 'record-0' group; later cells read everything through `rec`.
rec = hdf['record-0']
# Display the record's attributes as (name, value) pairs.
list(rec.attrs.items())
Out[9]:
In [10]:
# Display the (name, object) pairs for everything stored directly under the record.
list(rec.items())
Out[10]:
In [13]:
# Read the whole 'physical_dimensions' entry of the record and display it.
# NOTE(review): presumably the physical unit of each channel — confirm.
rec['physical_dimensions'][:]
Out[13]:
In [15]:
# Read the whole 'prefilters' entry of the record and display it.
# NOTE(review): presumably a per-channel filter description — confirm.
rec['prefilters'][:]
Out[15]:
In [16]:
# Read the whole 'signal_digital_maxs' entry of the record and display it.
rec['signal_digital_maxs'][:]
Out[16]:
In [18]:
# Read the whole 'signal_digital_mins' entry of the record and display it.
rec['signal_digital_mins'][:]
Out[18]:
In [19]:
# Read the whole 'signal_physical_maxs' entry of the record and display it.
rec['signal_physical_maxs'][:]
Out[19]:
In [ ]:
In [ ]:
In [ ]:
We can then grab the actual waveform data and visualize it.
In [6]:
# Handles to the waveform data and to the channel labels stored in the record.
signals = rec['signals']
labels = rec['signal_labels']

# Channel names decoded from ASCII, plus an "index:name" variant of each.
electrode_labels = [str(raw, 'ascii') for raw in labels]
numbered_electrode_labels = [
    "%d:%s" % (idx, str(raw, 'ascii')) for idx, raw in enumerate(labels)
]
In [7]:
# Plot a 15-second-wide epoch (chstart=0, chstop=19) centered on position 1476.
# NOTE(review): the earlier comment here read "search identified spasms at
# 1836, 1871, 1901, 1939"; none of those matches the 1476 plotted below or the
# 'Absence Seizure' title — confirm which is intended.
stacklineplot.show_epoch_centered(
    signals,
    1476,
    epoch_width_sec=15,
    chstart=0,
    chstop=19,
    fs=rec.attrs['sample_frequency'],
    ylabels=electrode_labels,
    yscale=3.0,
)
plt.title('Absence Seizure');
In [8]:
# Handle to the record's 'edf_annotations' entry; the following cells read its
# 'texts' and 'starts_100ns' fields.
annot = rec['edf_annotations']
In [9]:
# Turn the stored annotations into plain Python lists: the texts are decoded
# from UTF-8 bytes, and the start times are copied out element by element.
antext = [raw_text.decode('utf-8') for raw_text in annot['texts'][:]]
starts100ns = list(annot['starts_100ns'][:])
In [10]:
# Annotation table: one row per annotation text, with its start time in seconds.
df = pd.DataFrame(antext, columns=['text'])
df['starts100ns'] = starts100ns
# Swap the raw start-time column for one divided by 10**7 (100 ns ticks to seconds, per the column names).
df['starts_sec'] = df.pop('starts100ns') / 10**7
It is easy then to find the annotations related to seizures
In [11]:
# Keep only the annotations whose text contains "sz", ignoring case.
df.loc[df['text'].str.contains('sz', case=False)]
Out[11]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: