In [6]:
import wfdb
import os
import glob
import numpy as np
import matplotlib.pyplot as plt 
import random
import pickle
from scipy.signal import resample, medfilt,savgol_filter

In [7]:
fs=1e3 # original sampling rate of PTB ecg signal
fs_resampled1=250 
fs_resampled2=64

Data directory and filepaths


In [8]:
data_dir=os.path.join('..','ptbdb') # data is in ../ptbdb/

In [9]:
filepaths=list(set([os.path.splitext(fl)[0] for fl in glob.glob(os.path.join(data_dir,'*','*'))]))
filepaths[0:3]


Out[9]:
['../ptbdb/patient059/s0208lre',
 '../ptbdb/patient030/s0153lre',
 '../ptbdb/patient020/s0069lre']

Utility functions


In [10]:
def preprocess(sig):
    sig_resampled,_=wfdb.processing.resample_sig(sig,fs,fs_resampled1)
    sig_resampled_mdf_stg1=medfilt(volume=sig_resampled,kernel_size=fs_resampled1//2)
    sig_resampled_mdf_stg2=medfilt(volume=sig_resampled_mdf_stg1,kernel_size=fs_resampled1-1)
    sig_resampled_mdf=sig_resampled-sig_resampled_mdf_stg2
    sig_resampled_mdf_sg=savgol_filter(x=sig_resampled_mdf,window_length=15,polyorder=3)
    sig_resampled_mdf_sg_resampled,_=wfdb.processing.resample_sig(sig_resampled_mdf_sg,fs_resampled1,fs_resampled2)
    return sig_resampled_mdf_sg_resampled    
def get_segments(key,channels):
    sig,fields=wfdb.srdsamp(recordname=key,channels=channels)
    sig_processed=[]
    for record in sig.T:
        record_processed=preprocess(record)
        n=int(3.072*fs_resampled2)
        n_segments=len(record_processed)//n
        record_processed=record_processed[0:n_segments*n]
        record_processed=np.reshape(record_processed,[n_segments,n])
        sig_processed.append(np.array(record_processed))
    sig_processed=np.array(sig_processed) # data shape: [n_channels,n_segments,n_samples]
    sig_processed=np.swapaxes(sig_processed,0,1) # data shape: [n_segments,n_channels,n_samples]
    if 'Healthy control' in fields['comments'][4]:
        label_bin=np.array([1,0])
    if 'Myocardial infarction' in fields['comments'][4]:
        label_bin=np.array([0,1])
    return sig_processed,label_bin

In [12]:
keys_imi=[]
keys_hc=[]
data={}
segments_hc=0
segments_imi=0
for i,key in enumerate(filepaths):
    _,fields=wfdb.srdsamp(key)
    if 'Healthy control' in fields['comments'][4]:
        segments,label_bin=get_segments(key,[1,2,5])
        data[key]=(segments,label_bin)
        segments_hc=segments_hc+segments.shape[0]
        keys_hc.append(key)
    else:
        if 'Myocardial infarction' in fields['comments'][4]:
            if 'inferior' in fields['comments'][5]:
                segments,label_bin=get_segments(key,[1,2,5])
                data[key]=(segments,label_bin)
                segments_imi=segments_imi+segments.shape[0]
                keys_imi.append(key)
    print('processed {}/{}'.format(i+1,len(filepaths),),end='\r')
print('\n')
patients_imi=set([key.split(os.path.sep)[-2] for key in keys_imi])
patients_hc=set([key.split(os.path.sep)[-2] for key in keys_hc])
len(patients_imi),len(patients_hc)


processed 549/549

Out[12]:
(30, 52)

In [13]:
segments_total=segments_hc+segments_imi
print('Total segments: {}, HC segments: {} ({:.2f}%), IMI segments: {} ({:.2f}%)'.\
      format(segments_total,segments_hc,segments_hc/segments_total*100,segments_imi,segments_imi/segments_total*100))


Total segments: 6277, HC segments: 3055 (48.67%), IMI segments: 3222 (51.33%)

In [14]:
# save preprocessesd and segmented ECGs
with open(os.path.join('..','data','imi_hc_64Hz_3_lead.bin'),'wb') as pfile:
    pickle.dump(file=pfile,obj=data,protocol=4)