In [6]:
import wfdb
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import random
import pickle
from scipy.signal import resample, medfilt,savgol_filter
In [7]:
# Sampling rates (Hz) used by the preprocessing pipeline.
fs = 1000.0          # original sampling rate of PTB ecg signal
fs_resampled1 = 250  # intermediate rate: the filters in preprocess() run at this rate
fs_resampled2 = 64   # final rate of the processed signal
In [8]:
# Root folder of the PTB database, relative to the working directory: ../ptbdb/
data_dir = os.path.join('..', 'ptbdb')
In [9]:
# Collect record paths (file extension stripped) from <data_dir>/<subdir>/<file>.
# Files that differ only by extension collapse to a single entry via the set.
# The result is sorted so the order is reproducible: iterating a set of strings
# depends on hash randomization and can differ between kernel restarts.
filepaths = sorted({
    os.path.splitext(fl)[0]
    for fl in glob.glob(os.path.join(data_dir, '*', '*'))
})
filepaths[0:3]
Out[9]:
In [10]:
def preprocess(sig):
    """Resample, detrend and smooth a single-channel signal.

    Pipeline:
      1. resample from ``fs`` to ``fs_resampled1``;
      2. apply two cascaded median filters (kernel sizes ``fs_resampled1//2``
         and ``fs_resampled1-1``) and subtract their output from the resampled
         signal -- presumably to remove baseline wander;
      3. smooth with a Savitzky-Golay filter (window 15, polynomial order 3);
      4. resample from ``fs_resampled1`` to ``fs_resampled2``.

    Parameters
    ----------
    sig : array-like
        One channel of a record, assumed to be sampled at ``fs``.

    Returns
    -------
    The processed signal at ``fs_resampled2``.
    """
    at_rate1, _ = wfdb.processing.resample_sig(sig, fs, fs_resampled1)

    # Two-stage median filtering yields the slow-varying component to remove.
    # medfilt requires odd kernel sizes (125 and 249 for fs_resampled1 == 250).
    trend = medfilt(medfilt(at_rate1, fs_resampled1 // 2), fs_resampled1 - 1)
    detrended = at_rate1 - trend

    smoothed = savgol_filter(detrended, 15, 3)

    at_rate2, _ = wfdb.processing.resample_sig(smoothed, fs_resampled1, fs_resampled2)
    return at_rate2
def get_segments(key,channels):
    """Load a record, preprocess the chosen channels and cut them into segments.

    Parameters
    ----------
    key : str
        Record path without extension, as accepted by ``wfdb.srdsamp``.
    channels : list of int
        Channel indices to read from the record.

    Returns
    -------
    sig_processed : numpy.ndarray
        Array of shape ``[n_segments, n_channels, n_samples]`` where
        ``n_samples = int(3.072 * fs_resampled2)``. Trailing samples that do
        not fill a whole segment are dropped.
    label_bin : numpy.ndarray
        ``[1, 0]`` for 'Healthy control', ``[0, 1]`` for 'Myocardial
        infarction' (taken from ``fields['comments'][4]``).

    Raises
    ------
    ValueError
        If the diagnosis comment matches neither class. The original code hit
        an ``UnboundLocalError`` on ``label_bin`` in that case.
    """
    sig, fields = wfdb.srdsamp(recordname=key, channels=channels)

    # Resolve the label first so an unsupported record fails before the
    # (comparatively expensive) filtering below.
    diagnosis = fields['comments'][4]
    label_bin = None
    if 'Healthy control' in diagnosis:
        label_bin = np.array([1, 0])
    if 'Myocardial infarction' in diagnosis:
        label_bin = np.array([0, 1])
    if label_bin is None:
        raise ValueError(
            "record {!r}: cannot derive a label from comment {!r}; expected "
            "'Healthy control' or 'Myocardial infarction'".format(key, diagnosis)
        )

    # Samples per segment: 3.072 s at the final sampling rate, truncated to int.
    n = int(3.072 * fs_resampled2)

    sig_processed = []
    for record in sig.T:  # iterate over channels
        record_processed = preprocess(record)
        n_segments = len(record_processed) // n
        # Drop the incomplete tail, then split into rows of n samples.
        record_processed = np.reshape(record_processed[0:n_segments * n], [n_segments, n])
        sig_processed.append(np.array(record_processed))

    sig_processed = np.array(sig_processed)  # data shape: [n_channels,n_segments,n_samples]
    sig_processed = np.swapaxes(sig_processed, 0, 1)  # data shape: [n_segments,n_channels,n_samples]
    return sig_processed, label_bin
In [12]:
# Build the dataset: keep healthy controls (HC) and myocardial-infarction
# records whose comment line 5 mentions 'inferior' (IMI); skip everything else.
keys_imi = []
keys_hc = []
data = {}  # record path -> (segments, label_bin)
segments_hc = 0
segments_imi = 0

for i, key in enumerate(filepaths):
    _, fields = wfdb.srdsamp(key)
    comments = fields['comments']

    is_hc = 'Healthy control' in comments[4]
    # `and` short-circuits, so comments[5] is only inspected for
    # non-HC records whose diagnosis is myocardial infarction.
    is_imi = (not is_hc) and 'Myocardial infarction' in comments[4] and 'inferior' in comments[5]

    if is_hc or is_imi:
        # Channels 1, 2, 5 -- presumably leads II, III and aVF; TODO confirm
        # against the record headers.
        segments, label_bin = get_segments(key, [1, 2, 5])
        data[key] = (segments, label_bin)
        if is_hc:
            segments_hc += segments.shape[0]
            keys_hc.append(key)
        else:
            segments_imi += segments.shape[0]
            keys_imi.append(key)

    # Progress indicator, overwritten in place thanks to the carriage return.
    print('processed {}/{}'.format(i + 1, len(filepaths)), end='\r')
print('\n')

# The parent directory of each record path is used as the patient identifier.
patients_imi = {key.split(os.path.sep)[-2] for key in keys_imi}
patients_hc = {key.split(os.path.sep)[-2] for key in keys_hc}
len(patients_imi), len(patients_hc)
Out[12]:
In [13]:
# Report the class balance of the extracted segments.
segments_total = segments_hc + segments_imi
print(
    'Total segments: {}, HC segments: {} ({:.2f}%), IMI segments: {} ({:.2f}%)'.format(
        segments_total,
        segments_hc,
        segments_hc / segments_total * 100,
        segments_imi,
        segments_imi / segments_total * 100,
    )
)
In [14]:
# Save preprocessed and segmented ECGs.
# `data` maps each record path to its (segments, label_bin) tuple.
out_dir = os.path.join('..', 'data')
# open(..., 'wb') does not create missing directories; make sure the target
# folder exists so the processing above is not lost to a FileNotFoundError.
os.makedirs(out_dir, exist_ok=True)
with open(os.path.join(out_dir, 'imi_hc_64Hz_3_lead.bin'), 'wb') as pfile:
    pickle.dump(obj=data, file=pfile, protocol=4)