Fill Database WaveForm Headers

1) Importamos las librerías que utilizaremos


In [1]:
import urllib.request
import wfdb
import psycopg2
from psycopg2.extensions import AsIs

2) Leemos el archivo con las WaveForm que vamos a utilizar


In [2]:
# Listing of the matched MIMIC-II waveform records, fetched over HTTP.
target_url = "https://physionet.org/physiobank/database/mimic2wdb/matched/RECORDS-waveforms"
# Open the response as a context manager so the HTTP connection is closed as
# soon as the listing has been read (the response is a file-like object).
with urllib.request.urlopen(target_url) as data:
    lines = data.readlines()
# lines[2] is a bytes object, so str() yields its repr ("b'...\\n'") rather
# than decoded text; the wrapper characters are stripped in the cleaning step.
line = str(lines[2])
line


Out[2]:
"b's00020/s00020-2567-03-30-17-47\\n'"

3) Limpiamos los caracteres extraños y dividimos la cadena, que tiene la forma sNNNNN/sNNNNN-YYYY-MM-DD-hh-mm, donde NNNNN es el identificador único del paciente (SUBJECT_ID) y YYYY-MM-DD-hh-mm es la fecha de la estadía del paciente.


In [5]:
# Strip what str(bytes) left behind: the leading b', the quote characters and
# the literal backslash-n sequence.
line = line.replace('b\'','').replace('\'','').replace('\\n','')
# Each entry has the form "<folder>/<record>".
carpeta,onda = line.split("/")
# Remove only the leading 's' of the folder name; replace('s','') would drop
# every 's' found anywhere in the string.
subject_id = carpeta[1:] if carpeta.startswith('s') else carpeta
# The record name is "<folder>-<date>": cut that prefix off rather than
# replacing every occurrence of it inside the name.
prefix = carpeta + "-"
recordDate = onda[len(prefix):] if onda.startswith(prefix) else onda
print("subject_id: ",subject_id)
print("recordDate: ",recordDate)
print("onda: ",onda)
print("carpeta: ",carpeta)


subject_id:  00020
recordDate:  2567-03-30-17-47
onda:  s00020-2567-03-30-17-47
carpeta:  s00020

4) Leemos el encabezado del waveform, para obtener la información del paciente que almacenaremos


In [6]:
try:
    # sampto=1 limits the read to a single sample; only the metadata returned
    # in `fields` is printed and used afterwards.
    sig, fields = wfdb.srdsamp(onda,pbdir='mimic2wdb/matched/'+carpeta, sampto=1)
    print(fields)
except Exception as inst:
    # Best-effort: keep the notebook running, but show the actual failure
    # instead of hiding it (a network or parsing error would otherwise be
    # indistinguishable from an empty waveform).
    print("onda vacia")
    print("error: %r" % (inst,))


{'signame': ['II', 'AVF', 'ABP', 'PAP'], 'units': ['mV', 'mV', 'mmHg', 'mmHg'], 'comments': ['<age>: 76 <sex>: F'], 'fs': 125}

Agregamos a los campos el subject_id, la fecha del registro y el nombre de la base de datos


In [10]:
# Attach the record identifiers and the source database name to the header
# fields so they are stored together with the waveform metadata.
fields.update(
    subject_id=subject_id,
    recordDate=recordDate,
    database="mimic2",
)

Separamos el diccionario de campos en nombres de columnas y valores


In [8]:
# Snapshot the keys as a list. fields.keys() is a live view: a key added to
# `fields` after this cell would show up in `columns` but not in `values`,
# leaving the INSERT with more column names than values.
columns = list(fields)
# Values in the same order as `columns`.
values = [fields[column] for column in columns]
print(columns)


dict_keys(['signame', 'fs', 'comments', 'subject_id', 'recordDate', 'units'])

Nos conectamos a la base de datos postgres donde almacenaremos los datos


In [31]:
# Open a connection to the "mimic" database. Only the database name is given
# here; host, user and password are presumably resolved from the client
# defaults/environment -- confirm for the target deployment.
conn = psycopg2.connect("dbname=mimic")
# Cursor shared by the following cells to run SQL on this connection.
cur = conn.cursor()

Creamos la tabla donde quedarán almacenados los datos


In [8]:
# Name of the destination table; reused later when building the INSERT.
table = "waveformFields"

# Uncomment to drop the table and start from scratch:
#cur.execute("DROP TABLE "+table)

# One row per waveform record: the header fields (comments, fs, signame,
# units) plus the subject id, the record date and the source database.
create_table_sql = '''CREATE TABLE IF NOT EXISTS waveformFields
             (id serial PRIMARY KEY,
            comments character varying(255)[],
            fs integer, signame character varying(255)[],
            units character varying(255)[],
            subject_id integer,
            recordDate character varying(255),
            database character varying(50))'''
cur.execute(create_table_sql)

Verificamos que el registro aún no exista en la tabla, que el paciente no esté en subjectwords y que tenga fecha de defunción (dod)


In [36]:
def track_not_exists(cur, subject_id,recordDate,database):
    """Tell whether waveformFields has no row for the given record.

    Args:
        cur: database cursor exposing execute() and fetchone().
        subject_id: subject identifier; converted with int() before querying.
        recordDate: record date string to match against the recorddate column.
        database: source database name to match against the database column.

    Returns:
        True when the query returns no row, False otherwise.
    """
    query = 'select id from waveformFields where subject_id= %s and recorddate = %s and database = %s'
    params = (int(subject_id), recordDate, database)
    cur.execute(query, params)
    first_row = cur.fetchone()
    return first_row is None
def track_subject(cur,subject_id):
    """Tell whether subjectwords has no row for the given subject.

    Args:
        cur: database cursor exposing execute() and fetchone().
        subject_id: subject identifier; converted with int() before querying.

    Returns:
        True when the query returns no row, False otherwise.
    """
    query = 'SELECT id FROM subjectwords WHERE subject_id= %s'
    params = (int(subject_id),)
    cur.execute(query, params)
    first_row = cur.fetchone()
    return first_row is None
def patient_dead(cur,subject_id):
    """Tell whether the patient has a date of death recorded.

    Looks up the dod column of the patients table for the given subject.

    Args:
        cur: database cursor exposing execute() and fetchone().
        subject_id: subject identifier; converted with int() before querying.

    Returns:
        True when a row exists and its dod value is not None. False when the
        subject has no row in patients or its dod is None.
    """
    select_statement = 'SELECT dod FROM patients WHERE subject_id= %s'
    cur.execute(select_statement,(int(subject_id),))
    row = cur.fetchone()
    # fetchone() returns None when the subject is not in patients; indexing
    # it would raise TypeError, so treat that case as "not known dead".
    if row is None or row[0] is None:
        return False
    # %-formatting accepts any value type; "row " + value raises TypeError
    # when the driver returns a non-str object for dod.
    print("row %s" % (row[0],))
    return True
# The record is inserted only when all three checks pass: it is not yet in
# waveformFields, the subject has no row in subjectwords, and the patient has
# a date of death. `and` short-circuits, so later queries run only if needed.
notExist = (
    track_not_exists(cur, subject_id, recordDate, "mimic2")
    and track_subject(cur, subject_id)
    and patient_dead(cur, subject_id)
)
if notExist:
    print("not exist %s " % subject_id)

Insertamos los datos


In [10]:
# The column list is injected verbatim through AsIs (it is a list of
# identifiers, not a value); the values tuple is adapted by the driver.
insert_statement = 'insert into '+table+' (%s) values %s'
insert_args = (AsIs(','.join(columns)), tuple(values))

# Show the final SQL that would be sent, whether or not it gets executed.
print(cur.mogrify(insert_statement, insert_args))
if notExist:
    cur.execute(insert_statement, insert_args)


b"insert into waveformFields (units,signame,comments,recordDate,subject_id,fs) values (ARRAY['mV', 'mV'], ARRAY['II', 'MCL1'], '{}', '2116-12-24-12-35', '000033', 125)"

Hacemos commit


In [11]:
# Commit the current transaction so the statements run through `cur` are persisted.
conn.commit()

Cerramos la conexión


In [26]:
# Close the database connection; `conn` and `cur` are not usable after this.
conn.close()

In [ ]: