1) Importación de las librerías que utilizaremos
In [1]:
import urllib.request
import wfdb
import psycopg2
from psycopg2.extensions import AsIs
2) Leemos el archivo con el listado de waveforms que vamos a utilizar
In [2]:
# Record index (RECORDS-waveforms) published under mimic2wdb/matched.
target_url = "https://physionet.org/physiobank/database/mimic2wdb/matched/RECORDS-waveforms"
# The response is a file-like object yielding bytes; using it as a context
# manager releases the HTTP connection as soon as the index has been read.
with urllib.request.urlopen(target_url) as data:
    lines = data.readlines()
# Work with the third entry of the index (position 2).
# str() on a bytes object yields its repr ("b'...\\n'"); the parsing cell
# further down strips that wrapper.
line = str(lines[2])
line
Out[2]:
3) Limpiamos los caracteres extraños y dividimos la cadena, cuyo formato es pXXNNNN-YYYY-MM-DD-hh-mm, donde XXNNNN es el identificador único del paciente (SUBJECT_ID) y YYYY-MM-DD-hh-mm es la fecha de la estadía del paciente.
In [5]:
# Remove what str(bytes) left behind: the leading b', the closing quote and
# the literal backslash-n sequence at the end.
line = (
    line
    .replace("b'", "")
    .replace("'", "")
    .replace("\\n", "")
)

# The cleaned entry is "<folder>/<record>"; exactly two parts are expected.
splited = line.split("/")
carpeta, onda = splited

# The folder name is the subject id prefixed with 's'.
subject_id = carpeta.replace("s", "")
# The record name is "<folder>-<date>"; dropping the prefix leaves the date.
recordDate = onda.replace(carpeta + "-", "")

print("subject_id: ", subject_id)
print("recordDate: ", recordDate)
print("onda: ", onda)
print("carpeta: ", carpeta)
4) Leemos el encabezado del waveform, para obtener la información del paciente que almacenaremos
In [6]:
# Clear any header left over from a previous run of this cell so that a failed
# read cannot be mistaken for a fresh one by the cells below.
sig, fields = None, None
try:
    # sampto=1 requests a single sample: only the header fields are used below.
    sig, fields = wfdb.srdsamp(onda, pbdir='mimic2wdb/matched/' + carpeta, sampto=1)
    print(fields)
except Exception as inst:
    # Best-effort read: keep the notebook running, but report why the record
    # could not be loaded instead of discarding the cause.
    print("onda vacia")
    print(repr(inst))
Le agregamos el subject_id, la fecha del registro y el nombre de la base de datos a los campos
In [10]:
# Tag the header with the patient id, the record date and the source database
# so each stored row can be traced back to its waveform record.
fields.update({
    'subject_id': subject_id,
    'recordDate': recordDate,
    'database': "mimic2",
})
Separamos los campos en nombres de columna y sus valores
In [8]:
# Column names and their values, in matching order, for the INSERT further down.
columns = fields.keys()
values = list(fields.values())
print(columns)
Nos conectamos a la base de datos postgres donde almacenaremos los datos
In [31]:
# Connection string for the database used by the rest of the notebook.
dsn = "dbname=mimic"
conn = psycopg2.connect(dsn)
# Single cursor shared by every query below.
cur = conn.cursor()
Creamos la tabla donde quedarán almacenados los datos
In [8]:
table = "waveformFields"
# To rebuild the table from scratch, drop it first:
#cur.execute("DROP TABLE "+table)

# The DDL is built from `table` so that the CREATE and the later INSERT can
# never point at different tables. PostgreSQL folds unquoted identifiers to
# lower case, so the table is stored as waveformfields and recordDate as
# recorddate.
cur.execute(
    'CREATE TABLE IF NOT EXISTS ' + table + ''' (
        id serial PRIMARY KEY,
        comments character varying(255)[],
        fs integer,
        signame character varying(255)[],
        units character varying(255)[],
        subject_id integer,
        recordDate character varying(255),
        database character varying(50)
    )'''
)
Verificamos si ya existe el dato
In [36]:
def track_not_exists(cur, subject_id, recordDate, database):
    """Tell whether waveformFields has no row for the given record.

    Args:
        cur: DB-API cursor; only ``execute`` and ``fetchone`` are used.
        subject_id: patient identifier; converted with ``int()`` before binding.
        recordDate: record date string compared against the ``recorddate`` column.
        database: source database name compared against the ``database`` column.

    Returns:
        True when the query yields no row, False otherwise.
    """
    query = 'select id from waveformFields where subject_id= %s and recorddate = %s and database = %s'
    params = (int(subject_id), recordDate, database)
    cur.execute(query, params)
    match = cur.fetchone()
    return match is None
def track_subject(cur, subject_id):
    """Tell whether subjectwords has no row for the given patient.

    Args:
        cur: DB-API cursor; only ``execute`` and ``fetchone`` are used.
        subject_id: patient identifier; converted with ``int()`` before binding.

    Returns:
        True when the query yields no row, False otherwise.
    """
    query = 'SELECT id FROM subjectwords WHERE subject_id= %s'
    cur.execute(query, (int(subject_id),))
    match = cur.fetchone()
    return match is None
def patient_dead(cur, subject_id):
    """Tell whether the patient has a date of death recorded.

    Args:
        cur: DB-API cursor; only ``execute`` and ``fetchone`` are used.
        subject_id: patient identifier; converted with ``int()`` before binding.

    Returns:
        True when the ``patients`` row has a non-NULL ``dod``. False when
        ``dod`` is NULL or when the patient has no row at all.
    """
    select_stament = 'SELECT dod FROM patients WHERE subject_id= %s'
    cur.execute(select_stament, (int(subject_id),))
    row = cur.fetchone()
    # fetchone() returns None for an unknown patient; indexing it would raise.
    if row is None or row[0] is None:
        return False
    # str() is required: dod may come back as a date/datetime object, which
    # cannot be concatenated to a str directly.
    print("row " + str(row[0]))
    return True
# The record is inserted only when all three checks hold; `and` short-circuits,
# so later queries are skipped once one check fails.
notExist = (
    track_not_exists(cur, subject_id, recordDate, "mimic2")
    and track_subject(cur, subject_id)
    and patient_dead(cur, subject_id)
)
if notExist:
    print("not exist %s " % subject_id)
Insertamos los datos
In [10]:
# The first placeholder receives the raw column list (AsIs disables quoting),
# the second the tuple of values, which psycopg2 renders as (v1, v2, ...).
insert_statement = 'insert into ' + table + ' (%s) values %s'
insert_params = (AsIs(','.join(columns)), tuple(values))

# Show the exact SQL that would be sent, whether or not it is executed.
print(cur.mogrify(insert_statement, insert_params))
if notExist:
    cur.execute(insert_statement, insert_params)
Hacemos commit
In [11]:
# Commit the open transaction so the INSERT above (when it ran) is persisted.
conn.commit()
Cerramos la conexión
In [26]:
# Close the database connection opened earlier in the notebook.
conn.close()
In [ ]: