In [1]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import psycopg2
import collections
import string
from psycopg2.extensions import register_adapter, AsIs
import wfdb
# Print floats in fixed-point notation (no scientific notation) with up to 10 decimals.
np.set_printoptions(precision=10, suppress=True)
Methods
In [2]:
def obtainMeasures(dbname="mimic", unlimit=False, centroid=None):
    """Read the distinct (qt, ts, sr) interval triples from ``waveformqrst``.

    The three measures are computed in SQL as ``t_i-q_i``, ``t_i-s_i`` and
    ``s_i-r_i``. Rows are kept only when all four indexes are non-NULL,
    ``t_i > s_i`` and both ``t_i-q_i`` and ``s_i-r_i`` are below 1000.

    Parameters
    ----------
    dbname : str
        Database name, passed to ``psycopg2.connect`` as ``"dbname=<dbname>"``.
    unlimit : bool
        When False (default) the query is capped with ``LIMIT 10000``.
    centroid : None | "notNull" | any other value
        ``None``      -> only rows whose ``centroid`` column is NULL.
        ``"notNull"`` -> only rows with a centroid; the centroid value is
                         appended to every returned row.
        anything else -> no filter on the ``centroid`` column.

    Returns
    -------
    list[list]
        ``[qt, ts, sr]`` per row, or ``[qt, ts, sr, centroid]`` when
        ``centroid == "notNull"``.
    """
    limit = "" if unlimit else " LIMIT 10000 "
    if centroid is None:
        null = " AND centroid is null"
    elif centroid == "notNull":
        null = "AND centroid IS NOT NULL"
    else:
        null = ""
    select_stament = ("SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid "
                      " FROM waveformqrst "
                      " WHERE t_i>s_i "
                      " AND t_i-q_i<1000 "
                      " AND s_i-r_i<1000 "
                      " AND q_i IS NOT NULL "
                      " AND t_i IS NOT NULL "
                      " AND s_i IS NOT NULL "
                      " AND r_i IS NOT NULL "+null+limit
                      )
    with_label = centroid == "notNull"
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        # Echo the exact statement sent to the server (useful when tuning the filters).
        print(cur.mogrify(select_stament))
        cur.execute(select_stament)
        if with_label:
            waves = [[row[0], row[1], row[2], row[3]] for row in cur]
        else:
            waves = [[row[0], row[1], row[2]] for row in cur]
    finally:
        # Close the connection even when the query or the row iteration fails.
        conn.close()
    return waves
In [3]:
# Default (width, height) applied to every figure created after this cell.
fig_size = [12, 9]
plt.rcParams.update({"figure.figsize": fig_size})
def printKmeans(kmeans):
    """Draw a bar chart with the number of samples assigned to each cluster.

    Parameters
    ----------
    kmeans : object
        Anything exposing ``labels_`` (an iterable with one cluster label per
        sample), e.g. a fitted ``sklearn.cluster.KMeans``.

    The bars are ordered by cluster label so the x axis always reads in
    ascending order, independently of the order in which labels first appear
    in ``labels_``. The count is written above every bar.
    """
    counts = collections.Counter(kmeans.labels_)
    labels = sorted(counts)
    values = [counts[label] for label in labels]
    indexes = np.arange(len(labels))
    width = 0.5

    fig, ax = plt.subplots()
    ax.bar(indexes, values, width)
    for i, v in enumerate(values):
        # Small offsets keep the count roughly centred just above its bar.
        ax.text(i-0.1, v+1, str(v), color='blue', fontweight='bold')
    ax.set_title('Cantidad de Latidos por centroide')
    ax.set_ylabel('Cantidad de latidos')
    ax.set_xlabel('Centroides')
    ax.set_xticks(indexes + width * 0.01)
    ax.set_xticklabels(labels)
    plt.show()
Implementation
In [4]:
# Load every distinct (qt, ts, sr) triple: centroid="all" applies no centroid
# filter and unlimit=True removes the 10000-row cap. X is the training matrix.
waves = obtainMeasures(unlimit=True, centroid="all")
X = np.asarray(waves)
In [5]:
# Fixed starting centers, one row per cluster; columns follow X: (qt, ts, sr).
centroid = np.array([
    [71.1394970414, 37.1831854044, 31.0994575937],
    [160.9690551506, 118.464585339, 38.6135332141],
    [256.8278445722, 178.5887785328, 73.9569611431],
    [170.7113402062, 43.2392611684, 123.2794243986],
    [380.5647636587, 94.7071823204, 281.2449355433],
    [415.7864838394, 333.9813907933, 77.2056807052],
    [731.8383838384, 106.9353535354, 620.3656565657],
    [728.2561307902, 560.3923705722, 163.3732970027],
])

# n_init=1: a single run, started from the explicit centers above.
kmeans = KMeans(init=centroid, n_clusters=len(centroid), n_init=1)
kmeans.fit(X)
In [6]:
# From here on `centroid` holds the fitted centers (not the seeds); the
# plotting cells further down read it.
centroid = kmeans.cluster_centers_
print(centroid)
In [7]:
# Bar chart of how many samples were assigned to each cluster.
printKmeans(kmeans=kmeans)
Update waves with their K-means cluster labels
In [8]:
def updateQRST(wacentroid):
    """Store cluster labels in ``waveformqrst.centroid`` for the given measures.

    Parameters
    ----------
    wacentroid : sequence of tuple
        ``(tq, ts, sr, centroid)`` tuples. Every table row whose
        ``t_i-q_i``, ``t_i-s_i`` and ``s_i-r_i`` equal the first three values
        gets ``centroid`` set to the fourth one.

    An empty sequence is a no-op: there is nothing to update, so no database
    connection is opened and no statement is sent.
    """
    if len(wacentroid) == 0:
        return
    select_stament = """
UPDATE waveformqrst AS t
SET
centroid = s.centroid
FROM unnest(%s) s(tq integer,ts integer,sr integer,centroid unknown)
WHERE t.t_i-t.q_i = s.tq
AND t.t_i-t.s_i = s.ts
AND t.s_i-t.r_i = s.sr;
"""
    conn = psycopg2.connect("dbname=mimic")
    try:
        cur = conn.cursor()
        cur.execute(select_stament, (wacentroid,))
        conn.commit()
    finally:
        # Always release the connection; an uncommitted transaction is
        # discarded when the connection closes.
        conn.close()
In [9]:
def fillEmptyCentroids(iterations=1280):
    """Label measures that have no centroid yet, using the fitted global ``kmeans``.

    Fetches every (qt, ts, sr) triple whose ``centroid`` is NULL, predicts the
    cluster of at most ``iterations`` of them and writes the result back with
    ``updateQRST``. Cluster index 0, 1, 2, ... is stored as 'a', 'b', 'c', ...

    Parameters
    ----------
    iterations : int
        Maximum number of triples labelled in one call.

    A triple whose prediction fails is reported and skipped so the remaining
    ones are still labelled.
    """
    waves = obtainMeasures(unlimit=True, centroid=None)
    print("measures obtained:"+str(len(waves)))
    waves = waves[:iterations]
    numberToAlpha = dict(enumerate(string.ascii_lowercase, 0))
    centro = []
    for wave in waves:
        measures = [wave[0], wave[1], wave[2]]
        try:
            centValue = numberToAlpha[int(kmeans.predict([measures])[0])]
        except Exception as err:
            # Best effort: skip the offending triple but say which one and why.
            # (Exception, not a bare except, so Ctrl-C still interrupts the run.)
            print("error in value", measures, repr(err))
            continue
        centro.append((wave[0], wave[1], wave[2], centValue))
    if centro:
        # Nothing to write when every centroid is already filled in.
        updateQRST(centro)
In [10]:
# Label at most 200 of the still-unlabelled measure triples in this run.
fillEmptyCentroids(iterations=200)
# NOTE(review): figures left by the original author, meaning not documented: 5669 1083284
In [11]:
# Spot-check the fitted model on a few hand-written (qt, ts, sr) triples.
sample_measures = [
    [1, 18, 14],
    [43, 28, 10],
    [49, 37, 7],
    [65, 46, 14],
    [26, 13, 8],
    [96, 72, 19],
    [247, 236, 11],
    [1242, 1136, 101],
]
kmeans.predict(sample_measures)
Out[11]:
In [12]:
def centroidColor(clist):
    """Translate cluster labels into matplotlib color names.

    Labels 'a'..'h' each have their own color and ``None`` maps to "maroon".
    Any other label raises ``KeyError``.

    Returns a list with one color name per element of ``clist``, in order.
    """
    names = ("blue", "red", "green", "magenta", "cyan", "yellow", "purple", "chocolate")
    palette = dict(zip("abcdefgh", names))
    palette[None] = "maroon"
    return list(map(palette.__getitem__, clist))
In [13]:
# Labelled measures: every row is [qt, ts, sr, centroid label].
waves = obtainMeasures(unlimit=True, centroid="notNull")
qt = np.array([item[0] for item in waves])
ts = np.array([item[1] for item in waves])
sr = np.array([item[2] for item in waves])
# One color name per sample, derived from its centroid label.
color = centroidColor([item[3] for item in waves])

# Coordinates of the fitted cluster centers, same column order as the samples.
qtc = np.array([item[0] for item in centroid])
tsc = np.array([item[1] for item in centroid])
src = np.array([item[2] for item in centroid])


def _scatterPanel(fig, position, xname, yname, xs, ys, xcenters, ycenters):
    """Add one 2-D projection: samples colored by cluster, centers as black markers."""
    panel = fig.add_subplot(position)
    panel.set_title(xname+"/"+yname)
    panel.set_xlabel(xname)
    panel.set_ylabel(yname)
    panel.scatter(xs, ys, c=color)
    panel.scatter(xcenters, ycenters, c='black', marker=">")
    return panel


fig = plt.figure()
ax = _scatterPanel(fig, 221, "qt", "ts", qt, ts, qtc, tsc)
ax = _scatterPanel(fig, 222, "ts", "sr", ts, sr, tsc, src)
ax = _scatterPanel(fig, 223, "sr", "qt", sr, qt, src, qtc)
plt.show()
In [14]:
# 3-D view of the same data; note the axis order is (ts, qt, sr).
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel("ts")
ax.set_ylabel("qt")
ax.set_zlabel("sr")
# Cluster centers: large black triangles.
ax.scatter(tsc, qtc, src, c='black', marker="^", s=160)
# Samples, colored by their centroid label.
ax.scatter(ts, qt, sr, c=color)
plt.show()
In [15]:
def _rowToWave(row):
    """Turn one result row of the ``findCentroid`` query into its description dict."""
    patient = "p"+str(row[0]).zfill(6)
    onda = patient+'-'+row[1]
    carpeta = patient[:3]+"/"+patient
    # [amplitude, index] pairs for Q, R, S and T, in that order.
    qrst = [[row[7], row[8]], [row[9], row[10]], [row[11], row[12]], [row[13], row[14]]]
    return {"subject_id": row[0], "recorddate": row[1], "fs": row[3], "onda": onda, "carpeta": carpeta,
            "signalII": row[2].index("II"), "signallength": row[4], "centroid": row[5],
            "wave": row[6], "begin": row[8], "end": row[14], "qrst": qrst}


def findCentroid(centroid, window=0, dbname="mimic", max_tries=1000):
    """Find one stored beat whose (qt, ts, sr) measures match ``centroid``.

    Each measure must lie between ``floor(value) - window`` and
    ``ceil(value) + window``. When no row matches, ``window`` is widened by
    one and the query is repeated on the same connection.

    Parameters
    ----------
    centroid : sequence of 3 numbers
        Target (qt, ts, sr); compared with ``t_i-q_i``, ``t_i-s_i`` and
        ``s_i-r_i`` respectively.
    window : int
        Initial tolerance added on both sides of every measure.
    dbname : str
        Database name, passed to ``psycopg2.connect`` as ``"dbname=<dbname>"``.
    max_tries : int
        Maximum number of queries issued before giving up.

    Returns
    -------
    dict
        Keys: subject_id, recorddate, fs, onda (record name), carpeta (record
        folder), signalII (position of "II" in signame), signallength,
        centroid, wave, begin (q_i), end (t_i), qrst.

    Raises
    ------
    LookupError
        If nothing matches after ``max_tries`` queries.
    """
    select_stament = ("SELECT qrst.subject_id,qrst.recorddate,signame,fs,signallength,centroid,wave,"
                      " q_amp,q_i,r_amp,r_i,s_amp,s_i,t_amp,t_i "
                      " FROM waveformqrst qrst "
                      " LEFT JOIN waveformfields fields"
                      " ON qrst.subject_id = fields.subject_id "
                      " AND qrst.recorddate = fields.recorddate "
                      " WHERE "
                      " (t_i-q_i between %s and %s)"
                      " AND (t_i-s_i between %s and %s)"
                      " AND (s_i-r_i between %s and %s)"
                      " LIMIT 1")
    # Integer bounds are computed here so only plain Python ints are sent as
    # query parameters (no numpy scalars, no values spliced into the SQL text).
    lows = [int(np.floor(centroid[i])) for i in range(3)]
    highs = [int(np.ceil(centroid[i])) for i in range(3)]

    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        for _ in range(max_tries):
            params = []
            for low, high in zip(lows, highs):
                params.extend((low - window, high + window))
            cur.execute(select_stament, tuple(params))
            row = cur.fetchone()
            if row is not None:
                return _rowToWave(row)
            # Nothing close enough yet: relax every bound by one and retry.
            window += 1
    finally:
        # Runs on success, on failure and when the search is exhausted.
        conn.close()
    raise LookupError("no wave found near centroid "+str(list(centroid[:3]))
                      + " after "+str(max_tries)+" tries")
In [16]:
def updateOriginalWave(subject_id, recorddate, begin, end, originalWave):
    """Store the raw samples of one beat in ``waveformqrst.wave``.

    The row is identified by ``subject_id``, ``recorddate``, ``q_i == begin``
    and ``t_i == end``.

    Parameters
    ----------
    subject_id, recorddate : values identifying the record.
    begin, end : values compared with the row's ``q_i`` and ``t_i``.
    originalWave : array-like
        Samples to store; converted to a plain Python list before being sent.
    """
    select_stament = ("UPDATE waveformqrst SET wave = (%s) "
                      "WHERE subject_id = (%s) AND recorddate =(%s) AND q_i = (%s) AND t_i = (%s)")
    # asarray(...).tolist() accepts ndarrays as well as plain sequences and
    # yields built-in Python numbers.
    samples = np.asarray(originalWave).tolist()
    conn = psycopg2.connect("dbname=mimic")
    try:
        cur = conn.cursor()
        cur.execute(select_stament, (samples, subject_id, recorddate, begin, end,))
        conn.commit()
    finally:
        # Release the connection even when the update fails.
        conn.close()
In [17]:
def printCentroid(wave, ax=None):
    """Plot the first 100 samples of one beat, fetching them first if needed.

    Parameters
    ----------
    wave : dict
        Description as returned by ``findCentroid``. Keys read here:
        subject_id, begin, wave, centroid and, only when ``wave["wave"]`` is
        None, also onda, carpeta, signalII, recorddate and end.
    ax : matplotlib axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``), i.e.
        the subplot most recently added by the caller.

    When ``wave["wave"]`` is None the samples are read with ``wfdb.rdsamp``
    from 'mimic3wdb/matched/<carpeta>' and stored back through
    ``updateOriginalWave`` so the next call can reuse them.
    """
    if ax is None:
        ax = plt.gca()
    subject_id = wave["subject_id"]
    begin = wave["begin"]
    originalWave = wave["wave"]
    if originalWave is None:
        print("searching new Centroid")
        signal = wfdb.rdsamp(wave["onda"], pbdir='mimic3wdb/matched/'+wave["carpeta"],
                             channels=[wave["signalII"]]).p_signals
        # Drop NaN samples, keep at most the last 1,800,000 of the rest, then
        # cut 100 samples starting at `begin`.
        # NOTE(review): presumably q_i/t_i were computed on this same trimmed
        # signal, so `begin` indexes into it - confirm against the extraction code.
        signalNan = signal[~np.isnan(signal)]
        subsignal = signalNan[-1800000:]
        originalWave = subsignal[begin:begin+100]
        updateOriginalWave(subject_id, wave["recorddate"], begin, wave["end"], originalWave)
    # Beats without a stored label are titled "z".
    cen = wave["centroid"] if wave["centroid"] is not None else "z"
    ax.set_title(cen+" subject:"+str(subject_id))
    ax.plot(originalWave[:100])
In [18]:
fig = plt.figure()
# One panel per fitted center on a 3x3 grid: look up a stored beat close to
# the center and draw it on the subplot that was just added.
for inx, value in enumerate(centroid, start=1):
    print(value)
    ax = fig.add_subplot(3, 3, inx)
    wave = findCentroid(value)
    printCentroid(wave)
plt.show()
In [19]:
# Display the fitted cluster centers (bound from kmeans.cluster_centers_ earlier in the notebook).
centroid
Out[19]:
In [ ]: