# K-means and Alphabet

Imports

In [1]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import psycopg2
import collections
import string
import wfdb
# Print floats in plain decimal notation (scientific notation suppressed) with
# up to 10 decimal places, so centroid coordinates can be copied verbatim.
np.set_printoptions(suppress=True,precision=10)

Methods

In [2]:
def obtainMeasures(dbname="mimic",unlimit=False,centroid=None) :
    """Fetch the distinct (qt, ts, sr) interval triples stored in ``waveformqrst``.

    The three measures are computed in SQL from the stored wave indexes:
    ``qt = t_i-q_i``, ``ts = t_i-s_i`` and ``sr = s_i-r_i``. Only rows where all
    four indexes are present, ``t_i>s_i``, ``qt<1000`` and ``sr<1000`` are read.

    Parameters
    ----------
    dbname : str
        Name of the PostgreSQL database to connect to.
    unlimit : bool
        When False the query is capped with ``LIMIT 10000``; when True every
        matching row is returned. The query has no ORDER BY, so which rows the
        capped variant returns is up to the database.
    centroid : None, "notNull" or any other value
        ``None`` selects only rows that have no centroid yet, ``"notNull"``
        selects only labelled rows, anything else applies no centroid filter.

    Returns
    -------
    list of list
        ``[qt, ts, sr, centroid]`` rows when ``centroid == "notNull"``,
        otherwise ``[qt, ts, sr]`` rows.
    """
    limit = "" if unlimit else " LIMIT 10000 "
    if centroid is None :
        null = " AND centroid is null"
    elif centroid == "notNull" :
        null = "AND centroid IS NOT NULL"
    else :
        null = ""
    select_stament = ("SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid "
        " FROM waveformqrst "
        " WHERE t_i>s_i "
        " AND t_i-q_i<1000 "
        " AND s_i-r_i<1000 "
        " AND q_i IS NOT NULL "
        " AND t_i IS NOT NULL "
        " AND s_i IS NOT NULL "
        " AND r_i IS NOT NULL "+null+limit
    )
    # Only the labelled variant carries the centroid column back to the caller.
    columns = 4 if centroid == "notNull" else 3
    conn = psycopg2.connect("dbname="+dbname)
    try :
        cur = conn.cursor()
        # Echo the exact statement so the cell output documents what was run.
        print(cur.mogrify(select_stament))
        cur.execute(select_stament)
        waves = [list(row[:columns]) for row in cur]
    finally :
        # Release the connection even when the query fails.
        conn.close()
    return waves

In [3]:
# Default size (width, height) applied to every matplotlib figure created
# after this cell runs.
fig_size = [12,9]
plt.rcParams["figure.figsize"] = fig_size
def printKmeans(kmeans) :
    """Show a bar chart with the number of samples assigned to each cluster.

    Parameters
    ----------
    kmeans : fitted estimator
        Any object exposing a ``labels_`` sequence with one cluster index per
        training sample (here a fitted ``sklearn.cluster.KMeans``).

    Bars are ordered by cluster label so the chart layout does not depend on
    the order in which samples happened to be read, and each bar is annotated
    with its count.
    """
    counts = collections.Counter(kmeans.labels_)
    labels = sorted(counts)
    values = [counts[label] for label in labels]
    indexes = np.arange(len(labels))
    width = 0.5
    fig, ax = plt.subplots()
    ax.bar(indexes, values, width)
    # Write the exact count just above each bar.
    for i, v in enumerate(values):
        ax.text(i-0.1,v+1, str(v), color='blue', fontweight='bold')
    ax.set_xlabel('Centroides')
    ax.set_xticks(indexes + width * 0.01)
    ax.set_xticklabels(labels)
    plt.show()

Implementation

In [4]:
# Load every distinct (qt, ts, sr) triple: centroid="all" is neither None nor
# "notNull", so obtainMeasures applies no centroid filter and returns
# 3-column rows; unlimit=True drops the LIMIT clause.
waves = obtainMeasures(centroid="all",unlimit=True)
# One row per triple, one column per measure; this is the training matrix.
X = np.array(waves)

b'SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid  FROM waveformqrst  WHERE t_i>s_i  AND t_i-q_i<1000  AND s_i-r_i<1000  AND q_i IS NOT NULL  AND t_i IS NOT NULL  AND s_i IS NOT NULL  AND r_i IS NOT NULL '

In [5]:
# Hand-picked starting centres, one (qt, ts, sr) row per cluster.
# NOTE(review): presumably taken from an earlier fit -- confirm their origin.
centroid = np.array([
    [ 71.1394970414,  37.1831854044,  31.0994575937],
    [160.9690551506, 118.464585339,   38.6135332141],
    [256.8278445722, 178.5887785328,  73.9569611431],
    [170.7113402062,  43.2392611684, 123.2794243986],
    [380.5647636587,  94.7071823204, 281.2449355433],
    [415.7864838394, 333.9813907933,  77.2056807052],
    [731.8383838384, 106.9353535354, 620.3656565657],
    [728.2561307902, 560.3923705722, 163.3732970027],
])
# One cluster per seed row; n_init=1 because the initial centres are explicit.
kmeans = KMeans(n_clusters=len(centroid), init=centroid, n_init=1)
kmeans.fit(X)

In [6]:
# From here on `centroid` holds the fitted cluster centres instead of the
# hand-picked seeds; later cells read this global.
centroid = kmeans.cluster_centers_
print(centroid)

[[  69.6188495315   38.4247657464   28.3880415143]
[ 160.9432284542  118.29124487     38.7540355677]
[ 256.804793331   178.3822507815   74.1394581452]
[ 168.6516410469   42.4929372663  121.9912754466]
[ 380.370030581    94.5675840979  281.1883792049]
[ 415.7864838394  333.9813907933   77.2056807052]
[ 731.8383838384  106.9353535354  620.3656565657]
[ 728.2561307902  560.3923705722  163.3732970027]]

In [7]:
# Bar chart of how many training samples fell into each cluster.
printKmeans(kmeans)

Update Waves with the K-means

In [8]:
def updateQRST(wacentroid) :
    """Write centroid labels back to ``waveformqrst`` in a single UPDATE.

    Parameters
    ----------
    wacentroid : list of tuple
        ``(qt, ts, sr, centroid)`` tuples. Every table row whose
        ``t_i-q_i``, ``t_i-s_i`` and ``s_i-r_i`` equal the first three values
        gets ``centroid`` set to the fourth.

    An empty list is a no-op: it is adapted to an untyped empty array that
    ``unnest`` cannot resolve, so the statement is not sent at all.
    The database is always ``mimic``.
    """
    if len(wacentroid) == 0 :
        return
    select_stament = """
    UPDATE waveformqrst AS t
    SET
    centroid = s.centroid
    FROM unnest(%s) s(tq integer,ts integer,sr integer,centroid unknown)
    WHERE t.t_i-t.q_i = s.tq
    AND t.t_i-t.s_i = s.ts
    AND t.s_i-t.r_i = s.sr;
    """
    conn = psycopg2.connect("dbname=mimic")
    try :
        cur = conn.cursor()
        # The whole list is passed as one array parameter and unnested in SQL.
        cur.execute(select_stament, (wacentroid,))
        conn.commit()
    finally :
        # Closing without commit discards a failed update and frees the connection.
        conn.close()

In [9]:
def fillEmptyCentroids(iterations=1280) :
    """Label measures that have no centroid yet and store the labels.

    Reads every unlabelled (qt, ts, sr) triple, keeps at most ``iterations``
    of them, assigns each one to its nearest cluster with the notebook-level
    ``kmeans`` model and writes the cluster back as a letter
    (cluster 0 -> 'a', 1 -> 'b', ...) through ``updateQRST``.

    Parameters
    ----------
    iterations : int
        Maximum number of triples to label in this call.
    """
    waves = obtainMeasures(unlimit=True,centroid=None)
    print("measures obtained:"+str(len(waves)))
    waves = waves[:iterations]
    if not waves :
        # Nothing left to label; avoid predicting on / updating with an empty batch.
        return
    # One vectorised call instead of one predict() per row.
    labels = kmeans.predict(np.array([wave[:3] for wave in waves]))
    alphabet = string.ascii_lowercase
    centro = []
    for wave, label in zip(waves, labels) :
        if label >= len(alphabet) :
            # More clusters than letters: skip the row rather than abort the batch.
            print("error in value")
            continue
        centro.append((wave[0],wave[1],wave[2],alphabet[label]))
    if centro :
        updateQRST(centro)

In [10]:
# Label and store at most 200 of the measures that still have no centroid.
fillEmptyCentroids(200)
# 5669 1083284  -- NOTE(review): figures left by the author; their meaning is not evident from this notebook.

b'SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid  FROM waveformqrst  WHERE t_i>s_i  AND t_i-q_i<1000  AND s_i-r_i<1000  AND q_i IS NOT NULL  AND t_i IS NOT NULL  AND s_i IS NOT NULL  AND r_i IS NOT NULL  AND centroid is null'
measures obtained:4688

In [11]:
# Spot check: cluster index assigned to a few hand-written (qt, ts, sr) triples.
kmeans.predict([[1,18,14],[43,28,10],[49,37,7],[65,46,14], [26,13,8],[96,72,19]
,[247,236,11],[1242,  1136,   101]])

Out[11]:
array([0, 0, 0, 0, 0, 0, 2, 7], dtype=int32)

In [12]:
def centroidColor(clist) :
    """Map centroid labels to matplotlib colour names.

    Labels 'a' to 'h' each have their own colour and ``None`` (no centroid)
    maps to "maroon". Any other label raises ``KeyError``.

    Returns a list with one colour name per element of ``clist``, in order.
    """
    labels = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', None)
    names = ("blue", "red", "green", "magenta", "cyan",
             "yellow", "purple", "chocolate", "maroon")
    palette = dict(zip(labels, names))
    colours = []
    for label in clist :
        colours.append(palette[label])
    return colours

In [13]:
# Labelled measures only: each row is [qt, ts, sr, centroid letter].
waves = obtainMeasures(unlimit=True,centroid="notNull")
qt = np.array([item[0] for item in waves])
ts = np.array([item[1] for item in waves])
sr = np.array([item[2] for item in waves])
# One colour name per sample, chosen by its centroid letter.
color = centroidColor([item[3] for item in waves])
# Coordinates of the fitted cluster centres.
qtc = np.array([item[0] for item in centroid])
tsc = np.array([item[1] for item in centroid])
src = np.array([item[2] for item in centroid])

# One panel per pair of measures; previously no axes object was ever created,
# so every `ax.` call failed with NameError.
panels = [
    ("qt", qt, qtc, "ts", ts, tsc),
    ("ts", ts, tsc, "sr", sr, src),
    ("sr", sr, src, "qt", qt, qtc),
]
fig, axes = plt.subplots(1, len(panels))
for ax, (xname, xs, xcs, yname, ys, ycs) in zip(axes, panels) :
    ax.set_title(xname+"/"+yname)
    ax.set_xlabel(xname)
    ax.set_ylabel(yname)
    ax.scatter(xs,ys, c=color)
    # Cluster centres drawn on top in black.
    ax.scatter(xcs,ycs,c='black', marker=">")
fig.tight_layout()
plt.show()

b'SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid  FROM waveformqrst  WHERE t_i>s_i  AND t_i-q_i<1000  AND s_i-r_i<1000  AND q_i IS NOT NULL  AND t_i IS NOT NULL  AND s_i IS NOT NULL  AND r_i IS NOT NULL AND centroid IS NOT NULL'

In [14]:
# 3-D view of the labelled samples (x=ts, y=qt, z=sr) with the cluster centres
# as large black triangles. Relies on ts, qt, sr, color, tsc, qtc and src
# computed by the 2-D scatter cell.
fig = plt.figure()
# A 3-D axes must be created explicitly; it was missing, so `ax` was undefined.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(tsc,qtc,src, c='black',marker="^",s=160)
ax.scatter(ts,qt,sr, c=color)
ax.set_xlabel("ts")
ax.set_ylabel("qt")
ax.set_zlabel("sr")
plt.show()

In [15]:
def findCentroid(centroid,window=0,dbname="mimic",maxWindow=1000) :
    """Find one stored wave whose (qt, ts, sr) measures lie next to a centroid.

    The search looks for a ``waveformqrst`` row (joined with
    ``waveformfields``) whose three measures fall inside
    ``[floor(c)-window, ceil(c)+window]`` for the matching centroid
    coordinate, widening ``window`` by one until a row is found.

    Parameters
    ----------
    centroid : sequence of 3 numbers
        Centroid coordinates in (qt, ts, sr) order.
    window : int
        Initial tolerance added on both sides of each coordinate.
    dbname : str
        Name of the PostgreSQL database to connect to.
    maxWindow : int
        Largest window tried before giving up.

    Returns
    -------
    dict
        Keys ``subject_id``, ``recorddate``, ``fs``, ``onda`` (record name),
        ``carpeta`` (record folder), ``signalII`` (position of "II" in
        ``signame``), ``signallength``, ``centroid``, ``wave``, ``begin``
        (``q_i``), ``end`` (``t_i``) and ``qrst`` (four ``[amp, index]`` pairs).

    Raises
    ------
    LookupError
        If no row matches even with the largest window.
    """
    select_stament = ("SELECT qrst.subject_id,qrst.recorddate,signame,fs,signallength,centroid,wave,"
        " q_amp,q_i,r_amp,r_i,s_amp,s_i,t_amp,t_i "
        " FROM waveformqrst qrst "
        " LEFT JOIN waveformfields fields"
        " ON qrst.subject_id = fields.subject_id "
        " AND qrst.recorddate = fields.recorddate "
        " WHERE "
        " (t_i-q_i BETWEEN %s AND %s)"
        " AND (t_i-s_i BETWEEN %s AND %s)"
        " AND (s_i-r_i BETWEEN %s AND %s)"
        " LIMIT 1")
    # Integer bounds are computed here and sent as query parameters, so no
    # value is spliced into the SQL text.
    lows = [int(np.floor(value)) for value in centroid[:3]]
    highs = [int(np.ceil(value)) for value in centroid[:3]]
    conn = psycopg2.connect("dbname="+dbname)
    try :
        cur = conn.cursor()
        while True :
            bounds = []
            for low, high in zip(lows, highs) :
                bounds.extend((low-window, high+window))
            cur.execute(select_stament, tuple(bounds))
            row = cur.fetchone()
            if row is not None :
                patient = "p"+str(row[0]).zfill(6)
                onda = patient+'-'+row[1]
                carpeta = patient[:3]+"/"+patient
                qrst = [[row[7],row[8]],[row[9],row[10]],[row[11],row[12]],[row[13],row[14]]]
                # NOTE(review): signame comes from the LEFT JOIN and is None
                # when no waveformfields row matches, which makes .index fail.
                return {"subject_id":row[0],"recorddate":row[1],"fs":row[3],"onda":onda,"carpeta":carpeta,
                        "signalII":row[2].index("II"),"signallength":row[4],"centroid":row[5],
                        "wave":row[6],"begin":row[8],"end":row[14],"qrst":qrst}
            if window >= maxWindow :
                break
            # Same centroid, same database, slightly wider tolerance.
            window += 1
    finally :
        # Runs on success too, so the connection is never left open.
        conn.close()
    raise LookupError("no wave found near centroid "+str(centroid)
                      +" within window "+str(window))

In [16]:
def updateOriginalWave(subject_id,recorddate,begin,end,originalWave):
    """Store the raw samples of one wave in ``waveformqrst.wave``.

    The row is identified by ``subject_id``, ``recorddate``, ``q_i = begin``
    and ``t_i = end``. The database is always ``mimic``.

    Parameters
    ----------
    subject_id, recorddate : values identifying the record
    begin, end : the wave's ``q_i`` and ``t_i`` indexes
    originalWave : array-like with ``tolist()``
        Samples to store; converted to a plain list before being sent.
    """
    select_stament = ("UPDATE waveformqrst SET wave = (%s) "
        "WHERE subject_id = (%s) AND recorddate =(%s) AND q_i = (%s) AND t_i = (%s)")
    conn = psycopg2.connect("dbname=mimic")
    try :
        cur = conn.cursor()
        cur.execute(select_stament, (originalWave.tolist(),subject_id,recorddate,begin,end,))
        conn.commit()
    finally :
        # Release the connection even when the update fails.
        conn.close()

In [17]:
def printCentroid(wave, ax=None) :
    """Plot the raw samples of the wave described by ``wave``.

    Parameters
    ----------
    wave : dict
        Result of ``findCentroid``; the keys ``carpeta``, ``onda``,
        ``signalII``, ``subject_id``, ``recorddate``, ``begin``, ``end``,
        ``wave`` and ``centroid`` are read.
    ax : matplotlib axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``), so the
        function no longer depends on a notebook-level ``ax`` variable.

    When the samples are not stored yet (``wave["wave"]`` is None) the record
    is read with ``wfdb``, 100 samples starting at ``begin`` are cut out of
    the NaN-free signal and saved with ``updateOriginalWave`` before plotting.
    """
    if ax is None :
        ax = plt.gca()
    waveLength = 100        # number of samples kept and plotted per wave
    tailSamples = 1800000   # only the last samples of the NaN-free signal are indexed
    subject_id = wave["subject_id"]
    begin = wave["begin"]
    originalWave = wave["wave"]
    if originalWave is None :
        print("searching new Centroid")
        # NOTE(review): pbdir presumably makes wfdb fetch the record remotely -- confirm.
        signal = wfdb.rdsamp(wave["onda"],pbdir='mimic3wdb/matched/'+wave["carpeta"],
                             channels =[wave["signalII"]]).p_signals
        signalNan = signal[~np.isnan(signal)]
        subsignal = signalNan[-tailSamples:]
        originalWave = subsignal[begin:begin+waveLength]
        # Cache the samples so the record is not read again next time.
        updateOriginalWave(subject_id,wave["recorddate"],begin,wave["end"],originalWave)
    # Waves without a centroid label are titled "z".
    cen = wave["centroid"] if wave["centroid"] is not None else "z"
    ax.set_title(cen+" subject:"+str(subject_id))
    ax.plot(originalWave[:waveLength])

In [18]:
# Plot one representative wave per cluster centre, each in its own panel of a
# two-column grid. `inx` is the 1-based panel position.
fig = plt.figure()
columns = 2
rows = (len(centroid) + columns - 1) // columns
inx= 1
for value in centroid :
    print(value)
    wave = findCentroid(value)
    # Create the panel (it also becomes the current axes) before drawing;
    # previously no axes existed and `ax` was undefined.
    ax = fig.add_subplot(rows, columns, inx)
    printCentroid(wave)
    inx=inx+1
fig.tight_layout()
plt.show()

[ 69.6188495315  38.4247657464  28.3880415143]
searching new Centroid
[ 160.9432284542  118.29124487     38.7540355677]
[ 256.804793331   178.3822507815   74.1394581452]
[ 168.6516410469   42.4929372663  121.9912754466]
[ 380.370030581    94.5675840979  281.1883792049]
[ 415.7864838394  333.9813907933   77.2056807052]
[ 731.8383838384  106.9353535354  620.3656565657]
[ 728.2561307902  560.3923705722  163.3732970027]

In [19]:
# Show the fitted cluster centres as an array literal for reference.
centroid

Out[19]:
array([[  69.6188495315,   38.4247657464,   28.3880415143],
[ 160.9432284542,  118.29124487  ,   38.7540355677],
[ 256.804793331 ,  178.3822507815,   74.1394581452],
[ 168.6516410469,   42.4929372663,  121.9912754466],
[ 380.370030581 ,   94.5675840979,  281.1883792049],
[ 415.7864838394,  333.9813907933,   77.2056807052],
[ 731.8383838384,  106.9353535354,  620.3656565657],
[ 728.2561307902,  560.3923705722,  163.3732970027]])

In [ ]: