K-means and Alphabet

Imports


In [1]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import psycopg2
import collections
import string
from psycopg2.extensions import register_adapter, AsIs
import wfdb
np.set_printoptions(suppress=True,precision=10)

Methods


In [2]:
def obtainMeasures(dbname="mimic",unlimit=False,centroid=None) :
    """Fetch the distinct (qt, ts, sr) measures stored in ``waveformqrst``.

    The measures are computed in SQL as ``qt = t_i - q_i``, ``ts = t_i - s_i``
    and ``sr = s_i - r_i``. Rows are kept only when the four indices are not
    NULL, ``t_i > s_i``, ``qt < 1000`` and ``sr < 1000``.

    Parameters
    ----------
    dbname : str
        Name of the PostgreSQL database to connect to.
    unlimit : bool
        When False (default) the query is capped with ``LIMIT 10000``;
        when True every matching row is returned.
    centroid : None, "notNull" or anything else
        * ``None``      -> only rows whose ``centroid`` column is NULL.
        * ``"notNull"`` -> only rows that already have a centroid; the
          centroid value is appended as a fourth element of each result row.
        * anything else (the notebook passes ``"all"``) -> no centroid filter.

    Returns
    -------
    list of list
        ``[qt, ts, sr]`` rows, or ``[qt, ts, sr, centroid]`` rows when
        ``centroid == "notNull"``.
    """
    if centroid is None :
        centroidFilter = " AND centroid is null"
    elif centroid == "notNull" :
        centroidFilter = "AND centroid IS NOT NULL"
    else :
        centroidFilter = ""
    limit = "" if unlimit else " LIMIT 10000 "
    select_stament = ("SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid "
                      " FROM waveformqrst "
                      " WHERE t_i>s_i "
                      " AND t_i-q_i<1000 "
                      " AND s_i-r_i<1000 "
                      " AND q_i IS NOT NULL "
                      " AND t_i IS NOT NULL "
                      " AND s_i IS NOT NULL "
                      " AND r_i IS NOT NULL "+centroidFilter+limit
    )
    withCentroid = centroid == "notNull"
    conn = psycopg2.connect("dbname="+dbname)
    # try/finally so the connection is released even when the query fails.
    try :
        cur = conn.cursor()
        # Echo the exact statement sent to the server (kept as notebook output).
        print(cur.mogrify(select_stament))
        cur.execute(select_stament)
        if withCentroid :
            return [[row[0],row[1],row[2],row[3]] for row in cur]
        return [[row[0],row[1],row[2]] for row in cur]
    finally :
        conn.close()

In [3]:
# Default size (width, height) applied to every matplotlib figure created below.
fig_size = [12, 9]
plt.rcParams.update({"figure.figsize": fig_size})
def printKmeans(kmeans) :
    """Draw a bar chart of how many samples were assigned to each cluster.

    Parameters
    ----------
    kmeans : object
        Anything exposing a ``labels_`` iterable of cluster labels (the
        notebook passes a fitted ``sklearn.cluster.KMeans``).

    The bars are ordered by cluster label, and each bar is annotated with its
    count. An empty ``labels_`` produces an empty chart instead of an error.
    """
    counts = collections.Counter(kmeans.labels_)
    # Counter keeps first-seen order; sort so bar k is always cluster k
    # regardless of the order in which the samples were fed to K-means.
    labels = sorted(counts)
    values = [counts[label] for label in labels]
    indexes = np.arange(len(labels))
    width = 0.5
    fig, ax = plt.subplots()
    for i, v in enumerate(values):
        # Count printed just above the top of each bar.
        ax.text(i-0.1,v+1, str(v), color='blue', fontweight='bold')
    ax.bar(indexes, values, width)
    ax.set_title('Cantidad de Latidos por centroide')
    ax.set_ylabel('Cantidad de latidos')
    ax.set_xlabel('Centroides')
    ax.set_xticks(indexes)
    ax.set_xticklabels([str(label) for label in labels])
    plt.show()

Implementation


In [4]:
# Training set: every distinct (qt, ts, sr) row, labelled or not.
# Any `centroid` value other than None / "notNull" disables the centroid filter.
waves = obtainMeasures(unlimit=True, centroid="all")
X = np.array(waves)


b'SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid  FROM waveformqrst  WHERE t_i>s_i  AND t_i-q_i<1000  AND s_i-r_i<1000  AND q_i IS NOT NULL  AND t_i IS NOT NULL  AND s_i IS NOT NULL  AND r_i IS NOT NULL '

In [5]:
# Starting cluster centres, one (qt, ts, sr) row per cluster.
centroid = np.array([
    [ 71.1394970414,  37.1831854044,  31.0994575937],
    [160.9690551506, 118.464585339,   38.6135332141],
    [256.8278445722, 178.5887785328,  73.9569611431],
    [170.7113402062,  43.2392611684, 123.2794243986],
    [380.5647636587,  94.7071823204, 281.2449355433],
    [415.7864838394, 333.9813907933,  77.2056807052],
    [731.8383838384, 106.9353535354, 620.3656565657],
    [728.2561307902, 560.3923705722, 163.3732970027],
])
# The centres are supplied explicitly, so a single initialisation run is requested.
kmeans = KMeans(n_clusters=8, init=centroid, n_init=1)
kmeans = kmeans.fit(X)

In [6]:
# Replace the hand-written seeds with the centres the fit ended on;
# the plotting and lookup cells further down read this `centroid` array.
centroid = kmeans.cluster_centers_
print(centroid)
#print(kmeans)


[[  69.6188495315   38.4247657464   28.3880415143]
 [ 160.9432284542  118.29124487     38.7540355677]
 [ 256.804793331   178.3822507815   74.1394581452]
 [ 168.6516410469   42.4929372663  121.9912754466]
 [ 380.370030581    94.5675840979  281.1883792049]
 [ 415.7864838394  333.9813907933   77.2056807052]
 [ 731.8383838384  106.9353535354  620.3656565657]
 [ 728.2561307902  560.3923705722  163.3732970027]]

In [7]:
# Bar chart of the number of training rows assigned to each cluster.
printKmeans(kmeans)


Update the stored waves with their K-means centroid labels


In [8]:
def updateQRST(wacentroid) :
    """Store centroid labels in ``waveformqrst`` with a single UPDATE.

    Parameters
    ----------
    wacentroid : list of tuple
        ``(tq, ts, sr, centroid)`` tuples. Every table row whose
        ``t_i-q_i``, ``t_i-s_i`` and ``s_i-r_i`` equal ``tq``, ``ts`` and
        ``sr`` gets its ``centroid`` column set to the tuple's last element.

    An empty list is a no-op: no connection is opened and nothing is sent.
    The connection to the ``mimic`` database is always closed, and the
    change is committed only when the UPDATE succeeds.
    """
    # Nothing to write; this also avoids handing unnest() an empty array
    # parameter, which carries no element type.
    if len(wacentroid) == 0 :
        return
    update_statement = """
    UPDATE waveformqrst AS t
    SET
        centroid = s.centroid
        FROM unnest(%s) s(tq integer,ts integer,sr integer,centroid unknown)
        WHERE t.t_i-t.q_i = s.tq
        AND t.t_i-t.s_i = s.ts
        AND t.s_i-t.r_i = s.sr;
    """
    conn = psycopg2.connect("dbname=mimic")
    try :
        cur = conn.cursor()
        # The whole list is passed as ONE parameter (an array of records).
        cur.execute(update_statement, (wacentroid,))
        conn.commit()
    finally :
        # Closing without commit discards the transaction on failure.
        conn.close()

In [9]:
def fillEmptyCentroids(iterations=1280) :
    """Label measures that have no centroid yet and store the labels.

    Loads every (qt, ts, sr) row whose centroid is NULL, keeps the first
    ``iterations`` of them, asks the module-level ``kmeans`` model for the
    cluster of each one, converts the cluster index to a lowercase letter
    (0 -> 'a', 1 -> 'b', ...) and writes the result with ``updateQRST``.

    Parameters
    ----------
    iterations : int
        Maximum number of measures to label in this call.

    A measure that cannot be classified is reported and skipped; the rest are
    still written. Nothing is sent to the database when no label was produced.
    """
    waves = obtainMeasures(unlimit=True,centroid=None)
    print("measures obtained:"+str(len(waves)))
    centro = []
    # Slicing already copes with iterations >= len(waves).
    for wave in waves[:iterations] :
        try:
            qt, ts, sr = wave[0], wave[1], wave[2]
            label = kmeans.predict([[qt, ts, sr]])[0]
            centro.append((qt, ts, sr, string.ascii_lowercase[label]))
        except Exception as error:
            # Exception rather than a bare except: Ctrl-C must still stop
            # the loop, and the failing row and its cause are reported.
            print("error in value", wave, error)
    if centro :
        updateQRST(centro)

In [10]:
# Label at most 200 of the measures that still have no centroid and store them.
fillEmptyCentroids(200)
# 5669 1083284
# NOTE(review): the two numbers above are undocumented - presumably row counts
# from an earlier run; confirm or remove.


b'SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid  FROM waveformqrst  WHERE t_i>s_i  AND t_i-q_i<1000  AND s_i-r_i<1000  AND q_i IS NOT NULL  AND t_i IS NOT NULL  AND s_i IS NOT NULL  AND r_i IS NOT NULL  AND centroid is null'
measures obtained:4688

In [11]:
# Spot check: classify a few hand-written (qt, ts, sr) triples.
kmeans.predict([
    [1, 18, 14],
    [43, 28, 10],
    [49, 37, 7],
    [65, 46, 14],
    [26, 13, 8],
    [96, 72, 19],
    [247, 236, 11],
    [1242, 1136, 101],
])


Out[11]:
array([0, 0, 0, 0, 0, 0, 2, 7], dtype=int32)

In [12]:
def centroidColor(clist) :
    """Map centroid labels to colour names.

    ``clist`` is an iterable of labels: the letters 'a'..'h' or ``None``
    (``None`` maps to "maroon"). Returns a list with one colour name per
    label, in the same order. Any other label raises ``KeyError``.
    """
    names = ("blue", "red", "green", "magenta",
             "cyan", "yellow", "purple", "chocolate")
    palette = dict(zip("abcdefgh", names))
    palette[None] = "maroon"
    return list(map(palette.__getitem__, clist))

In [13]:
# Labelled measures: rows are [qt, ts, sr, centroid letter].
waves = obtainMeasures(unlimit=True, centroid="notNull")
qt, ts, sr = [np.array([row[col] for row in waves]) for col in range(3)]
# Same three columns for the cluster centres.
qtc, tsc, src = [np.array([row[col] for row in centroid]) for col in range(3)]
# One colour name per measure, derived from its centroid letter.
color = centroidColor(np.array([row[3] for row in waves]))

# (subplot position, title, x label, x data, y data, centre x, centre y)
panels = [
    (221, "qt/ts", "qt", qt, ts, qtc, tsc),
    (222, "ts/sr", "ts", ts, sr, tsc, src),
    (223, "sr/qt", "sr", sr, qt, src, qtc),
]
fig = plt.figure()
for position, title, xlabel, xs, ys, cxs, cys in panels:
    ax = fig.add_subplot(position)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.scatter(xs, ys, c=color)
    # Cluster centres drawn on top as black markers.
    ax.scatter(cxs, cys, c='black', marker=">")
plt.show()


b'SELECT DISTINCT t_i-q_i AS qt,t_i-s_i AS ts,s_i-r_i AS sr,centroid  FROM waveformqrst  WHERE t_i>s_i  AND t_i-q_i<1000  AND s_i-r_i<1000  AND q_i IS NOT NULL  AND t_i IS NOT NULL  AND s_i IS NOT NULL  AND r_i IS NOT NULL AND centroid IS NOT NULL'

In [14]:
# 3-D view of the same data: cluster centres as large black triangles,
# measures coloured by their centroid. Axes order is (ts, qt, sr).
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(tsc, qtc, src, marker="^", s=160, c='black')
ax.scatter(ts, qt, sr, c=color)
plt.show()



In [15]:
def findCentroid(centroid,window=0,dbname="mimic",maxWindow=1000) :
    """Find one stored beat whose measures match a centroid.

    Looks for a ``waveformqrst`` row (joined with ``waveformfields``) whose
    ``t_i-q_i``, ``t_i-s_i`` and ``s_i-r_i`` lie within
    ``[floor(c)-window, ceil(c)+window]`` of the matching centroid
    coordinate. If nothing matches, the window is widened by one and the
    query is repeated on the same connection.

    Parameters
    ----------
    centroid : sequence of 3 numbers
        (qt, ts, sr) coordinates to look for.
    window : int
        Initial tolerance added on each side of every coordinate.
    dbname : str
        PostgreSQL database name, used for every attempt.
    maxWindow : int
        Largest window tried before giving up. At least one query is always
        run, even when ``window`` already exceeds it.

    Returns
    -------
    dict
        Keys ``subject_id``, ``recorddate``, ``fs``, ``onda`` (record name
        ``p<subject_id zero-padded to 6>-<recorddate>``), ``carpeta``
        (``<first 3 chars of patient>/<patient>``), ``signalII`` (position of
        "II" in ``signame``), ``signallength``, ``centroid``, ``wave``,
        ``begin`` (``q_i``), ``end`` (``t_i``) and ``qrst`` (four
        ``[amplitude, index]`` pairs for q, r, s, t).

    Raises
    ------
    LookupError
        When no row matches up to ``maxWindow``.
    """
    # float() accepts numpy scalars and yields values the driver can adapt,
    # so the query can be parameterised instead of concatenated.
    params = {"qt":float(centroid[0]),"ts":float(centroid[1]),"sr":float(centroid[2])}
    select_stament = ("SELECT qrst.subject_id,qrst.recorddate,signame,fs,signallength,centroid,wave,"
                      " q_amp,q_i,r_amp,r_i,s_amp,s_i,t_amp,t_i "
                      " FROM waveformqrst qrst "
                      " LEFT JOIN waveformfields fields"
                      " ON qrst.subject_id = fields.subject_id "
                      " AND qrst.recorddate = fields.recorddate "
                      " WHERE "
                      " (t_i-q_i between floor(%(qt)s)-%(win)s and ceil(%(qt)s)+%(win)s)"
                      " AND (t_i-s_i between floor(%(ts)s)-%(win)s and ceil(%(ts)s)+%(win)s)"
                      " AND (s_i-r_i between floor(%(sr)s)-%(win)s and ceil(%(sr)s)+%(win)s)"
                      " LIMIT 1")
    conn = psycopg2.connect("dbname="+dbname)
    # try/finally: the connection is released on success, failure and give-up.
    try :
        cur = conn.cursor()
        while True :
            params["win"] = window
            cur.execute(select_stament, params)
            row = cur.fetchone()
            if row is not None :
                break
            if window >= maxWindow :
                raise LookupError("no wave found near centroid "+str(centroid)
                                  +" within window "+str(window))
            window = window+1
    finally :
        conn.close()
    patient = "p"+str(row[0]).zfill(6)
    onda = patient+'-'+row[1]
    carpeta = patient[:3]+"/"+patient
    qrst = [[row[7],row[8]],[row[9],row[10]],[row[11],row[12]],[row[13],row[14]]]
    # NOTE(review): signame comes from a LEFT JOIN, so it would be None when
    # no waveformfields row exists - confirm that cannot happen.
    return {"subject_id":row[0],"recorddate":row[1],"fs":row[3],"onda":onda,"carpeta":carpeta,
            "signalII":row[2].index("II"),"signallength":row[4],"centroid":row[5],
            "wave":row[6],"begin":row[8],"end":row[14],"qrst":qrst}

In [16]:
def updateOriginalWave(subject_id,recorddate,begin,end,originalWave):
    """Store the raw samples of one beat in its ``waveformqrst`` row.

    Sets the ``wave`` column of the row identified by ``subject_id``,
    ``recorddate``, ``q_i = begin`` and ``t_i = end``.

    Parameters
    ----------
    subject_id, recorddate : row identifiers, passed to the query unchanged.
    begin, end : values matched against ``q_i`` and ``t_i``.
    originalWave : array-like
        Samples to store; a numpy array or any sequence. It is converted to
        a plain Python list before being sent.

    The connection to the ``mimic`` database is always closed, and the
    change is committed only when the UPDATE succeeds.
    """
    # asarray first, so plain lists are accepted as well as numpy arrays.
    samples = np.asarray(originalWave).tolist()
    update_statement = ("UPDATE waveformqrst SET wave = (%s) "
                      "WHERE subject_id = (%s) AND recorddate =(%s) AND q_i = (%s) AND t_i = (%s)")
    conn = psycopg2.connect("dbname=mimic")
    try :
        cur = conn.cursor()
        cur.execute(update_statement, (samples,subject_id,recorddate,begin,end))
        conn.commit()
    finally :
        conn.close()

In [17]:
def printCentroid(wave,axes=None) :
    """Plot the first 100 samples of a beat, downloading them if needed.

    Parameters
    ----------
    wave : dict
        Must provide ``carpeta``, ``onda``, ``signalII``, ``subject_id``,
        ``recorddate``, ``begin``, ``end``, ``wave`` and ``centroid``.
    axes : object with ``set_title`` and ``plot``, optional
        Where to draw. When omitted, the module-level ``ax`` is used, as the
        notebook sets ``ax`` right before each call.

    When ``wave["wave"]`` is None the signal is read with ``wfdb.rdsamp``
    from ``mimic3wdb/matched/<carpeta>`` (channel ``signalII`` only), NaN
    samples are dropped, the last 1,800,000 samples are kept, the 100
    samples starting at ``begin`` are taken, and that slice is written back
    with ``updateOriginalWave``. The panel title is
    ``"<centroid> subject:<subject_id>"``, with "z" for a missing centroid.
    """
    # Explicit axes take priority; otherwise fall back to the notebook global.
    target = ax if axes is None else axes
    subject_id = wave["subject_id"]
    begin = wave["begin"]
    originalWave = wave["wave"]
    if originalWave is None :
        print("searching new Centroid")
        signal = wfdb.rdsamp(wave["onda"],pbdir='mimic3wdb/matched/'+wave["carpeta"],
                             channels =[wave["signalII"]]).p_signals
        # Boolean masking both removes the NaNs and flattens the array.
        signalNan = signal[~np.isnan(signal)]
        subsignal = signalNan[-1800000:]
        originalWave = subsignal[begin:begin+100]
        # Cache the samples so the next lookup does not download them again.
        updateOriginalWave(subject_id,wave["recorddate"],begin,wave["end"],originalWave)
    cen = wave["centroid"] if wave["centroid"] is not None else "z"
    target.set_title(cen+" subject:"+str(subject_id))
    target.plot(originalWave[:100])

In [18]:
# One panel per centroid on a 3x3 grid: look up a stored beat close to the
# centroid and plot it. `value` and `ax` are deliberately module-level names:
# the helper functions called below read them as globals.
fig = plt.figure()
for inx, value in enumerate(centroid, start=1) :
    print(value)
    ax = fig.add_subplot(3, 3, inx)
    wave = findCentroid(value)
    printCentroid(wave)
plt.show()


[ 69.6188495315  38.4247657464  28.3880415143]
searching new Centroid
[ 160.9432284542  118.29124487     38.7540355677]
[ 256.804793331   178.3822507815   74.1394581452]
[ 168.6516410469   42.4929372663  121.9912754466]
[ 380.370030581    94.5675840979  281.1883792049]
[ 415.7864838394  333.9813907933   77.2056807052]
[ 731.8383838384  106.9353535354  620.3656565657]
[ 728.2561307902  560.3923705722  163.3732970027]

In [19]:
# Display the fitted cluster centres (one row per centroid: qt, ts, sr).
centroid


Out[19]:
array([[  69.6188495315,   38.4247657464,   28.3880415143],
       [ 160.9432284542,  118.29124487  ,   38.7540355677],
       [ 256.804793331 ,  178.3822507815,   74.1394581452],
       [ 168.6516410469,   42.4929372663,  121.9912754466],
       [ 380.370030581 ,   94.5675840979,  281.1883792049],
       [ 415.7864838394,  333.9813907933,   77.2056807052],
       [ 731.8383838384,  106.9353535354,  620.3656565657],
       [ 728.2561307902,  560.3923705722,  163.3732970027]])

In [ ]: