In [1]:
import seaborn as sns
import numpy as np
import pandas as pd
import os
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn import linear_model

%matplotlib inline

In [24]:
# Locations of the exported lab results (input) and of the generated figures
# (output). An earlier location, "/docs/cancer/blood", used to be assigned to
# `path` first but was overwritten on the very next line, so it is dropped.
path = "/home/oier/GDrive/researchoier@gmail.com/blood/csv/"
output = "/home/oier/Desktop"
file = "Blood Samples.csv"
file = os.path.join(path, file)

# The export is semicolon-separated.
data = pd.read_csv(file, sep=";")

# Dates are written month/day/year. pd.to_datetime parses the whole column at
# once and, unlike a per-row strptime call, maps a missing date to NaT instead
# of raising.
data["Fecha"] = pd.to_datetime(data["Fecha"], format="%m/%d/%Y")

# DataFrame.sort(columns=...) no longer exists in pandas; sort_values is its
# replacement. The original row labels are kept, so after this line the index
# is NOT positional -- use .iloc when a row is picked by position.
data = data.sort_values(by="Fecha")

In [25]:
# Show every column next to its positional index so one can be picked by number.
list(enumerate(data.columns))


Out[25]:
[(0, 'Fecha'),
 (1, 'Diagnostico'),
 (2, 'Servicio'),
 (3, 'Medico'),
 (4, 'glucosa (mg/dL)'),
 (5, 'urea (mg/dl)'),
 (6, 'creatinina (mg/dL)'),
 (7, 'acido urico (mg/dl)'),
 (8, 'Filtrado Glomerular estimado (MDRD-4 IDMS) (mL/ min/ 1.73m3)'),
 (9, 'Trigliceridos (mg/dL)'),
 (10, 'colesterol (mg/dL)'),
 (11, 'hdl-colesterol (mg/dL)'),
 (12, 'ldl-colesterol (mg/dL)'),
 (13, 'sodio(mEq/L)'),
 (14, 'potasio(mEq/L)'),
 (15, 'cloro(mEq/L)'),
 (16, 'calcio (mg/dl)'),
 (17, 'PCR (mg/L)'),
 (18, 'fosforo (mg/dl)'),
 (19, 'proteinas (g/dl)'),
 (20, 'albumia (g/dl)'),
 (21, 'alb/glob '),
 (22, 'alb/prot'),
 (23, 'ast/got (U/L)'),
 (24, 'alt/gpt (U/L)'),
 (25, 'gamma-gt (U/L)'),
 (26, 'fosf. Alcalina (U/L)'),
 (27, 'bilirrubina total (mg/dL)'),
 (28, 'CO2 total (mmol/L)'),
 (29, 'Lactato (mg/dL)'),
 (30, 'LDH (U/L)'),
 (31, 'amilasa (U/L)'),
 (32, 'hierro (microg/dl)'),
 (33, 'cap. Fij. Hierro (microg/dl)'),
 (34, 'transferrina (g/l)'),
 (35, 'saturac. Transf (%)'),
 (36, 'ferritina (ng/ml)'),
 (37, 'albumina(%)'),
 (38, 'alfa-1 (%)'),
 (39, 'alfa-2 (%)'),
 (40, 'beta (%)'),
 (41, 'Gamma (%)'),
 (42, 'A/G '),
 (43, 'albumina (g/dl)'),
 (44, 'alfa 1 (g/dl)'),
 (45, 'alfa 2 (g/dl)'),
 (46, 'beta (g/dl)'),
 (47, 'gamma (g/dl)'),
 (48, 'alfa-1-antitripsina (g/L)'),
 (49, 'nº leucocitos (miles)'),
 (50, 'neutrofilos (%)'),
 (51, 'linfocitos (%)'),
 (52, 'monocitos (%)'),
 (53, 'eosinofilos (%)'),
 (54, 'basofilos (%)'),
 (55, 'nº hematies (millones)'),
 (56, 'hematocrito (%)'),
 (57, 'Granulocitos inmad (%)'),
 (58, 'Eritroblastos (%)'),
 (59, 'hemoglobina(g/dl)'),
 (60, 'VCM (fl)'),
 (61, 'nº plaquetas (miles)'),
 (62, 'neutrofilos (miles)'),
 (63, 'linfocitos (miles)'),
 (64, 'monocitos (miles)'),
 (65, 'eosinofilos (miles)'),
 (66, 'basofilos (miles)'),
 (67, 'Granulocitos inmad (miles)'),
 (68, 'Eritroblastos (miles)'),
 (69, 'HCM (pg)'),
 (70, 'CHCM (g/dl)'),
 (71, 'ADE (%)'),
 (72, 'VPM (fl)'),
 (73, 'VSG 1º  Hora (mm)'),
 (74, 'Reticulocitos (%)'),
 (75, 'TSH (microUl/ml)'),
 (76, 'T4 Libre (ng/dl)'),
 (77, 'FSH (Ul/L)'),
 (78, 'LH(Ul/L)'),
 (79, 'PSA TOTAL (ng/ml)'),
 (80, '17-Beta-Estradiol (pg/ml)'),
 (81, 'prolactina (ng(ml)'),
 (82, 'Testosterona total (ng/ml)'),
 (83, 'Testosterona libre calculada (pg/ml)'),
 (84, 'Vitamina D (ng/mL)'),
 (85, 'dehidroepiandrosterona sulfato (microg/mL)'),
 (86, 'cortisol (microg/dl)'),
 (87, 'sex hormone binding globulin (nmol/L)'),
 (88, 'Antitransglutamin (U/mL)'),
 (89, 'beta-hcg(mUl/L)'),
 (90, 'aPTT (seg)'),
 (91, 'Tiempo protombina (%)'),
 (92, 'INR'),
 (93, 'ratio TTPA'),
 (94, 'Fibrinogeno (d) (mg/dl)'),
 (95, 'Alfa-fetoproteina (U/ml)'),
 (96, 'CEA (ng/ml)'),
 (97, 'Ca 19.9 (U/ml)'),
 (98, 'ceruloplasmina (g/l)'),
 (99, 'Test COOMBS Directo'),
 (100, 'haptoglobina (g/l)'),
 (101, 'vitamina b12 (pg/ml)'),
 (102, 'folatos (ng/ml)'),
 (103, 'pH '),
 (104, 'pO2 (mmHg)'),
 (105, 'pCO2 (mmHg)'),
 (106, 'Bicarbonato (mmol/L)'),
 (107, 'Exceso de bases (mmol/L)'),
 (108, 'Saturacion de O2 (%)'),
 (109, 'FIO2'),
 (110, 'proteina C (mg/L)'),
 (111, 'Notas')]

In [45]:
# Show every sample date next to its position in the date-sorted frame.
list(enumerate(data["Fecha"]))


Out[45]:
[(0, datetime.datetime(2009, 12, 2, 0, 0)),
 (1, datetime.datetime(2009, 12, 3, 0, 0)),
 (2, datetime.datetime(2010, 4, 9, 0, 0)),
 (3, datetime.datetime(2010, 4, 12, 0, 0)),
 (4, datetime.datetime(2010, 6, 22, 0, 0)),
 (5, datetime.datetime(2010, 6, 23, 0, 0)),
 (6, datetime.datetime(2010, 8, 23, 0, 0)),
 (7, datetime.datetime(2011, 6, 28, 0, 0)),
 (8, datetime.datetime(2011, 6, 29, 0, 0)),
 (9, datetime.datetime(2011, 6, 30, 0, 0)),
 (10, datetime.datetime(2011, 7, 7, 0, 0)),
 (11, datetime.datetime(2011, 7, 8, 0, 0)),
 (12, datetime.datetime(2011, 8, 2, 0, 0)),
 (13, datetime.datetime(2011, 8, 22, 0, 0)),
 (14, datetime.datetime(2011, 8, 23, 0, 0)),
 (15, datetime.datetime(2011, 8, 30, 0, 0)),
 (16, datetime.datetime(2011, 8, 30, 0, 0)),
 (17, datetime.datetime(2011, 8, 31, 0, 0)),
 (18, datetime.datetime(2011, 12, 22, 0, 0)),
 (19, datetime.datetime(2012, 2, 3, 0, 0)),
 (20, datetime.datetime(2012, 5, 28, 0, 0)),
 (21, datetime.datetime(2012, 6, 22, 0, 0)),
 (22, datetime.datetime(2012, 7, 19, 0, 0)),
 (23, datetime.datetime(2012, 10, 1, 0, 0)),
 (24, datetime.datetime(2012, 12, 20, 0, 0)),
 (25, datetime.datetime(2013, 1, 30, 0, 0)),
 (26, datetime.datetime(2013, 6, 19, 0, 0)),
 (27, datetime.datetime(2013, 7, 5, 0, 0)),
 (28, datetime.datetime(2013, 9, 18, 0, 0)),
 (29, datetime.datetime(2013, 10, 21, 0, 0)),
 (30, datetime.datetime(2015, 4, 6, 0, 0))]

In [4]:
# Position of the column to inspect, as shown by the column listing.
column_position = 96
colname = data.columns[column_position]

In [5]:
# Keep only the rows where the chosen column actually holds a value,
# together with the matching dates.
m = data[colname]
mask = m.notnull()

y = m[mask]
x = data["Fecha"][mask]

In [6]:
# Linear model: least-squares line through the measured values.
# The regressor is the sample's ordinal position (0, 1, 2, ...), not the
# elapsed time between samples.
clf = linear_model.LinearRegression()
X = np.arange(len(x)).reshape(-1, 1)
# Series.reshape was removed from pandas; reshape the underlying ndarray.
Y = y.values.reshape(-1, 1)
line = clf.fit(X, Y).predict(X)

In [7]:
# Measured values in row order with the fitted line overlaid in red;
# the x axis is labelled with the sample dates.
positions = np.arange(len(x))
date_labels = [stamp.strftime("%Y/%m/%d") for stamp in x]

plt.plot(y.values)
plt.plot(positions, line, c="r")
plt.title(colname)
plt.xticks(range(y.size), date_labels, rotation=90)
plt.show()



In [8]:
# Box plot of every value except the last one; the green line marks the last value.
values = y.values
plt.boxplot(values[:-1])
plt.title(colname)
plt.axhline(values[-1], color="g")
plt.show()



In [9]:
# Violin plot of every value except the last one; the green line marks the last value.
values = y.values
plt.violinplot(values[:-1])
plt.title(colname)
plt.axhline(values[-1], color="g")
plt.show()



In [10]:
# Dump the selected dates and values for a quick sanity check.
for series in (x, y):
    print(series)


14   2011-06-29
23   2011-06-30
13   2011-07-07
24   2011-07-08
11   2011-12-22
9    2012-05-28
5    2012-12-20
2    2013-07-05
1    2013-09-18
Name: Fecha, dtype: datetime64[ns]
14    1.6
23    1.6
13    1.9
24    1.9
11    2.5
9     2.2
5     4.4
2     4.2
1     4.8
Name: CEA (ng/ml), dtype: float64

In [50]:
# Export one box plot per column: the box summarises every other available
# value of that column and a green line marks the value of the selected sample.
folder = "all_images"
subfolder = "boxplots"
out_path = os.path.join(output, folder, subfolder)

# Position (in date order) of the sample to highlight.
select = 30
print(out_path)

# savefig does not create missing directories.
if not os.path.isdir(out_path):
    os.makedirs(out_path)

# The frame keeps its original row labels after being sorted by date, so the
# selected row must be addressed by position (.iloc); plain [] would look the
# number up as a label and return a different sample.
select_date = data["Fecha"].iloc[select]

# NOTE(review): the range ends at column 109, so column 110
# ('proteina C (mg/L)') is never plotted -- confirm this is intended.
for i in np.arange(4,110,1):

    # Columns the notebook leaves out of the box plots (reason not recorded).
    if i in (8, 18, 28):
        continue

    colname = data.columns[i]
    m = data[colname]

    # Missing values are NaN and `NaN == None` is False, so the former
    # `== None` test never skipped anything; pd.isnull catches both.
    selected_value = m.iloc[select]
    if pd.isnull(selected_value):
        continue

    # Every other available value of this column.
    y = m.drop(m.index[select]).dropna()
    if y.empty:
        continue

    # Column names contain "/" (e.g. "mg/dL"), which the file system reads as
    # a directory separator and which made savefig fail.
    fname = "{}_{}.png".format(colname.replace("/", "-"), select_date)

    plt.close()
    try:
        plt.boxplot(y.values)
        plt.title(colname)
        plt.axhline(selected_value, color="g")

        #save png
        plt.savefig(os.path.join(out_path, fname), bbox_inches='tight')
    except Exception as exc:
        # Keep going with the remaining columns, but say what was skipped
        # instead of hiding the failure.
        print("skipped {!r}: {}".format(colname, exc))
    finally:
        plt.close()


/home/oier/Desktop/all_images/boxplots

In [51]:
# Export one violin plot per column: the violin summarises every other
# available value of that column and a green line marks the value of the
# selected sample.
folder = "all_images"
subfolder = "violins"
out_path = os.path.join(output, folder, subfolder)
print(out_path)

# Position (in date order) of the sample to highlight.
select = 30

# savefig does not create missing directories.
if not os.path.isdir(out_path):
    os.makedirs(out_path)

# The frame keeps its original row labels after being sorted by date, so the
# selected row must be addressed by position (.iloc); plain [] would look the
# number up as a label and return a different sample.
select_date = data["Fecha"].iloc[select]

# NOTE(review): the range ends at column 109, so column 110
# ('proteina C (mg/L)') is never plotted -- confirm this is intended.
for i in np.arange(4,110,1):
    colname = data.columns[i]
    m = data[colname]

    # Missing values are NaN and `NaN == None` is False, so the former
    # `== None` test never skipped anything; pd.isnull catches both.
    selected_value = m.iloc[select]
    if pd.isnull(selected_value):
        continue

    # Every other available value of this column.
    y = m.drop(m.index[select]).dropna()
    if y.empty:
        continue

    # Column names contain "/" (e.g. "mg/dL"), which the file system reads as
    # a directory separator and which made savefig fail.
    fname = "{}_{}.png".format(colname.replace("/", "-"), select_date)

    plt.close()
    try:
        plt.violinplot(y.values)
        plt.title(colname)

        # Mark the selected sample itself, so the line always matches the
        # date used in the file name.
        plt.axhline(selected_value, color="g")

        plt.savefig(os.path.join(out_path, fname), bbox_inches='tight')
    except Exception as exc:
        # Keep going with the remaining columns, but say what was skipped
        # instead of hiding the failure.
        print("skipped {!r}: {}".format(colname, exc))
    finally:
        plt.close()


/home/oier/Desktop/all_images/violins

In [43]:
# Export one history plot per column: the available values in date order with
# a least-squares line overlaid in red.
folder = "all_images"
subfolder = "history"
out_path = os.path.join(output, folder, subfolder)
print(out_path)

# Position (in date order) of the sample the export is named after. Defined
# here so the cell does not depend on a value left behind by another cell.
select = 30

# savefig does not create missing directories.
if not os.path.isdir(out_path):
    os.makedirs(out_path)

# The frame keeps its original row labels after being sorted by date, so the
# selected row must be addressed by position (.iloc); plain [] would look the
# number up as a label and return a different sample.
select_date = data["Fecha"].iloc[select]

# NOTE(review): the range ends at column 109, so column 110
# ('proteina C (mg/L)') is never plotted -- confirm this is intended.
for i in np.arange(4,110,1):
    colname = data.columns[i]
    m = data[colname]

    # Only plot columns that have a value for the selected sample. Missing
    # values are NaN and `NaN == None` is False, so the former `== None` test
    # never skipped anything; pd.isnull catches both.
    if pd.isnull(m.iloc[select]):
        continue

    mask = m.notnull()
    y = m[mask]
    x = data["Fecha"][mask]

    # Column names contain "/" (e.g. "mg/dL"), which the file system reads as
    # a directory separator and which made savefig fail.
    fname = "{}_{}.png".format(colname.replace("/", "-"), select_date)

    plt.close()
    try:
        #linear model
        # The regressor is the sample's ordinal position, not elapsed time.
        clf = linear_model.LinearRegression()
        X = np.arange(len(x)).reshape(-1, 1)
        # Series.reshape was removed from pandas; reshape the ndarray instead.
        Y = y.values.reshape(-1, 1)
        line = clf.fit(X, Y).predict(X)

        plt.plot(y.values)
        plt.plot(np.arange(len(x)), line, c="r")
        plt.title(colname)
        labels = [stamp.strftime("%Y/%m/%d") for stamp in x]
        plt.xticks(range(y.size), labels, rotation=90)
        plt.savefig(os.path.join(out_path, fname), bbox_inches='tight')
    except Exception as exc:
        # Keep going with the remaining columns, but say what was skipped
        # instead of hiding the failure.
        print("skipped {!r}: {}".format(colname, exc))
    finally:
        plt.close()


/home/oier/Desktop/all_images/history

In [ ]: