In [259]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
In [260]:
# Load the CSV that every later cell in this notebook works from.
csv_path = 'ashy.csv'
df = pd.read_csv(csv_path)
In [261]:
# Peek at the first rows of the freshly loaded frame.
df.head()
Out[261]:
In [262]:
# Show the dtype pandas inferred for each column; 'Lab Results' and 'Date' are cast explicitly further down.
df.dtypes
Out[262]:
In [263]:
# Names (as they appear in the 'Lab Test' column) of the tests that get filtered and plotted below.
ex_range_tests = [
    'BUN',
    'Phosphorus (PHOS)',
    'Creatinine (CREA)',
    'Cholesterol (CHOL)',
    'Calcium (CA)',
    'Hematocrit (HCT)',
    'MCV',
]
In [264]:
# Keep only the rows whose 'Lab Test' is one of the selected tests.
# .copy() makes the subset an independent frame: the next cell adds a column to it, and writing
# into a filtered selection of df would otherwise trigger SettingWithCopyWarning.
ex_range_df = df.loc[df['Lab Test'].isin(ex_range_tests)].copy()
In [265]:
# Seed a 'Range Max' column with the raw 'Lab Range' text; the regex replacements in the
# following cells trim that text down.
ex_range_df.loc[:,'Range Max'] = ex_range_df.loc[:,'Lab Range']
In [266]:
# Remove the "<digits><any char><digits> - " part of each 'Range Max' value (presumably the lower
# bound of the range and its separator — verify against the data).
# The pattern is a raw string because "\d" and "\s" are not valid Python string escapes and
# emit a warning on current interpreters.
# NOTE(review): the "." is unescaped and so matches any single character; it is kept as-is so
# exactly the same text is matched as before.
ex_range_df = ex_range_df.replace(
    to_replace={'Range Max': {r"\d*.\d*\s-\s": ''}},
    regex=True,
)
In [267]:
# In both columns, drop everything from the first whitespace character onwards, leaving only the
# leading token (presumably the trailing part is a unit — verify against the data).
# Raw strings avoid the invalid "\s" escape-sequence warning of a plain string literal.
ex_range_df = ex_range_df.replace(
    to_replace={
        'Lab Results': {r"\s.*": ''},
        'Range Max': {r"\s.*": ''},
    },
    regex=True,
)
In [268]:
# Cast both cleaned-up text columns to float.
# 'Range Max' has to be numeric too: the plotting cell takes its maximum and draws a horizontal
# line at that y value, and on strings the maximum is lexicographic (e.g. '9' sorts above '27').
# A value that is not a valid number raises here instead of silently skewing the plot.
ex_range_df = ex_range_df.astype({'Lab Results': 'float', 'Range Max': 'float'})
Questions: hyperthyroidism? anaemia? high cholesterol?
In [269]:
# Parse the 'Date' column into datetimes so rows can be ordered and plotted chronologically.
parsed_dates = pd.to_datetime(ex_range_df['Date'])
ex_range_df = ex_range_df.assign(Date=parsed_dates)
In [270]:
# Order rows chronologically so each test's line is drawn left to right.
# DataFrame.sort() was removed from pandas in 0.20; sort_values() is its replacement.
ex_range_df = ex_range_df.sort_values('Date')
In [271]:
# Preview the filtered, date-ordered subset before plotting it.
ex_range_df.head()
Out[271]:
In [272]:
# Draw one figure per selected test: the results over time, with a red horizontal line at the
# largest 'Range Max' value recorded for that test.
for test in ex_range_tests:
    test_df = ex_range_df.loc[
        ex_range_df['Lab Test'] == test,
        ['Date', 'Lab Results', 'Range Max'],
    ]
    # Coerce to float before taking the maximum: if 'Range Max' still holds text, the builtin
    # max() compares lexicographically and would hand matplotlib a string as the y coordinate.
    range_max = test_df['Range Max'].astype(float).max()
    plt.axhline(y=range_max, color='r')
    plt.plot(test_df['Date'], test_df['Lab Results'])
    plt.title(test)
    # show() inside the loop so each test gets its own figure.
    plt.show()