In [1]:
%matplotlib inline
import pandas as pd
import seaborn as sns
from collections import Counter
from pmareport import pmareport
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Instantiate the project-local pmareport.Clinic and call its drop_redundant()
# method before the frame is used below.
# NOTE(review): what Clinic() loads and which columns drop_redundant() removes
# are defined in pmareport, not in this notebook — confirm there.
clinic = pmareport.Clinic()
clinic.drop_redundant()
In [7]:
# Short alias for the clinic's DataFrame, used by every cell below.
# NOTE(review): assuming clinic.df is a plain attribute, this is the same
# object (not a copy), so column assignments on df also modify clinic.df.
df = clinic.df
In [24]:
def time_to_decimal(ser):
    """Express the time of day of a datetime Series as fractional hours.

    Parameters
    ----------
    ser : pandas.Series
        Series with a datetime-like dtype (it must support the ``.dt``
        accessor).

    Returns
    -------
    pandas.Series
        ``hour + minute / 60`` for every element, e.g. 14:45 -> 14.75.
        Seconds are not taken into account.
    """
    clock = ser.dt
    whole_hours = clock.hour
    fraction_of_hour = clock.minute / 60.0
    return whole_hours + fraction_of_hour
In [27]:
# Add a decimal-hour companion column for each of the four timestamp columns.
# Each pair is (source datetime column, new decimal-hour column).
for raw_col, decimal_col in [
    ('sched', 'schedd'),
    ('start', 'startd'),
    ('end', 'endd'),
    ('arrive', 'arrived'),
]:
    df[decimal_col] = time_to_decimal(df[raw_col])
In [28]:
# Preview the first rows to check the decimal-hour columns added above.
df.head()
Out[28]:
In [33]:
# Pair plot of arrival time against the two position-in-day columns, drawn by
# the Clinic helper.
# NOTE(review): 'arrive' is the datetime column; the decimal-hour version
# created earlier in this notebook is 'arrived' — confirm which one is meant.
clinic.make_pairplot(pair_vars=['arrive', 'appt_pos_overall', 'appt_pos_doctor'])
Out[33]:
In [13]:
# Largest per-doctor appointment position present in the data.
df.appt_pos_doctor.max()
Out[13]:
In [15]:
# Distinct calendar years covered by the `date` column.
set(df.date.dt.year)
Out[15]:
In [ ]:
In [17]:
def get_appt_pos2(pid, doc=False, frame=None):
    """Rank a patient's appointment by actual start time within its day.

    The position is the 0-based index of the patient's ``start`` value among
    the sorted, distinct, non-null ``start`` values of the comparison group.
    Appointments that share a start time therefore share a position.

    Parameters
    ----------
    pid : scalar
        Value to look up in the ``PATIENT_ID`` column. Only the first matching
        row is used if the id occurs more than once.
    doc : bool, default False
        If False, compare against every appointment on the same ``date``.
        If True, compare only against appointments on the same ``date`` with
        the same ``PROVIDER_NAME``.
    frame : pandas.DataFrame, optional
        Appointment table to search. Defaults to the notebook-level ``df``,
        which keeps existing ``get_appt_pos2(pid)`` calls working.

    Returns
    -------
    int
        0-based position of the appointment.

    Raises
    ------
    IndexError
        If no row has ``PATIENT_ID == pid``.
    ValueError
        If the patient's start time is missing (null values are excluded from
        the ranking, so the lookup cannot succeed).
    """
    data = df if frame is None else frame

    patient_rows = data[data['PATIENT_ID'] == pid]
    if len(patient_rows) == 0:
        # Same exception type the bare .iloc[0] used to raise, with a message
        # that names the offending id.
        raise IndexError('no appointment found for PATIENT_ID %r' % (pid,))

    day = patient_rows['date'].iloc[0]
    start_time = patient_rows['start'].iloc[0]

    in_group = data['date'] == day
    if doc:
        doctor = patient_rows['PROVIDER_NAME'].iloc[0]
        in_group = in_group & (data['PROVIDER_NAME'] == doctor)

    # Sorted distinct start times of the group; equivalent to the former
    # groupby('start').start.max(), which also sorts keys and drops nulls.
    ordered_starts = sorted(set(data.loc[in_group, 'start'].dropna()))
    return ordered_starts.index(start_time)
In [98]:
def get_appt_pos_doctor(pid, frame=None):
    """Rank a patient's appointment by scheduled time within the doctor's day.

    The position is the 0-based index of the patient's ``sched`` value among
    the sorted, distinct, non-null ``sched`` values of all appointments with
    the same ``date`` and the same ``PROVIDER_NAME``. Appointments scheduled
    for the same time share a position.

    Parameters
    ----------
    pid : scalar
        Value to look up in the ``PATIENT_ID`` column. Only the first matching
        row is used if the id occurs more than once.
    frame : pandas.DataFrame, optional
        Appointment table to search. Defaults to the notebook-level ``df``,
        which keeps existing ``get_appt_pos_doctor(pid)`` calls working.

    Returns
    -------
    int
        0-based position of the appointment in that doctor's schedule.

    Raises
    ------
    IndexError
        If no row has ``PATIENT_ID == pid``.
    ValueError
        If the patient's scheduled time is missing (null values are excluded
        from the ranking, so the lookup cannot succeed).
    """
    data = df if frame is None else frame

    patient_rows = data[data['PATIENT_ID'] == pid]
    if len(patient_rows) == 0:
        # Same exception type the bare .iloc[0] used to raise, with a message
        # that names the offending id.
        raise IndexError('no appointment found for PATIENT_ID %r' % (pid,))

    day = patient_rows['date'].iloc[0]
    sched_time = patient_rows['sched'].iloc[0]
    doctor = patient_rows['PROVIDER_NAME'].iloc[0]

    same_doctor_day = (data['date'] == day) & (data['PROVIDER_NAME'] == doctor)

    # Sorted distinct scheduled times; equivalent to the former
    # groupby('sched').sched.max(), which also sorts keys and drops nulls.
    ordered_scheds = sorted(set(data.loc[same_doctor_day, 'sched'].dropna()))
    appt_pos_doctor = ordered_scheds.index(sched_time)
    return appt_pos_doctor
In [112]:
# Position of every appointment within the whole clinic day.
# The column was previously misspelled 'appt_pos_verall', which created a
# column nothing else reads; the rest of the notebook uses 'appt_pos_overall'.
# NOTE(review): get_appt_pos is not defined in any visible cell of this
# notebook — it must be defined (or imported) before this cell can run.
df['appt_pos_overall'] = df.PATIENT_ID.apply(get_appt_pos)
In [18]:
# Per-doctor position of every appointment, ranked by actual start time
# (get_appt_pos2 with doc=True restricts the comparison to the same provider).
df['appt_pos_start'] = df['PATIENT_ID'].apply(
    lambda pid: get_appt_pos2(pid, doc=True)
)
In [44]:
# Rows where the two per-doctor positions disagree: 'appt_pos_doctor' versus
# the start-time-based 'appt_pos_start'.
df_incon = df[df['appt_pos_doctor'].ne(df['appt_pos_start'])]
In [45]:
# Number of appointments whose two per-doctor positions disagree.
len(df_incon)
Out[45]:
In [26]:
# Scatter of start-time-based position against 'appt_pos_doctor', coloured by
# patient condition.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` argument to `height`;
# switch it if this notebook is run against a newer seaborn.
g = (
    sns.FacetGrid(data=df, hue='PATIENT_CONDITION', size=4)
    .map(plt.scatter, 'appt_pos_start', 'appt_pos_doctor', edgecolor='w')
)
g.add_legend(fontsize=10, markerscale=2)
Out[26]:
In [46]:
# Take the first inconsistent appointment and remember its date and provider
# so that provider's full day can be inspected in the next cell.
row = df_incon.iloc[0]
day, doctor = row.date, row.PROVIDER_NAME
In [47]:
# Show every appointment of the selected provider on the selected date.
df[(df.PROVIDER_NAME == doctor) & (df.date == day)]
Out[47]:
In [4]:
# Columns plotted against each other in the PairGrid cell.
pair_vars = [
    'AGE', 'delay', 'appt_time', 'month',
    # position-in-day columns
    'appt_pos_overall', 'appt_pos_doctor',
]

# Column whose values determine the colour of each point.
hue = 'PATIENT_CONDITION'
In [6]:
# Pairwise view of the selected columns: histograms on the diagonal, scatter
# plots everywhere else, coloured by `hue`.
g = (
    sns.PairGrid(data=df, vars=pair_vars, hue=hue)
    .map_diag(plt.hist, edgecolor="w")
    .map_offdiag(plt.scatter, edgecolor='w')
)
g.add_legend(fontsize=20, markerscale=2)
# To export the figure, uncomment:
# g.savefig('appt_pos_pairgrid.png', dpi=300)
In [102]:
# Spot-check: for the first ten patient ids, print the per-doctor position
# next to the provider of the patient's first row.
# A single pre-formatted argument keeps the output identical on Python 2 and
# Python 3; the former `print a, b` statement is a SyntaxError on Python 3.
for i in df.PATIENT_ID[:10]:
    print('%s %s' % (
        get_appt_pos_doctor(i),
        df[df.PATIENT_ID == i].PROVIDER_NAME.iloc[0],
    ))
In [38]:
# List the column labels currently present in df.
df.columns
Out[38]:
In [41]:
def drop_redundant(
    cols=(
        'VISIT_DATE',
        'PT_SCHEDULED_APPT',
        'PT_ARRIVE_TIME',
        'PT_START_TIME',
        'PT_END_TIME',
    ),
    frame=None
):
    """Remove the columns named in ``cols`` from a DataFrame, in place.

    Labels that are not present in the frame are skipped, so the function can
    be re-run (or run after the columns were already removed elsewhere)
    without raising.

    Parameters
    ----------
    cols : iterable of str
        Column labels to drop. The default is an immutable tuple rather than a
        shared mutable list.
    frame : pandas.DataFrame, optional
        Frame to modify. Defaults to the notebook-level ``df``, which keeps
        existing ``drop_redundant()`` calls working.

    Returns
    -------
    None
        The frame is modified in place.
    """
    target = df if frame is None else frame
    present = [col for col in cols if col in target.columns]
    if present:
        # One drop call for all labels instead of one call per column.
        target.drop(present, axis=1, inplace=True)
In [42]:
# Drop the default set of raw columns from df. The helper works in place and
# returns None, so this cell produces no output.
drop_redundant()
In [43]:
# Preview the first rows again to confirm the raw columns are gone.
df.head()
Out[43]:
In [ ]: