In [1]:
%matplotlib inline
import pandas as pd
import seaborn as sns
from collections import Counter
from pmareport import pmareport
import numpy as np
import matplotlib.pyplot as plt
import calendar
In [2]:
# Instantiate the project's Clinic helper and keep a short alias to the
# DataFrame it exposes; the cells below read their data from `df`.
clinic = pmareport.Clinic()
df = clinic.df
In [3]:
# Preview the first rows of the appointment table.
df.head()
Out[3]:
In [ ]:
In [221]:
# Numeric sex flag used by the pair plot: 0 where SEX is 'M', 1 for every
# other value (missing or unexpected codes included).
df['sex'] = (df.SEX != 'M').astype(int)
In [223]:
# Pairwise scatter matrix over the selected columns, coloured by condition.
pairplot_columns = ['appt_pos_doctor', 'sex', 'delay', 'AGE', 'appt_time']
clinic.make_pairplot(pair_vars=pairplot_columns, hue='PATIENT_CONDITION')
Out[223]:
In [ ]:
In [226]:
# AGE against appt_time, one panel per (SEX, PROVIDER_NAME) pair, coloured by
# patient condition; the grid is written to doctor_sex.png.
facet_options = dict(hue='PATIENT_CONDITION', col='PROVIDER_NAME', row='SEX',
                     size=5, aspect=1)
g = sns.FacetGrid(data=df, **facet_options)
g = g.map(plt.scatter, 'appt_time', 'AGE', edgecolor='w')
g.add_legend(fontsize=15, markerscale=3)
g.savefig('doctor_sex.png', dpi=300)
In [ ]:
In [6]:
# appt_time against AGE, one panel per doctor (three panels per row),
# coloured by patient condition.
g = sns.FacetGrid(
    data=df,
    col='PROVIDER_NAME',
    col_wrap=3,
    hue='PATIENT_CONDITION',
    size=5,
    aspect=1
).map(plt.scatter, 'AGE', 'appt_time', edgecolor='w')
g.add_legend(fontsize=15, markerscale=3)
# g.savefig('age_appt_time_by_doctor_by_month.png')
Out[6]:
In [86]:
# Daily appointment counts per doctor, one line plot per provider.
# `df_date_doctor` and `df_date_doctor_count` stay global because later cells
# read them.
df_date_doctor = df.groupby(['date', 'PROVIDER_NAME'])
df_date_doctor_count = df_date_doctor.count()
# Sorted, NaN-free provider list: stable plot order, and no KeyError from
# looking up a missing provider name that groupby has already dropped.
for provider in sorted(df.PROVIDER_NAME.dropna().unique()):
    # A fresh figure per provider. Clearing one shared figure with plt.clf()
    # left only the last provider's plot visible in the notebook output.
    plt.figure()
    df_date_doctor_count.xs(provider, level='PROVIDER_NAME').PATIENT_ID.plot()
    plt.title(provider)
    # plt.savefig('doc_appts_{}.png'.format(provider), dpi=300)
In [4]:
# Number of appointments per month, one line per patient condition.
# `df_month_cond` stays global because a later cell reuses it for the means.
df_month_cond = df.groupby(['month', 'PATIENT_CONDITION'])
df_month_cond_cnt = df_month_cond.count()
# Sorted, NaN-free condition list so the legend order is stable.
for condition in sorted(df.PATIENT_CONDITION.dropna().unique()):
    data = df_month_cond_cnt.xs(condition, level='PATIENT_CONDITION')
    # Plot against the months actually present for this condition. A fixed
    # 1..12 x-axis raises a length-mismatch error as soon as a condition has
    # no appointments in some month.
    plt.plot(data.index, data.PATIENT_ID, label=condition)
# Tick 0 carries the empty entry of calendar.month_abbr; the x-limit hides it.
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
plt.title('Number of appointments per month by condition')
plt.legend()
plt.savefig('appts_per_month_cond3.png', dpi=300)
In [77]:
# Monthly appointment counts per doctor. The grouped object and the count
# table are kept as globals because later cells read them.
df_month_doc = df.groupby(['month', 'PROVIDER_NAME'])
df_month_doc_cnt = df_month_doc.count()
for provider in set(df.PROVIDER_NAME):
    monthly = df_month_doc_cnt.xs(provider, level='PROVIDER_NAME')
    plt.plot(monthly.index, monthly.PATIENT_ID, label=provider)
# Month abbreviations as tick labels; the axis starts at 1.
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
plt.title('Number of appointments per month by doctor')
plt.legend()
plt.savefig('appts_per_month_doc.png', dpi=300)
In [213]:
# Mean appointment time per month, one line per patient condition.
# Relies on `df_month_cond` (grouped by month and condition) from an earlier
# cell.
df_month_cond_mean = df_month_cond.mean()
# Sorted, NaN-free condition list so the legend order is stable.
for condition in sorted(df.PATIENT_CONDITION.dropna().unique()):
    data = df_month_cond_mean.xs(condition, level='PATIENT_CONDITION')
    # Use the months present for this condition rather than a fixed 1..12
    # range, which fails when a condition is absent from some month.
    plt.plot(data.index, data.appt_time, label=condition)
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
plt.title('Average appointment time per month by condition')
plt.ylabel('min')
plt.legend()
# plt.savefig('appt_time_per_month_cond.png', dpi=300)
In [123]:
# Mean delay per month, one line per doctor, saved to delay_per_month_doc.png.
# Relies on `df_month_doc` (grouped by month and doctor) from an earlier cell.
df_month_doc_mean = df_month_doc.mean()
for provider in set(df.PROVIDER_NAME):
    monthly = df_month_doc_mean.xs(provider, level='PROVIDER_NAME')
    plt.plot(monthly.index, monthly.delay, label=provider)
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
plt.title('Average delay per month by doctor')
plt.ylabel('min')
plt.legend()
plt.savefig('delay_per_month_doc.png', dpi=300)
In [25]:
# Re-draw of the monthly appointment count per doctor from the existing
# `df_month_doc_cnt` table; overwrites appts_per_month_doc.png.
for provider in set(df.PROVIDER_NAME):
    monthly = df_month_doc_cnt.xs(provider, level='PROVIDER_NAME')
    plt.plot(monthly.index, monthly.PATIENT_ID, label=provider)
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
plt.title('Number of appointments per month by doctor')
plt.legend()
plt.savefig('appts_per_month_doc.png', dpi=300)
In [88]:
# Row counts for every (date, provider) pair; the next cell reads this as `a`.
date_provider_groups = df.groupby(['date', 'PROVIDER_NAME'])
a = date_provider_groups.count()
In [124]:
# Mean number of appointments per recorded day, grouped by calendar month,
# one line per doctor. Relies on `a` (counts per date and provider) from the
# previous cell. Days with no rows for a doctor do not enter the mean.
# Sorted, NaN-free provider list so the legend order is stable.
for provider in sorted(df.PROVIDER_NAME.dropna().unique()):
    daily_counts = a.xs(provider, level='PROVIDER_NAME')
    monthly_mean = daily_counts.groupby(daily_counts.index.month).PATIENT_ID.mean()
    plt.plot(monthly_mean.index, monthly_mean, label=provider)
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
# Title and y label added so the figure stands alone like the sibling plots.
plt.title('Average appointments per day by doctor')
plt.ylabel('appointments per day')
plt.legend()
Out[124]:
In [ ]:
In [99]:
# Quick look at the monthly appointment count per doctor (no title, legend or
# file output in this cell).
for provider in set(df.PROVIDER_NAME):
    monthly = df_month_doc_cnt.xs(provider, level='PROVIDER_NAME')
    plt.plot(monthly.index, monthly.PATIENT_ID, label=provider)
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
Out[99]:
In [37]:
# Scratch check of calendar.month_abbr indexing: entry 0 is an empty string,
# so index 1 is the first month's abbreviation. `calendar` is already imported
# in the first cell, so the import here is redundant.
import calendar
calendar.month_abbr[1]
Out[37]:
In [76]:
# Daily appointment counts for the provider named in the first row of `df`,
# still indexed by (date, PROVIDER_NAME).
# `.iloc[0]` picks the first row by position; the label lookup
# `df.PROVIDER_NAME[0]` fails whenever the index has no label 0.
first_provider = df.PROVIDER_NAME.iloc[0]
p = df_date_doctor_count.loc[(slice(None), first_provider), :].PATIENT_ID
In [79]:
# Keep the two-level (date, PROVIDER_NAME) index of `p` for inspection.
i = p.index
In [82]:
# Show the index with its second level (PROVIDER_NAME) removed; the result is
# only displayed, `i` itself is not modified.
i.droplevel(1)
Out[82]:
In [114]:
# Tab-separated table: provider (first 10 characters), then mean and standard
# deviation of the daily appointment count.
# Sorted, NaN-free provider list so the rows come out in a stable order.
for provider in sorted(df.PROVIDER_NAME.dropna().unique()):
    p2 = df_date_doctor_count.xs(provider, level='PROVIDER_NAME').PATIENT_ID
    # Single-argument call form: prints the same text under Python 2 and is
    # valid syntax under Python 3, unlike the bare print statement.
    print('{}\t{:.2f}\t{:.2f}'.format(provider[:10], p2.mean(), p2.std()))
In [205]:
# Rows for 'M. Dupont' on the date found in the first row of the table.
is_dupont = df.PROVIDER_NAME == 'M. Dupont'
on_first_date = df.date == df.date.iloc[0]
df[is_dupont & on_first_date]
Out[205]:
In [ ]:
In [257]:
# Mean `since_prev_sched` per month, one line per doctor.
month_provider_groups = df.groupby(['month', 'PROVIDER_NAME'])
df_month_doc_mean = month_provider_groups.mean()
for provider in set(df.PROVIDER_NAME):
    monthly = df_month_doc_mean.xs(provider, level='PROVIDER_NAME')
    plt.plot(monthly.index, monthly.since_prev_sched, label=provider)
plt.xticks(range(0, 13), calendar.month_abbr)
plt.xlim(xmin=1)
plt.title('Average time between appointments (scheduled time)')
plt.ylabel('hrs')
plt.legend()
# NOTE: the file name below is the one the delay plot writes to; pick a new
# name before enabling it.
# plt.savefig('delay_per_month_doc.png', dpi=300)
Out[257]:
In [232]:
# Distinct values of the PATIENT_CONDITION column.
set(df.PATIENT_CONDITION)
Out[232]:
In [233]:
In [267]:
# Number of rows whose `since_prev_sched` is below 5/60.
# Presumably five minutes, given that the plot of this column labels it 'hrs'.
gap_threshold = 5.0/60
short_gap = df.since_prev_sched < gap_threshold
len(df[short_gap])
Out[267]:
In [269]:
# Number of rows with `cond == 'uc'` whose `since_prev_sched` is below 16/60.
# Presumably sixteen minutes, given that the plot of this column labels it 'hrs'.
gap_threshold = 16.0/60
short_gap = df.since_prev_sched < gap_threshold
is_uc = df.cond == 'uc'
len(df[short_gap & is_uc])
Out[269]:
In [212]:
# Summary statistics of the table's columns.
df.describe()
Out[212]:
In [161]:
In [164]:
# Preview the first rows again, now including the columns added above.
df.head()
Out[164]:
In [255]:
In [256]:
In [ ]:
# TODO: unfinished scratch cell. Calling `df.groupby()` with neither `by` nor
# `level` raises a TypeError, which stops "Run All" before the cells below.
# The call stays disabled until the grouping keys are decided.
# df.groupby()
In [136]:
# Earliest `start` and latest `end` per day, as decimal hours, one saved
# figure per doctor. Relies on `df_date_doctor` (grouped by date and doctor)
# from an earlier cell.
# The two aggregations do not depend on the doctor, so they are computed once
# rather than on every iteration.
day_start = df_date_doctor.start.min()
day_end = df_date_doctor.end.max()
for provider in sorted(df.PROVIDER_NAME.dropna().unique()):
    plt.clf()  # reuse one figure; each provider is written to its own file
    s = day_start.xs(provider, level='PROVIDER_NAME').dt
    (s.hour + s.minute/60.0).plot(label='first start')
    e = day_end.xs(provider, level='PROVIDER_NAME').dt
    (e.hour + e.minute/60.0).plot(label='last end')
    # Title, y label and legend so each saved file identifies the doctor and
    # tells the two lines apart.
    plt.title(provider)
    plt.ylabel('hour of day')
    plt.legend()
    plt.savefig('start_end_{}.png'.format(provider), dpi=300)
In [137]:
# Length of the working day (latest `end` minus earliest `start`, in decimal
# hours), one saved figure per doctor. Relies on `df_date_doctor` from an
# earlier cell.
# The aggregations are the same for every doctor, so they are computed once.
day_start = df_date_doctor.start.min()
day_end = df_date_doctor.end.max()
for provider in sorted(df.PROVIDER_NAME.dropna().unique()):
    plt.clf()  # reuse one figure; each provider is written to its own file
    s = day_start.xs(provider, level='PROVIDER_NAME').dt
    e = day_end.xs(provider, level='PROVIDER_NAME').dt
    ((e.hour + e.minute/60.0) - (s.hour + s.minute/60.0)).plot()
    # Title and y label so each saved file identifies the doctor and the unit.
    plt.title(provider)
    plt.ylabel('hrs')
    plt.savefig('length_of_day_{}.png'.format(provider), dpi=300)
In [133]:
# Row counts per (date, sched, provider) combination.
slot_keys = ['date', 'sched', 'PROVIDER_NAME']
appt_counts = df.groupby(slot_keys).count()
In [137]:
# Combinations of date, sched and provider that hold exactly three rows.
has_three = appt_counts.PATIENT_ID == 3
appt_counts.loc[has_three]
Out[137]:
In [214]:
# List the column labels currently in the table.
df.columns
Out[214]:
In [ ]:
In [ ]: