In [2]:
%matplotlib inline
import pandas as pd
import seaborn as sns
from collections import Counter
from pmareport import pmareport
import numpy as np
import matplotlib.pyplot as plt
import calendar

In [3]:
clinic = pmareport.Clinic()
df = clinic.df

In [4]:
df.head()


Out[4]:
PATIENT_ID PATIENT_CONDITION INSURANCE_TYPE AGE SEX PROVIDER_NAME date sched start end ... month num_appts appt_pos_overall appt_pos_doctor cond schedd startd endd arrived since_prev_sched
0 4591553 Uncommon Cold BlueSword/BlueDiamond 31 M L. Jedermann 2005-01-03 1900-01-01 07:59:00 1900-01-01 08:07:00 1900-01-01 08:22:00 ... 1 16 0 0 uc 479 487 502 475 NaN
1 4607440 TV Personality Disorder Medifix 48 F I. Petrov 2005-01-03 1900-01-01 08:15:00 1900-01-01 08:18:00 1900-01-01 08:53:00 ... 1 16 1 0 tvp 495 498 533 495 NaN
2 4590430 TV Personality Disorder Medifix 42 M E. Medelsvensson 2005-01-03 1900-01-01 08:30:00 1900-01-01 08:42:00 1900-01-01 09:17:00 ... 1 16 2 0 tvp 510 522 557 506 NaN
3 4619481 Saturday Night Fever Self Insured 22 M J. Smith 2005-01-03 1900-01-01 09:00:00 1900-01-01 09:11:00 1900-01-01 09:39:00 ... 1 16 3 0 snf 540 551 579 543 NaN
4 4610862 Uncommon Cold Self Insured 32 M L. Jedermann 2005-01-03 1900-01-01 10:00:00 1900-01-01 10:00:00 1900-01-01 10:16:00 ... 1 16 4 1 uc 600 600 616 596 121

5 rows × 24 columns


In [5]:
# Interesting plot
df.AGE.hist(bins=70)


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3f3cdf5dd0>

In [35]:
# Nmber of appointments plot
df_month_doc = df.groupby(['month', 'PROVIDER_NAME'])
df_month_doc_cnt = df_month_doc.count()
for i in list(set(df[df.cond == 'tvp'].PROVIDER_NAME)):
    #plt.clf()
    data = df_month_doc_cnt.xs(i, level='PROVIDER_NAME')
    plt.plot(data.index, data.PATIENT_ID, label=i, linewidth=3)
plt.xticks(xrange(0, 13), calendar.month_abbr, size=13)
plt.xlim(xmin=1)
plt.yticks(size=13)
plt.title('Number of appointments per month by doctor', size=16)
plt.legend(fontsize=13)
plt.savefig('appts_per_month_doc_tpd2.png', bbox='tight', dpi=300)



In [7]:
df[df.cond == 'tvp'].groupby(['PROVIDER_NAME', 'month']).count()


Out[7]:
PATIENT_ID PATIENT_CONDITION INSURANCE_TYPE AGE SEX date sched start end arrive ... appt_time num_appts appt_pos_overall appt_pos_doctor cond schedd startd endd arrived since_prev_sched
PROVIDER_NAME month
E. Medelsvensson 1 47 47 47 47 47 47 47 47 47 47 ... 47 47 47 47 47 47 47 47 47 26
2 55 55 55 55 55 55 55 55 55 55 ... 55 55 55 55 55 55 55 55 55 31
3 54 54 54 54 54 54 54 54 54 54 ... 54 54 54 54 54 54 54 54 54 31
4 65 65 65 65 65 65 65 65 65 65 ... 65 65 65 65 65 65 65 65 65 41
5 71 71 71 71 71 71 71 71 71 71 ... 71 71 71 71 71 71 71 71 71 51
6 54 54 54 54 54 54 54 54 54 54 ... 54 54 54 54 54 54 54 54 54 33
7 85 85 85 85 85 85 85 85 85 85 ... 85 85 85 85 85 85 85 85 85 63
8 60 60 60 60 60 60 60 60 60 60 ... 60 60 60 60 60 60 60 60 60 40
9 35 35 35 35 35 35 35 35 35 35 ... 35 35 35 35 35 35 35 35 35 15
10 48 48 48 48 48 48 48 48 48 48 ... 48 48 48 48 48 48 48 48 48 30
11 47 47 47 47 47 47 47 47 47 47 ... 47 47 47 47 47 47 47 47 47 26
12 32 32 32 32 32 32 32 32 32 32 ... 32 32 32 32 32 32 32 32 32 18
I. Petrov 1 40 40 40 40 40 40 40 40 40 40 ... 40 40 40 40 40 40 40 40 40 21
2 43 43 43 43 43 43 43 43 43 43 ... 43 43 43 43 43 43 43 43 43 24
3 57 57 57 57 57 57 57 57 57 57 ... 57 57 57 57 57 57 57 57 57 33
4 47 47 47 47 47 47 47 47 47 47 ... 47 47 47 47 47 47 47 47 47 28
5 78 78 78 78 78 78 78 78 78 78 ... 78 78 78 78 78 78 78 78 78 55
6 54 54 54 54 54 54 54 54 54 54 ... 54 54 54 54 54 54 54 54 54 33
N. Fulano 1 32 32 32 32 32 32 32 32 32 32 ... 32 32 32 32 32 32 32 32 32 13
2 56 56 56 56 56 56 56 56 56 56 ... 56 56 56 56 56 56 56 56 56 34
3 62 62 62 62 62 62 62 62 62 62 ... 62 62 62 62 62 62 62 62 62 39
4 69 69 69 69 69 69 69 69 69 69 ... 69 69 69 69 69 69 69 69 69 45
5 72 72 72 72 72 72 72 72 72 72 ... 72 72 72 72 72 72 72 72 72 50
6 67 67 67 67 67 67 67 67 67 67 ... 67 67 67 67 67 67 67 67 67 44
7 93 93 93 93 93 93 93 93 93 93 ... 93 93 93 93 93 93 93 93 93 71
8 40 40 40 40 40 40 40 40 40 40 ... 40 40 40 40 40 40 40 40 40 20
9 38 38 38 38 38 38 38 38 38 38 ... 38 38 38 38 38 38 38 38 38 18
10 38 38 38 38 38 38 38 38 38 38 ... 38 38 38 38 38 38 38 38 38 22
11 41 41 41 41 41 41 41 41 41 41 ... 41 41 41 41 41 41 41 41 41 26
12 28 28 28 28 28 28 28 28 28 28 ... 28 28 28 28 28 28 28 28 28 15

30 rows × 22 columns


In [8]:
def change(a, b):
    return (b-a)/float(a)

In [9]:
change(54, 85), change(67, 93)


Out[9]:
(0.5740740740740741, 0.3880597014925373)

In [10]:
set(df.PROVIDER_NAME)


Out[10]:
{'C. Siu Ming',
 'E. Medelsvensson',
 'I. Petrov',
 'J. Smith',
 'K. van der Merwe',
 'L. Jedermann',
 'M. Dupont',
 'N. Fulano',
 'W. Plinge'}

In [11]:
df.groupby(['PROVIDER_NAME', 'PATIENT_CONDITION']).count().PATIENT_ID


Out[11]:
PROVIDER_NAME     PATIENT_CONDITION      
C. Siu Ming       Uncommon Cold              461
E. Medelsvensson  TV Personality Disorder    653
I. Petrov         TV Personality Disorder    319
J. Smith          Saturday Night Fever       903
K. van der Merwe  Saturday Night Fever       871
L. Jedermann      Uncommon Cold              481
M. Dupont         Uncommon Cold              492
N. Fulano         TV Personality Disorder    636
W. Plinge         Saturday Night Fever       890
Name: PATIENT_ID, dtype: int64

In [15]:
# number of appointments by condition plot
df_month_cond = df.groupby(['month', 'PATIENT_CONDITION'])
df_month_cond_cnt = df_month_cond.count()
for i in set(df.PATIENT_CONDITION):
    #plt.clf()
    data = df_month_cond_cnt.xs(i, level='PATIENT_CONDITION')
    plt.plot(xrange(1,13), data.PATIENT_ID, label=i)
    plt.xticks(xrange(0, 13), calendar.month_abbr)
    plt.xlim(xmin=1)
plt.title('Number of appointments per month by condition')
plt.legend()
plt.savefig('appts_per_month_cond2.png', bbox='tight', dpi=200)



In [ ]:


In [29]:
# Average delay per month by condition
df_month_cond_mean = df.groupby(['month', 'PATIENT_CONDITION']).mean()
for i in ['Uncommon Cold', 'TV Personality Disorder', 'Saturday Night Fever']:
    #plt.clf()
    data = df_month_cond_mean.xs(i, level='PATIENT_CONDITION')
    plt.plot(data.index, data.delay, label=i, linewidth=3)
    plt.xticks(xrange(0, 13), calendar.month_abbr, size=13)
    plt.yticks(size=13)
    plt.xlim(xmin=1)
plt.title('Average patient wait time per month by condition', fontsize=16)
plt.ylabel('min', fontsize=13)
plt.legend(loc=2, fontsize=13)
plt.savefig('delay_per_month_cond.png', bbox='tight', dpi=300)



In [ ]: