In [1]:
# Set up autoreloading of modules so that I can debug code in external files
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib as mp
import matplotlib.pyplot as plt

import bydatetime
import hillpylib as hm
from pandas import Timestamp

# Let's check what version of pandas, numpy and matplotlib we are using
print ("pandas version ", pd.__version__)
print ("numpy version ", np.version.version)
print ("matplotlib version ", mp.__version__)


pandas version  0.16.2
numpy version  1.9.2
matplotlib version  1.4.3

Put it all together

Below I've strung together all the pieces to do an entire Hillmaker run. Change inputs as needed (e.g. scenario_name and associated parameter values) and run all the cells below. You can skip rereading the main input file if that isn't changing.

Read main stop data file


In [3]:
# Read the main stop-data file; parse the in/out room timestamps on load
# so they arrive as datetime64 columns rather than strings.
file_stopdata = 'data/ShortStay.csv'
df = pd.read_csv(file_stopdata, parse_dates=['InRoomTS','OutRoomTS'])
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 59877 entries, 0 to 59876
Data columns (total 4 columns):
PatID        59877 non-null int64
InRoomTS     59877 non-null datetime64[ns]
OutRoomTS    59877 non-null datetime64[ns]
PatType      59877 non-null object
dtypes: datetime64[ns](2), int64(1), object(1)
memory usage: 2.3+ MB

Set input parameters


In [4]:
# Required inputs
scenario_name = 'sstest_60'          # used to name all output files
in_fld_name = 'InRoomTS'             # column with stop start (arrival) time
out_fld_name = 'OutRoomTS'           # column with stop end (departure) time
cat_fld_name = 'PatType'             # column defining the analysis categories
start_analysis = '1/1/1996'
end_analysis = '3/30/1996 23:45'


# Optional inputs

# This next field wasn't in original Hillmaker. Use it to specify the name to use for the overall totals.
# At this point the totals actually aren't being calculated.
tot_fld_name = 'SSU'

bin_size_mins = 60

# Only these categories are kept for the analysis.
includecats = ['ART','IVT']

## Convert string dates to actual datetimes
# NOTE(review): these Timestamp versions aren't used below (the string
# dates are passed to make_bydatetime); kept for reference.
start_analysis_dt = pd.Timestamp(start_analysis)
end_analysis_dt = pd.Timestamp(end_analysis)

# Mapper from weekday integer to string
daynum_to_dayname = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}

Create the by datetime table


In [13]:
# Keep only the patient types of interest. NOTE(review): `df2` is a
# throwaway name; a later cell applies the same filter to `df` itself.
df2 = df[df['PatType'].isin(includecats)]

In [14]:
df2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 38940 entries, 0 to 59875
Data columns (total 4 columns):
PatID        38940 non-null int64
InRoomTS     38940 non-null datetime64[ns]
OutRoomTS    38940 non-null datetime64[ns]
PatType      38940 non-null object
dtypes: datetime64[ns](2), int64(1), object(1)
memory usage: 1.5+ MB

In [15]:
# Per-category summary of the numeric columns to sanity-check the filter.
df2.groupby('PatType').describe()


Out[15]:
PatID
PatType
ART count 5761.000000
mean 29665.735983
std 17236.415692
min 10.000000
25% 14738.000000
50% 29511.000000
75% 44784.000000
max 59826.000000
IVT count 33179.000000
mean 30015.498357
std 17270.541655
min 1.000000
25% 15099.500000
50% 29956.000000
75% 45027.500000
max 59876.000000

In [16]:
# NOTE(review): this overwrites `df` in place, so the raw frame is no
# longer recoverable without re-reading the CSV. Re-running happens to be
# harmless only because the isin() filter is idempotent.
df = df[df['PatType'].isin(includecats)]

In [17]:
df.groupby('PatType').describe()


Out[17]:
PatID
PatType
ART count 5761.000000
mean 29665.735983
std 17236.415692
min 10.000000
25% 14738.000000
50% 29511.000000
75% 44784.000000
max 59826.000000
IVT count 33179.000000
mean 30015.498357
std 17270.541655
min 1.000000
25% 15099.500000
50% 29956.000000
75% 45027.500000
max 59876.000000

In [5]:
# Build the by-datetime table: one row per (category, datetime-bin) with
# arrivals, departures and occupancy columns. Note the *string* analysis
# dates are passed here, not the pre-computed Timestamp versions.
bydt_df = bydatetime.make_bydatetime(df,
                                     in_fld_name,
                                     out_fld_name,
                                     cat_fld_name,
                                     start_analysis,
                                     end_analysis,
                                     tot_fld_name,
                                     bin_size_mins)


rng_bydt created: 0.0016
using derived categories: 0.0090
Seeded bydatetime DataFrame created: 0.0460
dayofweek, bin_of_day, bin_of_week computed: 0.3290
Multi-index on bydatetime DataFrame created: 0.3322
Multi-index fully lexsorted: 0.3887
Latest edits at 2015-06-29 08:29:47.731424
Num inner: 19795
Done processing 19795 stop recs: 17.6089
Done adding totals: 17.6986

In [6]:
bydt_df.dtypes


Out[6]:
category               object
datetime       datetime64[ns]
arrivals              float64
departures            float64
occupancy             float64
day_of_week             int64
bin_of_day              int64
bin_of_week             int64
dtype: object

In [50]:
bydt_df


Out[50]:
category datetime arrivals departures occupancy day_of_week bin_of_day bin_of_week
category datetime
ART 1996-01-01 00:00:00 ART 1996-01-01 00:00:00 0 0 0.000000 0 0 0
1996-01-01 01:00:00 ART 1996-01-01 01:00:00 0 0 0.000000 0 1 1
1996-01-01 02:00:00 ART 1996-01-01 02:00:00 0 0 0.000000 0 2 2
1996-01-01 03:00:00 ART 1996-01-01 03:00:00 0 0 0.000000 0 3 3
1996-01-01 04:00:00 ART 1996-01-01 04:00:00 0 0 0.000000 0 4 4
1996-01-01 05:00:00 ART 1996-01-01 05:00:00 0 0 0.000000 0 5 5
1996-01-01 06:00:00 ART 1996-01-01 06:00:00 0 0 0.000000 0 6 6
1996-01-01 07:00:00 ART 1996-01-01 07:00:00 0 0 0.000000 0 7 7
1996-01-01 08:00:00 ART 1996-01-01 08:00:00 0 0 0.000000 0 8 8
1996-01-01 09:00:00 ART 1996-01-01 09:00:00 0 0 0.000000 0 9 9
1996-01-01 10:00:00 ART 1996-01-01 10:00:00 0 0 0.000000 0 10 10
1996-01-01 11:00:00 ART 1996-01-01 11:00:00 0 0 0.000000 0 11 11
1996-01-01 12:00:00 ART 1996-01-01 12:00:00 0 0 0.000000 0 12 12
1996-01-01 13:00:00 ART 1996-01-01 13:00:00 0 0 0.000000 0 13 13
1996-01-01 14:00:00 ART 1996-01-01 14:00:00 0 0 0.000000 0 14 14
1996-01-01 15:00:00 ART 1996-01-01 15:00:00 0 0 0.000000 0 15 15
1996-01-01 16:00:00 ART 1996-01-01 16:00:00 0 0 0.000000 0 16 16
1996-01-01 17:00:00 ART 1996-01-01 17:00:00 0 0 0.000000 0 17 17
1996-01-01 18:00:00 ART 1996-01-01 18:00:00 0 0 0.000000 0 18 18
1996-01-01 19:00:00 ART 1996-01-01 19:00:00 0 0 0.000000 0 19 19
1996-01-01 20:00:00 ART 1996-01-01 20:00:00 0 0 0.000000 0 20 20
1996-01-01 21:00:00 ART 1996-01-01 21:00:00 0 0 0.000000 0 21 21
1996-01-01 22:00:00 ART 1996-01-01 22:00:00 0 0 0.000000 0 22 22
1996-01-01 23:00:00 ART 1996-01-01 23:00:00 0 0 0.000000 0 23 23
1996-01-02 00:00:00 ART 1996-01-02 00:00:00 0 0 0.000000 1 0 24
1996-01-02 01:00:00 ART 1996-01-02 01:00:00 0 0 0.000000 1 1 25
1996-01-02 02:00:00 ART 1996-01-02 02:00:00 0 0 0.000000 1 2 26
1996-01-02 03:00:00 ART 1996-01-02 03:00:00 0 0 0.000000 1 3 27
1996-01-02 04:00:00 ART 1996-01-02 04:00:00 0 0 0.000000 1 4 28
1996-01-02 05:00:00 ART 1996-01-02 05:00:00 0 0 0.000000 1 5 29
... ... ... ... ... ... ... ... ... ...
IVT 1996-03-29 18:00:00 IVT 1996-03-29 18:00:00 9 11 8.350000 4 18 114
1996-03-29 19:00:00 IVT 1996-03-29 19:00:00 2 6 4.700000 4 19 115
1996-03-29 20:00:00 IVT 1996-03-29 20:00:00 1 2 2.433333 4 20 116
1996-03-29 21:00:00 IVT 1996-03-29 21:00:00 0 2 0.833333 4 21 117
1996-03-29 22:00:00 IVT 1996-03-29 22:00:00 0 0 0.000000 4 22 118
1996-03-29 23:00:00 IVT 1996-03-29 23:00:00 0 0 0.000000 4 23 119
1996-03-30 00:00:00 IVT 1996-03-30 00:00:00 0 0 0.000000 5 0 120
1996-03-30 01:00:00 IVT 1996-03-30 01:00:00 0 0 0.000000 5 1 121
1996-03-30 02:00:00 IVT 1996-03-30 02:00:00 0 0 0.000000 5 2 122
1996-03-30 03:00:00 IVT 1996-03-30 03:00:00 0 0 0.000000 5 3 123
1996-03-30 04:00:00 IVT 1996-03-30 04:00:00 0 0 0.000000 5 4 124
1996-03-30 05:00:00 IVT 1996-03-30 05:00:00 0 0 0.000000 5 5 125
1996-03-30 06:00:00 IVT 1996-03-30 06:00:00 2 0 0.316667 5 6 126
1996-03-30 07:00:00 IVT 1996-03-30 07:00:00 3 1 2.766667 5 7 127
1996-03-30 08:00:00 IVT 1996-03-30 08:00:00 3 4 2.766667 5 8 128
1996-03-30 09:00:00 IVT 1996-03-30 09:00:00 3 4 2.450000 5 9 129
1996-03-30 10:00:00 IVT 1996-03-30 10:00:00 5 2 3.783333 5 10 130
1996-03-30 11:00:00 IVT 1996-03-30 11:00:00 2 6 3.816667 5 11 131
1996-03-30 12:00:00 IVT 1996-03-30 12:00:00 3 1 1.700000 5 12 132
1996-03-30 13:00:00 IVT 1996-03-30 13:00:00 0 3 0.750000 5 13 133
1996-03-30 14:00:00 IVT 1996-03-30 14:00:00 0 0 0.000000 5 14 134
1996-03-30 15:00:00 IVT 1996-03-30 15:00:00 0 0 0.000000 5 15 135
1996-03-30 16:00:00 IVT 1996-03-30 16:00:00 0 0 0.000000 5 16 136
1996-03-30 17:00:00 IVT 1996-03-30 17:00:00 0 0 0.000000 5 17 137
1996-03-30 18:00:00 IVT 1996-03-30 18:00:00 0 0 0.000000 5 18 138
1996-03-30 19:00:00 IVT 1996-03-30 19:00:00 0 0 0.000000 5 19 139
1996-03-30 20:00:00 IVT 1996-03-30 20:00:00 0 0 0.000000 5 20 140
1996-03-30 21:00:00 IVT 1996-03-30 21:00:00 0 0 0.000000 5 21 141
1996-03-30 22:00:00 IVT 1996-03-30 22:00:00 0 0 0.000000 5 22 142
1996-03-30 23:00:00 IVT 1996-03-30 23:00:00 0 0 0.000000 5 23 143

4320 rows × 8 columns


In [51]:
# Group by datetime alone (collapsing categories) to build overall totals.
bydt_group = bydt_df.groupby(['datetime'])

In [54]:
# Sum across categories at each datetime to get facility-wide totals.
tot_arrivals = bydt_group.arrivals.sum()
tot_departures = bydt_group.departures.sum()
tot_occ = bydt_group.occupancy.sum()

#bydt_totals = pd.DataFrame(tot_arrivals)

In [84]:
# NOTE(review): this cell is an earlier draft — the next cell rebuilds
# tot_df identically and then adds the day/bin columns; consider deleting.
tot_data = [tot_arrivals,tot_departures,tot_occ]
tot_df = pd.concat(tot_data, axis = 1, keys = [s.name for s in tot_data])

In [90]:
# Combine the three totals Series into one DataFrame, one column each
# (keys come from the Series names: arrivals/departures/occupancy).
tot_data = [tot_arrivals,tot_departures,tot_occ]
tot_df = pd.concat(tot_data, axis = 1, keys = [s.name for s in tot_data])
# Derive the same day/bin fields that make_bydatetime computes, so the
# totals rows line up column-for-column with the per-category rows.
tot_df['day_of_week'] = tot_df.index.map(lambda x: x.weekday())
tot_df['bin_of_day'] =  tot_df.index.map(lambda x: hm.bin_of_day(x,bin_size_mins))
tot_df['bin_of_week'] = tot_df.index.map(lambda x: hm.bin_of_week(x,bin_size_mins))

In [91]:
# Tag the totals rows with the totals pseudo-category, then give tot_df
# the same (category, datetime) MultiIndex layout as bydt_df so the two
# frames can be concatenated. drop=False keeps category as a column too.
tot_df['category'] = tot_fld_name
tot_df.set_index('category', append=True, inplace=True, drop=False)
tot_df = tot_df.reorder_levels(['category', 'datetime'])
# FIX: use get_level_values, not .levels[1] — .levels returns the *unique,
# sorted* values of a level (not per-row values), which only happened to
# align row-by-row because there is a single category and the datetimes
# are already sorted and unique. get_level_values is correct in general.
tot_df['datetime'] = tot_df.index.get_level_values('datetime')

In [ ]:
tot_df

In [94]:
tot_df.info()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 2160 entries, (Total, 1996-01-01 00:00:00) to (Total, 1996-03-30 23:00:00)
Data columns (total 8 columns):
arrivals       2160 non-null float64
departures     2160 non-null float64
occupancy      2160 non-null float64
day_of_week    2160 non-null int64
bin_of_day     2160 non-null int64
bin_of_week    2160 non-null int64
category       2160 non-null object
datetime       2160 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(3), int64(3), object(1)
memory usage: 158.4+ KB

In [96]:
# Append the totals rows to the per-category by-datetime table.
bydt_df = pd.concat([bydt_df,tot_df])

In [ ]:
bydt_df.tail(n=25)

Compute summary stats


In [6]:
def get_occstats(group, stub=''):
    """Compute summary statistics for a group of occupancy-like values.

    Parameters
    ----------
    group : pandas.Series (or GroupBy group)
        Values to summarize; must support count/mean/min/max/std/quantile.
    stub : str, optional
        Prefix prepended to every key of the returned dict (default '').

    Returns
    -------
    dict
        Maps stat name (with ``stub`` prefix) to the computed value.
    """
    # FIX: the original omitted the stub prefix on 'stdev', making it
    # inconsistent with every other key (and colliding across calls with
    # different stubs). With the default stub='' the keys are unchanged,
    # so the existing calls below are unaffected.
    stats = {stub + 'count': group.count(),
             stub + 'mean': group.mean(),
             stub + 'min': group.min(),
             stub + 'max': group.max(),
             stub + 'stdev': group.std()}
    # Percentiles of interest for capacity planning (median through p99).
    pct_labels = ['p50', 'p55', 'p60', 'p65', 'p70', 'p75',
                  'p80', 'p85', 'p90', 'p95', 'p975', 'p99']
    pct_values = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75,
                  0.8, 0.85, 0.9, 0.95, 0.975, 0.99]
    for label, q in zip(pct_labels, pct_values):
        stats[stub + label] = group.quantile(q)
    return stats

In [22]:
# Group by category, weekday and time-of-day bin, e.g. (ART, Mon, 08:00).
bydt_dfgrp2 = bydt_df.groupby(['category','day_of_week','bin_of_day'])

# Apply the stats dict to each group; result is a Series with the stat
# name appended as an extra index level.
occ_stats = bydt_dfgrp2['occupancy'].apply(get_occstats)
arr_stats = bydt_dfgrp2['arrivals'].apply(get_occstats)
dep_stats = bydt_dfgrp2['departures'].apply(get_occstats)

# Unstack the stat-name level into columns -> one row per (category, day, bin).
occ_stats_summary = occ_stats.unstack()
arr_stats_summary = arr_stats.unstack()
dep_stats_summary = dep_stats.unstack()

In [32]:
occ_stats.dtype


Out[32]:
dtype('float64')

In [33]:
type(occ_stats)


Out[33]:
pandas.core.series.Series

In [36]:
occ_stats_summary.info()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 336 entries, (ART, 0, 0) to (IVT, 6, 23)
Data columns (total 17 columns):
count    336 non-null float64
max      336 non-null float64
mean     336 non-null float64
min      336 non-null float64
p50      336 non-null float64
p55      336 non-null float64
p60      336 non-null float64
p65      336 non-null float64
p70      336 non-null float64
p75      336 non-null float64
p80      336 non-null float64
p85      336 non-null float64
p90      336 non-null float64
p95      336 non-null float64
p975     336 non-null float64
p99      336 non-null float64
stdev    336 non-null float64
dtypes: float64(17)
memory usage: 46.2+ KB

Write summaries and by datetime out to CSV


In [23]:
# Output file paths are derived from the scenario name.
file_bydt_csv = 'testing/bydate_' + scenario_name + '.csv'
# index=False is safe here: category and datetime also exist as regular
# columns in bydt_df, so dropping the MultiIndex loses no information.
bydt_df.to_csv(file_bydt_csv, index=False)

file_occ_csv = 'testing/occ_stats_' + scenario_name + '.csv'
file_arr_csv = 'testing/arr_stats_' + scenario_name + '.csv'
file_dep_csv = 'testing/dep_stats_' + scenario_name + '.csv'

# The summary frames keep their (category, day_of_week, bin_of_day) index.
occ_stats_summary.to_csv(file_occ_csv)
arr_stats_summary.to_csv(file_arr_csv)
dep_stats_summary.to_csv(file_dep_csv)

Debugging


In [10]:
ts = pd.Timestamp('19960103 00:00:00')
print(ts)


1996-01-03 00:00:00

In [25]:
24000/24


Out[25]:
1000.0

In [11]:
# Debugging aid: ART stops that started before `ts` (1996-01-03, above).
df_ART = df[(df.PatType == 'ART') & (df.InRoomTS < ts)]

In [12]:
df_ART.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 30 entries, 9 to 174
Data columns (total 4 columns):
PatID        30 non-null int64
InRoomTS     30 non-null datetime64[ns]
OutRoomTS    30 non-null datetime64[ns]
PatType      30 non-null object
dtypes: datetime64[ns](2), int64(1), object(1)
memory usage: 1.2+ KB

In [13]:
df_ART


Out[13]:
PatID InRoomTS OutRoomTS PatType
9 10 1996-01-02 06:35:00 1996-01-02 08:30:00 ART
12 13 1996-01-02 06:55:00 1996-01-02 08:45:00 ART
15 16 1996-01-02 07:18:00 1996-01-02 08:53:00 ART
17 18 1996-01-02 07:23:00 1996-01-02 08:30:00 ART
21 22 1996-01-02 07:33:00 1996-01-02 08:45:00 ART
24 25 1996-01-02 07:40:00 1996-01-02 08:55:00 ART
25 26 1996-01-02 07:50:00 1996-01-02 10:10:00 ART
27 28 1996-01-02 07:52:00 1996-01-02 09:10:00 ART
30 31 1996-01-02 08:00:00 1996-01-02 09:00:00 ART
31 32 1996-01-02 08:02:00 1996-01-02 10:00:00 ART
33 34 1996-01-02 08:23:00 1996-01-02 09:25:00 ART
35 36 1996-01-02 08:27:00 1996-01-02 10:15:00 ART
38 39 1996-01-02 08:43:00 1996-01-02 09:45:00 ART
42 43 1996-01-02 09:08:00 1996-01-02 10:10:00 ART
46 47 1996-01-02 09:12:00 1996-01-02 11:15:00 ART
50 51 1996-01-02 09:35:00 1996-01-02 10:40:00 ART
51 52 1996-01-02 09:36:00 1996-01-02 11:15:00 ART
60 61 1996-01-02 09:53:00 1996-01-02 12:00:00 ART
79 80 1996-01-02 10:38:00 1996-01-02 12:30:00 ART
83 84 1996-01-02 10:47:00 1996-01-02 12:15:00 ART
99 100 1996-01-02 11:23:00 1996-01-02 12:40:00 ART
100 101 1996-01-02 11:23:00 1996-01-02 12:50:00 ART
101 102 1996-01-02 11:29:00 1996-01-02 12:35:00 ART
108 109 1996-01-02 11:50:00 1996-01-02 13:45:00 ART
109 110 1996-01-02 11:54:00 1996-01-02 13:00:00 ART
124 125 1996-01-02 12:49:00 1996-01-02 13:50:00 ART
135 136 1996-01-02 13:17:00 1996-01-02 14:31:00 ART
144 145 1996-01-02 13:39:00 1996-01-02 15:30:00 ART
146 147 1996-01-02 13:45:00 1996-01-02 15:20:00 ART
174 175 1996-01-02 15:21:00 1996-01-02 17:00:00 ART

In [14]:
bydt_df.head()


Out[14]:
category datetime arrivals departures occupancy day_of_week bin_of_day bin_of_week
category datetime
ART 1996-01-01 00:00:00 ART 1996-01-01 00:00:00 0 0 0 0 0 0
1996-01-01 01:00:00 ART 1996-01-01 01:00:00 0 0 0 0 1 1
1996-01-01 02:00:00 ART 1996-01-01 02:00:00 0 0 0 0 2 2
1996-01-01 03:00:00 ART 1996-01-01 03:00:00 0 0 0 0 3 3
1996-01-01 04:00:00 ART 1996-01-01 04:00:00 0 0 0 0 4 4

In [15]:
bydt_df[25:50]


Out[15]:
category datetime arrivals departures occupancy day_of_week bin_of_day bin_of_week
category datetime
ART 1996-01-02 01:00:00 ART 1996-01-02 01:00:00 0 0 0.000000 1 1 25
1996-01-02 02:00:00 ART 1996-01-02 02:00:00 0 0 0.000000 1 2 26
1996-01-02 03:00:00 ART 1996-01-02 03:00:00 0 0 0.000000 1 3 27
1996-01-02 04:00:00 ART 1996-01-02 04:00:00 0 0 0.000000 1 4 28
1996-01-02 05:00:00 ART 1996-01-02 05:00:00 0 0 0.000000 1 5 29
1996-01-02 06:00:00 ART 1996-01-02 06:00:00 2 0 0.500000 1 6 30
1996-01-02 07:00:00 ART 1996-01-02 07:00:00 6 0 4.400000 1 7 31
1996-01-02 08:00:00 ART 1996-01-02 08:00:00 5 6 9.716667 1 8 32
1996-01-02 09:00:00 ART 1996-01-02 09:00:00 5 4 6.933333 1 9 33
1996-01-02 10:00:00 ART 1996-01-02 10:00:00 2 5 4.833333 1 10 34
1996-01-02 11:00:00 ART 1996-01-02 11:00:00 5 2 5.516667 1 11 35
1996-01-02 12:00:00 ART 1996-01-02 12:00:00 1 6 5.016667 1 12 36
1996-01-02 13:00:00 ART 1996-01-02 13:00:00 3 3 2.900000 1 13 37
1996-01-02 14:00:00 ART 1996-01-02 14:00:00 0 1 2.516667 1 14 38
1996-01-02 15:00:00 ART 1996-01-02 15:00:00 1 2 1.483333 1 15 39
1996-01-02 16:00:00 ART 1996-01-02 16:00:00 0 0 1.000000 1 16 40
1996-01-02 17:00:00 ART 1996-01-02 17:00:00 0 1 0.000000 1 17 41
1996-01-02 18:00:00 ART 1996-01-02 18:00:00 0 0 0.000000 1 18 42
1996-01-02 19:00:00 ART 1996-01-02 19:00:00 0 0 0.000000 1 19 43
1996-01-02 20:00:00 ART 1996-01-02 20:00:00 0 0 0.000000 1 20 44
1996-01-02 21:00:00 ART 1996-01-02 21:00:00 0 0 0.000000 1 21 45
1996-01-02 22:00:00 ART 1996-01-02 22:00:00 0 0 0.000000 1 22 46
1996-01-02 23:00:00 ART 1996-01-02 23:00:00 0 0 0.000000 1 23 47
1996-01-03 00:00:00 ART 1996-01-03 00:00:00 0 0 0.000000 2 0 48
1996-01-03 01:00:00 ART 1996-01-03 01:00:00 0 0 0.000000 2 1 49

In [11]:
import numpy as np
import pandas as pd
from pandas import Timestamp

import hillmaker as hm

# Second analysis scenario: simulation unit-stop log instead of the
# short-stay clinical data. Same parameter names as the first scenario.
file_stopdata = 'data/unit_stop_log_Experiment1_Scenario1_Rep1.csv'

scenario_name = 'log_unitocc_test'
in_fld_name = 'EnteredTS'
out_fld_name = 'ExitedTS'
cat_fld_name = 'Unit'
start_analysis = '3/24/2015 00:00'
end_analysis = '6/16/2016 00:00'

# Optional inputs

tot_fld_name = 'OBTot'
bin_size_mins = 60
includecats = ['LDR','PP']

In [21]:
# Load the simulation log and convert simulation-clock hour offsets into
# real timestamps, anchored at `basedate`. Rounding the nanosecond .value
# to the nearest second (round(..., -9)) strips sub-second noise.
stops_df = pd.read_csv(file_stopdata,index_col=0)
basedate = Timestamp('20150215 00:00:00')

# BUG FIX: the original applied these lambdas to `df` (a leftover frame
# from earlier cells) instead of `stops_df`, so on a fresh kernel the new
# columns would be misaligned with stops_df's rows or fail outright.
stops_df['EnteredTS'] = stops_df.apply(lambda row:
                           Timestamp(round((basedate + pd.DateOffset(hours=row['Entered'])).value,-9)), axis=1)

stops_df['ExitedTS'] = stops_df.apply(lambda row:
                          Timestamp(round((basedate + pd.DateOffset(hours=row['Exited'])).value,-9)), axis=1)

# Keep only the unit categories being analyzed.
stops_df = stops_df[stops_df[cat_fld_name].isin(includecats)]

In [22]:
stops_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 33036 entries, 6 to 52858
Data columns (total 16 columns):
EntityID                    33036 non-null int64
PatientType                 33036 non-null int64
Unit                        33036 non-null object
TriedToEnter                33036 non-null float64
Entered                     33036 non-null float64
TriedToExit                 33036 non-null float64
Exited                      33036 non-null float64
PlannedLOS                  33036 non-null float64
Entered_TriedToEnter        33036 non-null float64
TriedToExit_Entered         33036 non-null float64
Exit_TriedToExit            33036 non-null float64
Exit_Enter                  33036 non-null float64
Exit_TriedToEnter           33036 non-null float64
BlockedByDownstreamHours    33036 non-null float64
EnteredTS                   33036 non-null datetime64[ns]
ExitedTS                    33036 non-null datetime64[ns]
dtypes: datetime64[ns](2), float64(11), int64(2), object(1)
memory usage: 4.3+ MB

In [25]:
stops_df[100:125]


Out[25]:
EntityID PatientType Unit TriedToEnter Entered TriedToExit Exited PlannedLOS Entered_TriedToEnter TriedToExit_Entered Exit_TriedToExit Exit_Enter Exit_TriedToEnter BlockedByDownstreamHours EnteredTS ExitedTS
186 115 2 LDR 111.822783 111.822783 116.433687 116.433687 4.610904 0 4.610904 0 4.610904 4.610904 0 2015-02-19 15:49:22 2015-02-19 20:26:01
188 78 1 PP 81.429555 81.429555 117.100727 117.100727 35.671172 0 35.671172 0 35.671172 35.671172 0 2015-02-18 09:25:46 2015-02-19 21:06:03
190 107 1 LDR 96.741526 96.741526 117.603920 117.603920 20.862394 0 20.862394 0 20.862394 20.862394 0 2015-02-19 00:44:29 2015-02-19 21:36:14
191 113 1 LDR 109.355065 109.355065 117.860324 117.860324 8.505259 0 8.505259 0 8.505259 8.505259 0 2015-02-19 13:21:18 2015-02-19 21:51:37
192 81 1 PP 72.853390 72.853390 118.449390 118.449390 45.596000 0 45.596000 0 45.596000 45.596000 0 2015-02-18 00:51:12 2015-02-19 22:26:58
194 119 1 LDR 117.488525 117.488525 118.886377 118.886377 1.397852 0 1.397852 0 1.397852 1.397852 0 2015-02-19 21:29:19 2015-02-19 22:53:11
198 90 1 PP 89.582508 89.582508 119.693251 119.693251 30.110743 0 30.110743 0 30.110743 30.110743 0 2015-02-18 17:34:57 2015-02-19 23:41:36
201 106 1 PP 113.527647 113.527647 122.551096 122.551096 9.023449 0 9.023449 0 9.023449 9.023449 0 2015-02-19 17:31:40 2015-02-20 02:33:04
202 124 1 LDR 120.268748 120.268748 122.802096 122.802096 2.533347 0 2.533347 0 2.533347 2.533347 0 2015-02-20 00:16:07 2015-02-20 02:48:08
203 86 1 PP 82.139208 82.139208 122.854289 122.854289 40.715081 0 40.715081 0 40.715081 40.715081 0 2015-02-18 10:08:21 2015-02-20 02:51:15
205 101 1 PP 103.368092 103.368092 123.513883 123.513883 20.145791 0 20.145791 0 20.145791 20.145791 0 2015-02-19 07:22:05 2015-02-20 03:30:50
206 117 2 LDR 113.990152 113.990152 123.867909 123.867909 9.877757 0 9.877757 0 9.877757 9.877757 0 2015-02-19 17:59:25 2015-02-20 03:52:04
207 113 1 PP 117.860324 117.860324 123.887093 123.887093 6.026769 0 6.026769 0 6.026769 6.026769 0 2015-02-19 21:51:37 2015-02-20 03:53:14
208 120 1 LDR 118.498566 118.498566 125.034617 125.034617 6.536051 0 6.536051 0 6.536051 6.536051 0 2015-02-19 22:29:55 2015-02-20 05:02:05
209 121 1 LDR 119.138490 119.138490 125.367563 125.367563 6.229072 0 6.229072 0 6.229072 6.229072 0 2015-02-19 23:08:19 2015-02-20 05:22:03
211 114 2 LDR 110.944737 110.944737 126.387648 126.387648 15.442911 0 15.442911 0 15.442911 15.442911 0 2015-02-19 14:56:41 2015-02-20 06:23:16
219 127 1 LDR 126.928747 126.928747 129.348119 129.348119 2.419372 0 2.419372 0 2.419372 2.419372 0 2015-02-20 06:55:43 2015-02-20 09:20:53
221 125 1 LDR 123.145827 123.145827 129.616111 129.616111 6.470283 0 6.470283 0 6.470283 6.470283 0 2015-02-20 03:08:45 2015-02-20 09:36:58
223 112 1 PP 116.218261 116.218261 129.829434 129.829434 13.611174 0 13.611174 0 13.611174 13.611174 0 2015-02-19 20:13:06 2015-02-20 09:49:46
224 51 2 PP 13.205970 13.205970 132.229598 132.229598 119.023628 0 119.023628 0 119.023628 119.023628 0 2015-02-15 13:12:21 2015-02-20 12:13:47
228 131 1 LDR 128.786482 128.786482 134.581020 134.581020 5.794538 0 5.794538 0 5.794538 5.794538 0 2015-02-20 08:47:11 2015-02-20 14:34:52
230 122 1 LDR 119.553562 119.553562 134.719447 134.719447 15.165884 0 15.165884 0 15.165884 15.165884 0 2015-02-19 23:33:13 2015-02-20 14:43:10
232 126 1 LDR 126.257813 126.257813 135.234742 135.234742 8.976929 0 8.976929 0 8.976929 8.976929 0 2015-02-20 06:15:28 2015-02-20 15:14:05
233 134 1 LDR 132.345691 132.345691 135.949821 135.949821 3.604130 0 3.604130 0 3.604130 3.604130 0 2015-02-20 12:20:44 2015-02-20 15:56:59
234 111 1 PP 108.963959 108.963959 136.465741 136.465741 27.501782 0 27.501782 0 27.501782 27.501782 0 2015-02-19 12:57:50 2015-02-20 16:27:57

In [29]:
# Pull one record's timestamps. FIX: use label-based .loc instead of the
# deprecated .ix indexer (removed in modern pandas); also avoids the
# chained-indexing form .ix[188]['EnteredTS'].
start = stops_df.loc[188, 'EnteredTS']
end = stops_df.loc[188, 'ExitedTS']
print(start, end)
print(type(start))

# Compare pandas Timestamp arithmetic with numpy datetime64 arithmetic
# for the same pair of datetimes.
start_str = '2015-02-18 09:25:46'
end_str = '2015-02-19 21:06:03'

start_analysis_timestamp = Timestamp(start_str)
end_analysis_timestamp = Timestamp(end_str)

start_analysis_dt64 = np.datetime64(start_str)
end_analysis_dt64 = np.datetime64(end_str)

print(start_analysis_timestamp, start_analysis_dt64)

# Subtraction yields a pandas Timedelta vs a numpy timedelta64.
num_days_fromts = end_analysis_timestamp - start_analysis_timestamp
num_days_fromdt64 = end_analysis_dt64 - start_analysis_dt64

print(num_days_fromts, num_days_fromdt64)

print(type(num_days_fromts))
print(type(num_days_fromdt64))


2015-02-18 09:25:46 2015-02-19 21:06:03
<class 'pandas.tslib.Timestamp'>
2015-02-18 09:25:46 2015-02-18T09:25:46-0500
1 days 11:40:17 128417 seconds
<class 'pandas.tslib.Timedelta'>
<class 'numpy.timedelta64'>

In [35]:
# A Timestamp built from just the date part floors to midnight.
print(start)
print(start.date())
start_tsdate = Timestamp(start.date())
print (start_tsdate)


2015-02-18 09:25:46
2015-02-18
2015-02-18 00:00:00

In [36]:
# Time elapsed since midnight, as a pandas Timedelta.
gap = start - Timestamp(start.date())
print(gap)
print(type(gap))


0 days 09:25:46
<class 'pandas.tslib.Timedelta'>

In [47]:
# Prototype of flooring a timestamp down to the start of its time bin
# (60-minute bins here). Relies on `start` from an earlier cell.
minutes = 60
dt = start

# Bin width expressed in seconds.
floor_seconds = minutes * 60
# Midnight of dt's date; `delta` is the time elapsed since midnight.
dt_date = Timestamp(dt.date())
delta = dt - dt_date
print(delta)
tot_seconds = delta.total_seconds()
print(tot_seconds)

# Integer-floor the elapsed seconds to a bin boundary, then rebuild the
# floored timestamp by offsetting from midnight.
floor_time = (tot_seconds // floor_seconds) * floor_seconds
print(floor_time)
# NOTE(review): gap_seconds is computed but never used below.
gap_seconds = tot_seconds - floor_time
print(dt_date + pd.DateOffset(seconds=floor_time))


0 days 09:25:46
33946.0
32400.0
2015-02-18 09:00:00

In [ ]:
#%time hm.run_hillmaker(scenario_name,df,in_fld_name, out_fld_name,cat_fld_name,start_analysis,end_analysis,tot_fld_name,bin_size_mins,categories=includecats,outputpath='./testing')

In [20]:
df.head()


Out[20]:
EntityID PatientType Unit TriedToEnter Entered TriedToExit Exited PlannedLOS Entered_TriedToEnter TriedToExit_Entered Exit_TriedToExit Exit_Enter Exit_TriedToEnter BlockedByDownstreamHours EnteredTS ExitedTS
0 45 1 Obs 0.000000 0.000000 0.000000 0.000000 0 0 0 0 0 0 0 2015-02-15 00:00:00 2015-02-15 00:00:00
1 46 1 Obs 2.296064 2.296064 2.296064 2.296064 0 0 0 0 0 0 0 2015-02-15 02:17:46 2015-02-15 02:17:46
2 47 1 Obs 3.037196 3.037196 3.037196 3.037196 0 0 0 0 0 0 0 2015-02-15 03:02:14 2015-02-15 03:02:14
3 48 1 Obs 4.325327 4.325327 4.325327 4.325327 0 0 0 0 0 0 0 2015-02-15 04:19:31 2015-02-15 04:19:31
4 49 2 Obs 4.564865 4.564865 4.564865 4.564865 0 0 0 0 0 0 0 2015-02-15 04:33:54 2015-02-15 04:33:54

In [21]:
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 52860 entries, 0 to 52859
Data columns (total 16 columns):
EntityID                    52860 non-null int64
PatientType                 52860 non-null int64
Unit                        52860 non-null object
TriedToEnter                52860 non-null float64
Entered                     52860 non-null float64
TriedToExit                 52860 non-null float64
Exited                      52860 non-null float64
PlannedLOS                  52860 non-null float64
Entered_TriedToEnter        52860 non-null float64
TriedToExit_Entered         52860 non-null float64
Exit_TriedToExit            52860 non-null float64
Exit_Enter                  52860 non-null float64
Exit_TriedToEnter           52860 non-null float64
BlockedByDownstreamHours    52860 non-null float64
EnteredTS                   52860 non-null datetime64[ns]
ExitedTS                    52860 non-null datetime64[ns]
dtypes: datetime64[ns](2), float64(11), int64(2), object(1)
memory usage: 6.9+ MB

Computing occupancy statistics

Need to compute a bunch of output stats to use for visualization, metamodeling and to evaluate scenarios.

Overall utilization

It would be nice if we could just run Hillmaker with a very large bin size to estimate steady-state utilization. Let's try it with one-day (1440-minute) bins.


In [36]:
# Steady-state run: 1440-minute bins = one bin per day. totals=False
# skips the overall-totals category; outputs land under ./testing.
scenario_name = 'log_unitocc_test_steadystate'
hm.run_hillmaker(scenario_name,df,in_fld_name, out_fld_name,cat_fld_name,
                 start_analysis,end_analysis,tot_fld_name,1440,
                 categories=includecats,totals=False,outputpath='./testing')


rng_bydt created: 0.0016
using specified categories: 0.0124
Seeded bydatetime DataFrame created: 0.0252
dayofweek, bin_of_day, bin_of_week computed: 0.0520
Multi-index on bydatetime DataFrame created: 0.0550
Multi-index fully lexsorted: 0.0569
Latest edits at 2015-06-26 13:45:09.462399
Num inner: 15805
Done processing 15849 stop recs: 11.2492
Done with summary stats: 124.963865

In [33]:
# Read back the occupancy summary stats written by the steady-state run.
occ_df = pd.read_csv('testing/occ_stats_summary_log_unitocc_test_steadystate.csv')

In [34]:
occ_df


Out[34]:
category day_of_week bin_of_day count max mean min p50 p55 p60 p65 p70 p75 p80 p85 p90 p95 p975 p99 stdev
0 LDR 0 0 70 12.551887 7.964751 2.796759 7.749132 7.959083 8.579539 8.811633 9.132047 9.477954 10.159817 10.511690 11.105409 11.642197 11.879875 12.249205 2.232001
1 LDR 1 0 70 13.000000 8.085966 3.474745 7.810266 8.134329 8.274419 8.379345 8.927502 9.247156 9.522454 9.824342 10.801959 12.081047 12.890282 12.938914 2.028946
2 LDR 2 0 70 12.457118 8.104782 3.336412 8.034381 8.335889 8.509238 8.790906 9.160778 9.600029 10.188361 10.530722 10.932208 11.341849 11.996434 12.428552 2.077740
3 LDR 3 0 70 12.875243 8.124963 0.000000 8.046973 8.395637 8.772951 9.116812 9.397748 9.934925 10.549181 10.789817 10.992679 11.717129 12.064749 12.450566 2.382311
4 LDR 4 0 69 12.850544 8.484828 2.375741 8.819942 8.849414 9.170447 9.495343 9.666833 9.906759 10.288935 10.986137 11.519086 11.644289 12.043488 12.356167 2.268249
5 LDR 5 0 69 12.730498 7.954601 3.427373 7.735104 7.927241 8.171174 8.579282 8.673562 8.993356 9.488528 9.804104 10.919875 11.644685 12.088810 12.328550 2.003962
6 LDR 6 0 70 12.716262 8.178603 2.697824 7.887957 8.298356 8.664336 9.110238 9.229181 9.622784 10.255775 10.700716 11.066123 11.503365 12.051033 12.572232 2.140319
7 PP 0 0 70 45.760185 35.467947 10.258866 35.786036 36.118146 36.308977 37.043928 38.028598 38.778987 38.944359 40.188310 41.729775 43.573232 44.748670 45.448272 5.265547
8 PP 1 0 70 46.000000 35.804196 20.487118 37.390312 38.021487 38.555859 39.470370 39.688156 39.889198 40.491523 41.203124 41.487889 42.126422 42.621901 44.102668 5.508826
9 PP 2 0 70 45.942662 36.399121 22.880706 37.192986 37.879600 38.649655 39.328046 40.003108 40.990822 41.402132 42.335948 43.095137 43.814442 44.781085 45.679695 5.725518
10 PP 3 0 70 45.975116 35.821909 22.050231 35.533119 35.890048 36.158843 37.352377 39.567219 40.368845 41.196301 42.169190 43.428039 45.210705 45.531679 45.864572 5.684326
11 PP 4 0 69 45.648241 35.814857 26.465972 35.063310 35.579757 36.714229 37.636322 38.665500 40.198646 41.280093 42.197894 43.353201 45.023525 45.609012 45.646533 5.281842
12 PP 5 0 69 45.934039 35.771442 20.479213 35.763704 36.696616 37.405123 37.913400 38.344368 39.438137 40.002079 40.124109 42.280470 44.786905 45.290767 45.815331 5.142711
13 PP 6 0 70 45.343079 34.618757 2.773785 35.204352 35.498030 36.162301 37.330407 38.270867 38.574155 38.881556 39.366436 40.225821 42.611019 44.288800 44.738506 6.010777

In [38]:
bydt_df


Out[38]:
category datetime arrivals departures occupancy day_of_week bin_of_day bin_of_week
0 LDR 2015-02-15 00:00:00 12 7 2.697824 6 0 6
1 LDR 2015-02-16 00:00:00 17 17 6.657824 0 0 0
2 LDR 2015-02-17 00:00:00 16 13 6.210556 1 0 1
3 LDR 2015-02-18 00:00:00 17 18 7.160961 2 0 2
4 LDR 2015-02-19 00:00:00 16 17 7.375949 3 0 3
5 LDR 2015-02-20 00:00:00 21 19 9.493576 4 0 4
6 LDR 2015-02-21 00:00:00 18 17 8.906806 5 0 5
7 LDR 2015-02-22 00:00:00 16 19 8.305035 6 0 6
8 LDR 2015-02-23 00:00:00 15 16 6.672234 0 0 0
9 LDR 2015-02-24 00:00:00 18 16 6.563194 1 0 1
10 LDR 2015-02-25 00:00:00 12 12 6.561250 2 0 2
11 LDR 2015-02-26 00:00:00 21 17 7.659653 3 0 3
12 LDR 2015-02-27 00:00:00 16 15 11.666377 4 0 4
13 LDR 2015-02-28 00:00:00 18 18 10.906319 5 0 5
14 LDR 2015-03-01 00:00:00 14 21 7.578762 6 0 6
15 LDR 2015-03-02 00:00:00 16 16 6.853449 0 0 0
16 LDR 2015-03-03 00:00:00 11 12 8.317731 1 0 1
17 LDR 2015-03-04 00:00:00 13 14 4.766898 2 0 2
18 LDR 2015-03-05 00:00:00 12 11 3.816921 3 0 3
19 LDR 2015-03-06 00:00:00 11 10 5.666470 4 0 4
20 LDR 2015-03-07 00:00:00 15 10 6.095891 5 0 5
21 LDR 2015-03-08 00:00:00 14 17 7.088588 6 0 6
22 LDR 2015-03-09 00:00:00 15 16 5.904595 0 0 0
23 LDR 2015-03-10 00:00:00 17 11 7.343958 1 0 1
24 LDR 2015-03-11 00:00:00 17 16 12.457118 2 0 2
25 LDR 2015-03-12 00:00:00 14 23 10.683090 3 0 3
26 LDR 2015-03-13 00:00:00 22 15 7.636644 4 0 4
27 LDR 2015-03-14 00:00:00 14 18 7.489954 5 0 5
28 LDR 2015-03-15 00:00:00 8 8 4.914109 6 0 6
29 LDR 2015-03-16 00:00:00 15 18 5.576748 0 0 0
... ... ... ... ... ... ... ... ...
946 PP 2016-05-18 00:00:00 22 14 41.315926 2 0 2
947 PP 2016-05-19 00:00:00 17 18 45.267836 3 0 3
948 PP 2016-05-20 00:00:00 17 17 40.829225 4 0 4
949 PP 2016-05-21 00:00:00 13 26 33.123426 5 0 5
950 PP 2016-05-22 00:00:00 25 16 35.501088 6 0 6
951 PP 2016-05-23 00:00:00 22 17 42.530683 0 0 0
952 PP 2016-05-24 00:00:00 18 28 38.527558 1 0 1
953 PP 2016-05-25 00:00:00 10 16 35.088692 2 0 2
954 PP 2016-05-26 00:00:00 18 16 29.997176 3 0 3
955 PP 2016-05-27 00:00:00 23 10 38.660347 4 0 4
956 PP 2016-05-28 00:00:00 13 27 36.260440 5 0 5
957 PP 2016-05-29 00:00:00 17 18 31.405139 6 0 6
958 PP 2016-05-30 00:00:00 18 13 35.316829 0 0 0
959 PP 2016-05-31 00:00:00 18 17 39.911956 1 0 1
960 PP 2016-06-01 00:00:00 11 10 35.280336 2 0 2
961 PP 2016-06-02 00:00:00 21 21 35.111817 3 0 3
962 PP 2016-06-03 00:00:00 8 14 32.954606 4 0 4
963 PP 2016-06-04 00:00:00 24 17 34.538530 5 0 5
964 PP 2016-06-05 00:00:00 15 16 38.636516 6 0 6
965 PP 2016-06-06 00:00:00 15 19 35.078715 0 0 0
966 PP 2016-06-07 00:00:00 13 13 32.519549 1 0 1
967 PP 2016-06-08 00:00:00 15 17 32.743981 2 0 2
968 PP 2016-06-09 00:00:00 15 19 29.413588 3 0 3
969 PP 2016-06-10 00:00:00 15 13 27.361725 4 0 4
970 PP 2016-06-11 00:00:00 20 13 31.369120 5 0 5
971 PP 2016-06-12 00:00:00 14 17 32.395104 6 0 6
972 PP 2016-06-13 00:00:00 21 14 35.499340 0 0 0
973 PP 2016-06-14 00:00:00 18 17 38.039722 1 0 1
974 PP 2016-06-15 00:00:00 15 19 40.487234 2 0 2
975 PP 2016-06-16 00:00:00 0 0 35.000000 3 0 3

976 rows × 8 columns


In [1]:
%matplotlib inline

In [2]:
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Reload the daily by-datetime table produced by the steady-state run.
bydt_df = pd.read_csv('testing/bydatetime_log_unitocc_test_steadystate.csv')

In [12]:
# Occupancy values for the postpartum (PP) unit only. .loc with a boolean
# mask plus column label selects the same Series in a single step.
pp_occ = bydt_df.loc[bydt_df['category'] == 'PP', 'occupancy']

In [15]:
# Quick 20-bin histogram of PP occupancy (.values passes the raw array).
plt.hist(pp_occ.values,20)


Out[15]:
(array([  1.,   0.,   0.,   1.,   0.,   0.,   0.,   0.,   3.,   5.,  12.,
         26.,  40.,  59.,  70.,  75.,  72.,  57.,  37.,  30.]),
 array([  2.77378472,   4.93509549,   7.09640625,   9.25771701,
         11.41902778,  13.58033854,  15.74164931,  17.90296007,
         20.06427083,  22.2255816 ,  24.38689236,  26.54820312,
         28.70951389,  30.87082465,  33.03213542,  35.19344618,
         37.35475694,  39.51606771,  41.67737847,  43.83868924,  46.        ]),
 <a list of 20 Patch objects>)

In [9]:
# Side-by-side occupancy histograms per category on a shared 0-60 scale.
g = sns.FacetGrid(bydt_df, col="category", margin_titles=True)
bins = np.linspace(0, 60, 13)
g.map(plt.hist, "occupancy", color="steelblue", bins=bins, lw=0)


Out[9]:
<seaborn.axisgrid.FacetGrid at 0x7f65482d5390>

In [ ]: