In [1]:
# All imports in one place.
# Bug fix: `from hillmaker import bydatetime` was commented out, but
# bydatetime.make_bydatetime is called later in the notebook -- restoring
# the import fixes the resulting NameError on a fresh Restart & Run All.
import pandas as pd
import numpy as np
import matplotlib as mp
import matplotlib.pyplot as plt
import hillmaker as hm
from hillmaker import bydatetime
#from hillmaker import hillpylib
from pandas import Timestamp
# Let's check what version of pandas, numpy and matplotlib we are using
print ("pandas version ", pd.__version__)
print ("numpy version ", np.version.version)
print ("matplotlib version ", mp.__version__)
In [ ]:
# Echo the hillmaker module object to confirm which installation was imported.
hm
In [3]:
# Set up autoreloading of modules so that I can debug code in external files
%load_ext autoreload
%autoreload 2
In [ ]:
# Read the stop (stay) records, parsing the room in/out timestamps as datetimes.
file_stopdata = 'data/ShortStay.csv'
df = pd.read_csv(file_stopdata, parse_dates=['InRoomTS','OutRoomTS'])
In [36]:
# Scenario configuration: field names, analysis date range, and bin size.
scenario_name = 'sstest_60'
in_fld_name = 'InRoomTS'    # entry timestamp field in df
out_fld_name = 'OutRoomTS'  # exit timestamp field in df
cat_fld_name = 'PatType'    # category field in df
start_analysis = '1/2/1996'
end_analysis = '3/31/1996 23:45'
bin_size_mins = 60          # width of each time-of-day bin, in minutes
# This next field wasn't in original Hillmaker. Use it to specify the name to use for the overall totals.
tot_fld_name = 'Total'
## Convert string dates to actual datetimes
start_analysis_dt = pd.Timestamp(start_analysis)
end_analysis_dt = pd.Timestamp(end_analysis)
In [13]:
# Peek at the parsed stop data.
df.head()
Out[13]:
In [37]:
# Build the by-datetime table from the stop records.
# NOTE(review): `bydatetime` is referenced here, but its import
# (`from hillmaker import bydatetime`) is commented out at the top of the
# notebook -- this cell raises NameError unless that import is restored.
bydt_df = bydatetime.make_bydatetime(df,
in_fld_name,
out_fld_name,
cat_fld_name,
start_analysis_dt,
end_analysis_dt,
tot_fld_name,
bin_size_mins)
In [17]:
# Peek at the resulting by-datetime table.
bydt_df.head()
Out[17]:
In [38]:
# Persist the by-datetime table for this scenario to a CSV file.
file_bydt_csv = ''.join(['data/bydate_', scenario_name, '.csv'])
bydt_df.to_csv(file_bydt_csv)
With this data frame we can compute all kinds of interesting summary statistics by category, by day of week and time of day. To facilitate this type of "group by" analysis, pandas takes what is known as the Split-Apply-Combine approach. The pandas documentation has a nice discussion of this. To really understand split-apply-combine, check out the article by Hadley Wickham who created the plyr package for R. I also created a tutorial on Getting started with Python (with pandas and matplotlib) for group by analysis that covers some of the basics. A companion tutorial shows how to do the same analysis using R instead of Python.
Pandas provides a GroupBy
object to facilitate computing aggregate statistics by grouping fields.
In [18]:
# Create a GroupBy object for the summary stats
bydt_dfgrp1 = bydt_df.groupby(['category','bin_of_week'])
In [19]:
# Having a group by object makes it easy to compute statistics such as the mean of all of the fields other than the grouping fields.
# You'll see that the result is simply another DataFrame.
bydt_dfgrp1.mean()
Out[19]:
In [20]:
# Let's explore some of the means.
# Slice rows 100-119 of the aggregated frame to look at a small window.
bydt_dfgrp1.mean()[100:120]
Out[20]:
Now that we've seen how a GroupBy
object works, let's see how we can compute a whole bunch of summary statistics at once. Specifically we want to compute the mean, standard deviation, min, max and several percentiles. First let's create a slightly different GroupBy
object.
Now let's define a function that will return a bunch of statistics in a dictionary for a column of data.
In [23]:
def get_occstats(group, stub=''):
    """Return a dict of summary statistics for a series of values.

    Parameters
    ----------
    group : pandas.Series (or any object supporting count/mean/min/max/std/quantile)
        The values to summarize, e.g. one group produced by a GroupBy.
    stub : str, optional
        Prefix prepended to every statistic name in the returned dict.

    Returns
    -------
    dict mapping statistic name (e.g. stub+'mean', stub+'p95') to its value.
    """
    # Bug fix: 'stdev' previously lacked the stub prefix, so it was the one
    # statistic that ignored the caller-supplied prefix. Behavior for the
    # default stub='' (as used throughout this notebook) is unchanged.
    return {stub+'count': group.count(), stub+'mean': group.mean(),
            stub+'min': group.min(),
            stub+'max': group.max(), stub+'stdev': group.std(),
            stub+'p50': group.quantile(0.5), stub+'p55': group.quantile(0.55),
            stub+'p60': group.quantile(0.6), stub+'p65': group.quantile(0.65),
            stub+'p70': group.quantile(0.7), stub+'p75': group.quantile(0.75),
            stub+'p80': group.quantile(0.8), stub+'p85': group.quantile(0.85),
            stub+'p90': group.quantile(0.9), stub+'p95': group.quantile(0.95),
            stub+'p975': group.quantile(0.975),
            stub+'p99': group.quantile(0.99)}
In [39]:
# Group by category, day of week and time-of-day bin for day/time summaries.
bydt_dfgrp2 = bydt_df.groupby(['category','day_of_week','bin_of_day'])
Now we can use the apply function to apply the get_occstats() function to a data series. We'll create separate output data series for occupancy, arrivals and departures.
In [40]:
# Apply get_occstats to each group. Because it returns a dict, the result is
# a Series whose index gains an extra level for the statistic name.
occ_stats = bydt_dfgrp2['occupancy'].apply(get_occstats)
arr_stats = bydt_dfgrp2['arrivals'].apply(get_occstats)
dep_stats = bydt_dfgrp2['departures'].apply(get_occstats)
So, what is occ_stats
?
In [26]:
# What kind of object did apply() give us?
type(occ_stats)
Out[26]:
It's a pandas Series
object. What does its index look like?
In [27]:
# Inspect the (multi-level) index of the stats Series.
occ_stats.index
Out[27]:
Notice it's a MultiIndex
with 4 levels: category, dayofweek, binofday, statistic. It would be nice to "un-pivot" the statistic from the index and have it correspond to a set of columns. That's what unstack()
will do. It will leave us with a DataFrame
with all of the statistics as columns and a 3 level multi-index of category, dayofweek and binofday. Perfect for plotting.
In [29]:
# Pivot the statistic level of the index out into columns; show a slice.
occ_stats.unstack()[100:125]
Out[29]:
In [41]:
# Unstack each stats Series into a DataFrame with statistics as columns.
occ_stats_summary = occ_stats.unstack()
arr_stats_summary = arr_stats.unstack()
dep_stats_summary = dep_stats.unstack()
In [32]:
print (occ_stats_summary[200:220].values) # Let's peek into the middle of the table.
Let's write these out to CSV files so we can check them out and maybe play with them in a spreadsheet.
In [33]:
# Scenario-specific output paths for the summary tables.
file_occ_csv = 'data/occ_stats_' + scenario_name + '.csv'
file_arr_csv = 'data/arr_stats_' + scenario_name + '.csv'
file_dep_csv = 'data/dep_stats_' + scenario_name + '.csv'
# Bug fix: these writes previously went to hardcoded
# 'data/*_stats_summary.csv' paths, ignoring the scenario-specific
# filenames built just above (so different scenarios silently
# overwrote each other's output).
occ_stats_summary.to_csv(file_occ_csv)
arr_stats_summary.to_csv(file_arr_csv)
dep_stats_summary.to_csv(file_dep_csv)
In [42]:
# The cells below re-run the whole pipeline end-to-end:
# load data -> configure scenario -> build by-datetime table.
file_stopdata = 'data/ShortStay.csv'
df = pd.read_csv(file_stopdata, parse_dates=['InRoomTS','OutRoomTS'])
In [43]:
# Scenario configuration (same values as the earlier configuration cell).
scenario_name = 'sstest_60'
in_fld_name = 'InRoomTS'
out_fld_name = 'OutRoomTS'
cat_fld_name = 'PatType'
start_analysis = '1/2/1996'
end_analysis = '3/31/1996 23:45'
bin_size_mins = 60
# This next field wasn't in original Hillmaker. Use it to specify the name to use for the overall totals.
tot_fld_name = 'Total'
## Convert string dates to actual datetimes
start_analysis_dt = pd.Timestamp(start_analysis)
end_analysis_dt = pd.Timestamp(end_analysis)
In [44]:
# NOTE(review): as above, `bydatetime` requires the commented-out
# `from hillmaker import bydatetime` import at the top of the notebook.
bydt_df = bydatetime.make_bydatetime(df,
in_fld_name,
out_fld_name,
cat_fld_name,
start_analysis_dt,
end_analysis_dt,
tot_fld_name,
bin_size_mins)
In [23]:
# NOTE(review): this re-defines get_occstats identically to the earlier
# cell (the later definition silently shadows the earlier one). Kept so
# the re-run section below is self-contained, but consider moving the
# function to a .py module and importing it once.
def get_occstats(group, stub=''):
    """Return a dict of summary statistics for a series of values.

    Parameters
    ----------
    group : pandas.Series (or any object supporting count/mean/min/max/std/quantile)
        The values to summarize, e.g. one group produced by a GroupBy.
    stub : str, optional
        Prefix prepended to every statistic name in the returned dict.

    Returns
    -------
    dict mapping statistic name (e.g. stub+'mean', stub+'p95') to its value.
    """
    # Bug fix: 'stdev' previously lacked the stub prefix, so it was the one
    # statistic that ignored the caller-supplied prefix. Behavior for the
    # default stub='' (as used throughout this notebook) is unchanged.
    return {stub+'count': group.count(), stub+'mean': group.mean(),
            stub+'min': group.min(),
            stub+'max': group.max(), stub+'stdev': group.std(),
            stub+'p50': group.quantile(0.5), stub+'p55': group.quantile(0.55),
            stub+'p60': group.quantile(0.6), stub+'p65': group.quantile(0.65),
            stub+'p70': group.quantile(0.7), stub+'p75': group.quantile(0.75),
            stub+'p80': group.quantile(0.8), stub+'p85': group.quantile(0.85),
            stub+'p90': group.quantile(0.9), stub+'p95': group.quantile(0.95),
            stub+'p975': group.quantile(0.975),
            stub+'p99': group.quantile(0.99)}
In [45]:
# Recompute the grouped summary statistics in one cell:
# group -> apply get_occstats -> unstack statistics into columns.
bydt_dfgrp2 = bydt_df.groupby(['category','day_of_week','bin_of_day'])
occ_stats = bydt_dfgrp2['occupancy'].apply(get_occstats)
arr_stats = bydt_dfgrp2['arrivals'].apply(get_occstats)
dep_stats = bydt_dfgrp2['departures'].apply(get_occstats)
occ_stats_summary = occ_stats.unstack()
arr_stats_summary = arr_stats.unstack()
dep_stats_summary = dep_stats.unstack()
In [46]:
# Persist the by-datetime table and the summary tables, using the
# scenario-specific filenames so runs for different scenarios don't collide.
file_bydt_csv = 'data/bydate_' + scenario_name + '.csv'
bydt_df.to_csv(file_bydt_csv)
file_occ_csv = 'data/occ_stats_' + scenario_name + '.csv'
file_arr_csv = 'data/arr_stats_' + scenario_name + '.csv'
file_dep_csv = 'data/dep_stats_' + scenario_name + '.csv'
# Bug fix: the summary CSVs previously went to hardcoded
# 'data/*_stats_summary.csv' paths, ignoring the variables built above.
occ_stats_summary.to_csv(file_occ_csv)
arr_stats_summary.to_csv(file_arr_csv)
dep_stats_summary.to_csv(file_dep_csv)