In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [29]:
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from pandas import Timestamp
from datetime import datetime
from datetime import timedelta

import re

import hillmaker as hm

In [4]:
file_stopdata = 'data/unit_stop_log_Experiment1_Scenario1_Rep1.csv'

scenario_name = 'log_unitocc_test_steadystate'
in_fld_name = 'EnteredTS'
out_fld_name = 'ExitedTS'
cat_fld_name = 'Unit'
start_analysis = '6/24/2015 00:00'
end_analysis = '6/16/2016 00:00'

# Optional inputs

tot_fld_name = 'OBTot'
bin_size_mins = 1440
includecats = ['LDR','PP']

In [5]:
# Group the full digit runs; ([0-9]){1,3} would capture only the last digit.
rx = re.compile(r'Scenario([0-9]{1,3})_Rep([0-9]{1,2})')

In [7]:
stops_df = pd.read_csv(file_stopdata, index_col=0)
basedate = Timestamp('20150215 00:00:00')

# 'Entered' and 'Exited' are fractional hours since basedate. Convert them to
# Timestamps, rounding the nanosecond value (.value) to the nearest second.
stops_df['EnteredTS'] = stops_df.apply(lambda row:
                           Timestamp(round((basedate + pd.DateOffset(hours=row['Entered'])).value, -9)), axis=1)

stops_df['ExitedTS'] = stops_df.apply(lambda row:
                          Timestamp(round((basedate + pd.DateOffset(hours=row['Exited'])).value, -9)), axis=1)

stops_df = stops_df[stops_df[cat_fld_name].isin(includecats)]

start_analysis_dt = pd.Timestamp(start_analysis)
end_analysis_dt = pd.Timestamp(end_analysis)

stops_df.shape


Out[7]:
(33036, 15)

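The row-wise apply with pd.DateOffset above is slow on large stop logs. A vectorized sketch of the same conversion (assumes a pandas version with Series.dt.round, 0.18+; not run here):

In [ ]:
# Sketch: vectorized version of the timestamp conversion above.
# 'Entered' and 'Exited' are fractional hours since basedate.
stops_df['EnteredTS'] = (basedate + pd.to_timedelta(stops_df['Entered'], unit='h')).dt.round('s')
stops_df['ExitedTS'] = (basedate + pd.to_timedelta(stops_df['Exited'], unit='h')).dt.round('s')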
In [8]:
# Now filter the stops dataframe so that only records counting toward occupancy are included.
# This lets us analyze the blocking-related columns directly (no blocking during warmup is included).
stops_df = stops_df[(stops_df['EnteredTS'] <= end_analysis_dt) & (stops_df['ExitedTS'] >= start_analysis_dt)]
stops_df.shape


Out[8]:
(11765, 15)

In [9]:
m = re.search(rx, file_stopdata)

scenario_num = m.group(1)
rep_num = m.group(2)

print (scenario_num, rep_num)


1 1

In [66]:
def dt_floor_original(dt, binsizemins):
    """
    Find floor of a datetime object to a specified number of minutes.

    dt : pandas Timestamp
    binsizemins : bin size in minutes to floor to
    """
    floor_seconds = binsizemins * 60
    dt_date = Timestamp(dt.date())
    tot_seconds = (dt - dt_date).seconds
    floor_time = (tot_seconds // floor_seconds) * floor_seconds
    return dt_date + pd.DateOffset(seconds=floor_time)

def dt_floor(dt, binsizemins=60):
    """Floor a datetime object to a multiple of binsizemins minutes.

    dt : pandas Timestamp
    binsizemins : bin size in minutes to floor to, default 60
    Adapted from rounding snippets by Thierry Husson (2012) and Stijn Nevens (2014).
    """
    roundto = pd.Timedelta(minutes=binsizemins).total_seconds()
    # Seconds elapsed since midnight of dt's own day. Measuring from dt.min
    # (Timestamp.min) is not midnight-aligned and gave incorrect results.
    totseconds = (dt - dt.normalize()).total_seconds()
    # // is floor division, not a comment on the following line:
    floorseconds = (totseconds // roundto) * roundto
    return dt.normalize() + timedelta(seconds=floorseconds)

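The commented-out nanosecond arithmetic in the original version hints at a simpler fast path: floor the Timestamp's integer nanosecond value directly. A minimal sketch of that approach (for tz-naive timestamps, .value counts nanoseconds from the epoch at midnight, so bins are day-aligned whenever binsizemins divides a day evenly):

In [ ]:
def dt_floor_ns(dt, binsizemins):
    """Floor a pandas Timestamp to a binsizemins-minute boundary via
    integer nanosecond arithmetic (sketch)."""
    bin_ns = binsizemins * 60 * 10**9
    return Timestamp((dt.value // bin_ns) * bin_ns)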
In [12]:
testdate = Timestamp('20150215 14:24:00')

In [76]:
%load_ext Cython

In [79]:
%%cython
import pandas as pd
from pandas import Timestamp

def dt_floor_cython(dt, binsizemins):
    """
    Find floor of a datetime object to a specified number of minutes.

    dt : pandas Timestamp
    binsizemins : bin size in minutes to floor to
    """
    floor_seconds = binsizemins * 60
    dt_date = Timestamp(dt.date())
    tot_seconds = (dt - dt_date).seconds
    floor_time = (tot_seconds // floor_seconds) * floor_seconds
    return dt_date + pd.DateOffset(seconds=floor_time)

In [50]:
testdate.minute


Out[50]:
24

In [38]:
dateDelta = timedelta(minutes=60)

In [39]:
dateDelta


Out[39]:
datetime.timedelta(0, 3600)

In [40]:
dateDeltaPD = pd.Timedelta(minutes=60)

In [41]:
dateDeltaPD


Out[41]:
Timedelta('0 days 01:00:00')

In [24]:
%%timeit
x=5
x^2


The slowest run took 29.09 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 85.9 ns per loop

In [59]:
dt_floor(testdate,60)


Out[59]:
Timestamp('2015-02-15 14:00:00')

In [80]:
%timeit dt_floor_original(testdate,1440)


The slowest run took 62.47 times longer than the fastest. This could mean that an intermediate result is being cached 
10000 loops, best of 3: 77.2 µs per loop

In [81]:
%timeit dt_floor_cython(testdate,1440)


The slowest run took 5.32 times longer than the fastest. This could mean that an intermediate result is being cached 
1000 loops, best of 3: 74.8 µs per loop

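The Cython version is a line-for-line copy with no static typing, so the near-identical timings above are expected: the cost is dominated by the pandas Timestamp and DateOffset calls, which Cython cannot speed up. A sketch of what adding C types might look like (an illustration, not benchmarked here; the pandas calls would still dominate):

In [ ]:
%%cython
import pandas as pd
from pandas import Timestamp

def dt_floor_cython_typed(dt, int binsizemins):
    # C-typed locals avoid Python integer overhead for the arithmetic,
    # but Timestamp/DateOffset construction still dominates the runtime.
    cdef long floor_seconds = binsizemins * 60
    dt_date = Timestamp(dt.date())
    cdef long tot_seconds = (dt - dt_date).seconds
    cdef long floor_time = (tot_seconds // floor_seconds) * floor_seconds
    return dt_date + pd.DateOffset(seconds=floor_time)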
In [8]:
fn_bydatetime = 'testing/bydatetime_' + scenario_name + '.csv'
fn_occ_summary = 'testing/occ_stats_summary_' + scenario_name + '.csv'
fn_arr_summary = 'testing/arr_stats_summary_' + scenario_name + '.csv'
fn_dep_summary = 'testing/dep_stats_summary_' + scenario_name + '.csv'

In [82]:
%prun -l 50 hm.run_hillmaker(scenario_name,stops_df,in_fld_name, out_fld_name,cat_fld_name,start_analysis,end_analysis,tot_fld_name,bin_size_mins,categories=includecats,outputpath='./testing')


min of intime: 2015-06-12 17:06:07
max of outtime: 2016-06-27 20:40:20
rng_bydt created: 0.0544
using specified categories: 0.0910
Seeded bydatetime DataFrame created: 0.1749
dayofweek, bin_of_day, bin_of_week computed: 0.4160
Multi-index on bydatetime DataFrame created: 0.4255
Multi-index fully lexsorted: 0.4301
Latest edits at 2015-07-08 16:44:12.359813
Num inner: 11686
{'inner': 11686, 'right': 44, 'left': 35}
Done processing 11765 stop recs: 21.1049
Done adding totals: 21.2154
Done with summary stats: 173.354698
Done with overall summary stats: 173.687779
 

5368175 function calls (5360007 primitive calls) in 17.618 seconds

   Ordered by: internal time
   List reduced from 955 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    70524    4.363    0.000    8.160    0.000 hillpylib.py:97(dt_floor)
    17631    1.436    0.000    5.527    0.000 hillpylib.py:146(occ_frac)
    70524    1.129    0.000    1.235    0.000 offsets.py:198(apply)
        1    0.871    0.871   16.276   16.276 bydatetime.py:25(make_bydatetime)
    70526    0.800    0.000    1.124    0.000 offsets.py:177(_determine_offset)
    17631    0.749    0.000    0.780    0.000 hillpylib.py:136(numbins)
    70524    0.648    0.000    2.123    0.000 offsets.py:44(wrapper)
    68659    0.494    0.000    0.494    0.000 {method 'set_value' of 'pandas.index.IndexEngine' objects}
    70968    0.476    0.000    0.480    0.000 {method 'get_value' of 'pandas.index.IndexEngine' objects}
    68659    0.466    0.000    1.868    0.000 indexing.py:1518(setitem)


In [40]:
hm.run_hillmaker(scenario_name,stops_df,in_fld_name, out_fld_name,cat_fld_name,start_analysis,end_analysis,tot_fld_name,bin_size_mins,categories=includecats,outputpath='./testing')


min of intime: 2015-06-12 17:06:07
max of outtime: 2016-06-27 20:40:20
rng_bydt created: 0.0038
using specified categories: 0.0110
Seeded bydatetime DataFrame created: 0.0277
dayofweek, bin_of_day, bin_of_week computed: 0.0621
Multi-index on bydatetime DataFrame created: 0.0652
Multi-index fully lexsorted: 0.0692
Latest edits at 2015-07-07 09:09:59.825809
Num inner: 17552
{'left': 35, 'inner': 17552, 'right': 44}
Done processing 17631 stop recs: 13.7587
Done adding totals: 13.7851
Done with summary stats: 110.532059
Done with overall summary stats: 110.667795

In [9]:
occ_df = pd.read_csv(fn_occ_summary)

In [10]:
bydt_df = pd.read_csv(fn_bydatetime)

In [11]:
def num_gt_0(column):
    return (column != 0).sum()

def get_stats(group, stub=''):
    return {stub+'count': group.count(), stub+'mean': group.mean(),
            stub+'min': group.min(), stub+'num_gt_0': num_gt_0(group),
            stub+'max': group.max(), stub+'stdev': group.std(), 
            stub+'p01': group.quantile(0.01), stub+'p025': group.quantile(0.025),
            stub+'p05': group.quantile(0.05), stub+'p25': group.quantile(0.25),
            stub+'p50': group.quantile(0.5), stub+'p75': group.quantile(0.75),
            stub+'p90': group.quantile(0.9), stub+'p95': group.quantile(0.95),
            stub+'p975': group.quantile(0.975), stub+'p99': group.quantile(0.99)}

In [14]:
pp_occ = bydt_df[(bydt_df['category'] == 'PP')]['occupancy']

In [15]:
pp_occ.describe()


Out[15]:
count    359.000000
mean      35.928327
std        4.930759
min       22.050231
25%       32.463605
50%       35.783588
75%       39.436146
max       46.000000
Name: occupancy, dtype: float64

In [16]:
get_stats(pp_occ)


Out[16]:
{'count': 359,
 'max': 46.0,
 'mean': 35.9283265629836,
 'min': 22.050231481481489,
 'num_gt_0': 359,
 'p01': 25.770185648148146,
 'p025': 26.367486111111116,
 'p05': 27.656700231481484,
 'p25': 32.463605324074081,
 'p50': 35.783587962962962,
 'p75': 39.436145833333327,
 'p90': 42.362648148148139,
 'p95': 44.468702546296292,
 'p975': 45.347137152777769,
 'p99': 45.783168518518508,
 'stdev': 4.9307588888398506}

In [17]:
ldr_occ = bydt_df[(bydt_df['category'] == 'LDR')]['occupancy']

In [22]:
ldr_occ.describe()


Out[22]:
count    359.000000
mean       8.160660
std        2.162119
min        0.000000
25%        6.568762
50%        8.158715
75%        9.571510
max       13.000000
Name: occupancy, dtype: float64

In [15]:
get_stats(ldr_occ)


Out[15]:
{'count': 359,
 'max': 12.999999999999998,
 'mean': 8.1606601735788704,
 'min': 0.0,
 'num_gt_0': 358,
 'p01': 3.3891692129629636,
 'p025': 4.0458969907407418,
 'p05': 4.772981481481481,
 'p25': 6.5687615740740739,
 'p50': 8.1587152777777785,
 'p75': 9.5715104166666656,
 'p90': 11.049131944444444,
 'p95': 11.785155092592589,
 'p975': 12.445212384259259,
 'p99': 12.860917592592592,
 'stdev': 2.1621193043684652}

In [15]:
plt.hist(pp_occ.values,12)


Out[15]:
(array([  2.,   4.,  16.,  26.,  28.,  50.,  62.,  42.,  54.,  31.,  23.,
         21.]), array([ 22.05023148,  24.04604552,  26.04185957,  28.03767361,
         30.03348765,  32.0293017 ,  34.02511574,  36.02092978,
         38.01674383,  40.01255787,  42.00837191,  44.00418596,  46.        ]), <a list of 12 Patch objects>)

In [16]:
plt.hist(ldr_occ.values,20)


Out[16]:
(array([  1.,   0.,   0.,   1.,   1.,   3.,  11.,   9.,  22.,  39.,  29.,
         44.,  42.,  38.,  37.,  21.,  25.,  17.,   8.,  11.]),
 array([  0.  ,   0.65,   1.3 ,   1.95,   2.6 ,   3.25,   3.9 ,   4.55,
          5.2 ,   5.85,   6.5 ,   7.15,   7.8 ,   8.45,   9.1 ,   9.75,
         10.4 ,  11.05,  11.7 ,  12.35,  13.  ]),
 <a list of 20 Patch objects>)

In [17]:
bydt_df.head()


Out[17]:
  category    datetime  arrivals  departures  occupancy  day_of_week  bin_of_day  bin_of_week
0      LDR  2015-06-24        17          12   8.436771            2           0            2
1      LDR  2015-06-25        15          18   7.693773            3           0            3
2      LDR  2015-06-26        22          21   9.906759            4           0            4
3      LDR  2015-06-27        11          15   6.563241            5           0            5
4      LDR  2015-06-28        14          15   4.467477            6           0            6

In [10]:
sns.tsplot(pp_occ);



In [32]:
pp_occ.head()


Out[32]:
946    41.985243
947    40.804190
948    39.997558
949    40.432025
950    35.925370
Name: occupancy, dtype: float64

In [33]:
occ_df


Out[33]:
category day_of_week bin_of_day count max mean min p50 p55 p60 p65 p70 p75 p80 p85 p90 p95 p975 p99 stdev
0 LDR 0 0 135 12.891366 7.984419 2.796759 7.720822 8.228215 8.595220 8.831941 9.171532 9.397662 9.869197 10.489184 11.033111 11.783839 12.132996 12.554980 2.181408
1 LDR 1 0 135 13.000000 8.230036 3.474745 8.092384 8.242804 8.470944 8.932141 9.299539 9.580023 9.918157 10.352272 10.915093 11.597917 12.728136 12.902694 2.046906
2 LDR 2 0 136 12.918009 8.144066 2.000000 8.163414 8.338695 8.533981 8.961140 9.328594 9.610564 10.099630 10.491646 10.854664 11.841606 12.358048 12.786104 2.121306
3 LDR 3 0 135 12.875243 8.408280 3.629352 8.463785 8.773997 9.020667 9.242135 9.678604 9.956875 10.488127 10.782909 11.263389 11.737558 12.120885 12.384748 2.098490
4 LDR 4 0 135 12.850544 8.424046 2.375741 8.401725 8.836523 9.129600 9.381046 9.591648 9.927517 10.293407 10.810131 11.301012 11.583255 12.083503 12.166122 2.142989
5 LDR 5 0 135 12.730498 8.138321 3.427373 7.931053 8.195449 8.535616 8.873404 9.068139 9.482662 9.733801 10.411692 10.906898 11.534293 11.949912 12.477259 1.940512
6 LDR 6 0 135 12.798495 8.308331 3.461505 8.316933 8.514780 8.731028 9.122241 9.315606 9.656620 10.069782 10.488385 11.100748 11.840792 12.359170 12.695952 2.039818
7 OBTot 0 0 135 58.312072 44.240580 32.207986 44.938137 45.654442 46.086981 46.621472 47.173657 47.777378 49.067007 50.312773 51.346285 53.838125 55.698014 56.525428 5.763216
8 OBTot 1 0 135 59.000000 44.509344 29.828657 45.239167 46.273299 46.602109 47.166617 47.507778 48.425787 49.093884 50.310515 52.097472 53.868200 54.859369 56.294068 5.859946
9 OBTot 2 0 136 58.326308 44.650573 29.872940 45.185909 46.155315 46.753565 47.937468 48.680897 49.369057 50.428646 50.892355 52.050064 53.626707 56.587617 57.801300 6.279447
10 OBTot 3 0 135 56.434688 44.321968 25.876238 44.813333 46.170649 46.513352 46.912650 47.862970 48.794352 50.148699 51.394428 52.081204 54.238525 55.475351 56.050589 6.477675
11 OBTot 4 0 135 56.045845 44.151101 30.947407 43.724873 44.343772 45.694400 46.483206 47.833481 49.680961 50.452731 51.791394 52.767613 53.961748 54.867369 55.274939 6.284958
12 OBTot 5 0 135 57.769444 44.115600 28.197037 44.696157 45.227959 46.116780 46.538221 47.207935 47.916794 49.094421 49.684709 51.029664 52.394648 53.374421 55.014473 5.437980
13 OBTot 6 0 135 57.201782 44.186353 32.765845 43.902824 44.450814 45.144995 46.361491 47.691912 48.499120 49.038299 51.020660 51.865164 53.340824 54.413748 55.862120 5.687215
14 PP 0 0 135 45.898113 36.256160 22.002211 36.118796 37.021725 38.002609 38.538626 38.903889 39.773322 41.288799 41.744350 42.642273 44.549983 45.260234 45.658693 5.020267
15 PP 1 0 135 46.000000 36.279308 24.317338 36.715498 37.809584 38.390900 38.881549 39.575275 39.927106 40.509484 41.464398 42.331120 45.041851 45.858452 45.957411 5.204713
16 PP 2 0 136 45.942662 36.506508 22.880706 36.855712 37.409022 38.495382 38.999158 40.035203 41.107538 41.975868 42.440692 43.167083 44.152859 45.396745 45.598332 5.430886
17 PP 3 0 135 46.000000 35.913688 21.342292 36.023796 36.743223 37.289817 38.302381 39.357468 40.441128 40.871049 41.399215 42.815160 44.673609 45.369503 45.920645 5.552446
18 PP 4 0 135 45.691690 35.727056 23.001354 35.308588 36.194297 36.983352 37.649626 38.955292 40.236007 41.019296 42.059389 42.757160 43.982604 45.339833 45.647387 5.264489
19 PP 5 0 135 45.952465 35.977279 20.479213 36.260440 36.824409 37.562956 38.360508 39.253412 40.041192 40.583894 41.566420 42.727738 44.126557 45.072064 45.556062 5.137616
20 PP 6 0 135 46.000000 35.878022 22.288310 35.657662 36.376359 36.938812 38.335972 38.771775 39.187645 40.141877 40.800571 43.074398 44.549737 45.264345 45.796022 5.048644

Computing occupancy statistics

We need to compute a number of output statistics to use for visualization, metamodeling, and scenario evaluation.

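The per-category pattern used for the delay stats below works for occupancy too. A sketch, mirroring the get_stats/apply idiom used later in this notebook:

In [ ]:
# Sketch: occupancy stats for every category at once, returned as a
# MultiIndex Series (same idiom as the delay stats below).
occ_stats_by_cat = bydt_df.groupby('category')['occupancy'].apply(get_stats, 'occ_')
occ_stats_by_cat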

In [18]:
ldr_occ_stats = get_stats(ldr_occ)
pp_occ_stats = get_stats(pp_occ)

In [26]:
grp_all = stops_df.groupby(['Unit'])

In [28]:
blocked_uncond_stats = grp_all['Entered_TriedToEnter'].apply(get_stats,'delay_')
blocked_uncond_stats


Out[28]:
Unit                
LDR   delay_count       5871.000000
      delay_max           10.393432
      delay_mean           0.216498
      delay_min            0.000000
      delay_num_gt_0     518.000000
      delay_p01            0.000000
      delay_p025           0.000000
      delay_p05            0.000000
      delay_p25            0.000000
      delay_p50            0.000000
      delay_p75            0.000000
      delay_p90            0.000000
      delay_p95            1.532182
      delay_p975           3.138912
      delay_p99            5.734486
      delay_stdev          0.965574
Obs   delay_count       5866.000000
      delay_max            0.000000
      delay_mean           0.000000
      delay_min            0.000000
      delay_num_gt_0       0.000000
      delay_p01            0.000000
      delay_p025           0.000000
      delay_p05            0.000000
      delay_p25            0.000000
      delay_p50            0.000000
      delay_p75            0.000000
      delay_p90            0.000000
      delay_p95            0.000000
      delay_p975           0.000000
      delay_p99            0.000000
      delay_stdev          0.000000
PP    delay_count       5894.000000
      delay_max           12.390646
      delay_mean           0.157927
      delay_min            0.000000
      delay_num_gt_0     306.000000
      delay_p01            0.000000
      delay_p025           0.000000
      delay_p05            0.000000
      delay_p25            0.000000
      delay_p50            0.000000
      delay_p75            0.000000
      delay_p90            0.000000
      delay_p95            0.112590
      delay_p975           2.299069
      delay_p99            5.088920
      delay_stdev          0.924263
dtype: float64

In [29]:
grp_blocked = stops_df[(stops_df['Entered_TriedToEnter'] > 0)].groupby(['Unit'])

In [30]:
blocked_cond_stats = grp_blocked['Entered_TriedToEnter'].apply(get_stats,'delay_')
blocked_cond_stats


Out[30]:
Unit                
LDR   delay_count       518.000000
      delay_max          10.393432
      delay_mean          2.453789
      delay_min           0.002380
      delay_num_gt_0    518.000000
      delay_p01           0.025957
      delay_p025          0.057112
      delay_p05           0.132183
      delay_p25           0.675939
      delay_p50           1.830630
      delay_p75           3.422857
      delay_p90           6.155394
      delay_p95           7.230414
      delay_p975          8.136480
      delay_p99           9.678853
      delay_stdev         2.255046
PP    delay_count       306.000000
      delay_max          12.390646
      delay_mean          3.041901
      delay_min           0.007135
      delay_num_gt_0    306.000000
      delay_p01           0.032551
      delay_p025          0.061621
      delay_p05           0.186629
      delay_p25           0.780411
      delay_p50           2.180326
      delay_p75           4.310726
      delay_p90           6.909315
      delay_p95           8.817371
      delay_p975         10.462178
      delay_p99          11.956207
      delay_stdev         2.775599
dtype: float64

In [23]:
blocked_cond_stats.index


Out[23]:
MultiIndex(levels=[[1, 2], ['LDR', 'PP'], ['test_count', 'test_max', 'test_mean', 'test_min', 'test_num_gt_0', 'test_p01', 'test_p025', 'test_p05', 'test_p25', 'test_p50', 'test_p75', 'test_p90', 'test_p95', 'test_p975', 'test_p99', 'test_stdev']],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]],
           names=['PatientType', 'Unit', None])

In [24]:
blocked_cond_stats[(1,'LDR','test_mean')]


Out[24]:
2.4305144504988401

Create dictionary to append to results data frame

Each new record (one scenario and replication) is stored in a dictionary, which is then appended to the results dataframe (see the sketch after the cell below).


In [31]:
newrec = {}
newrec['scenario'] = scenario_num
newrec['rep'] = rep_num

newrec['occ_mean_ldr'] = ldr_occ_stats['mean']
newrec['occ_p05_ldr'] = ldr_occ_stats['p05']
newrec['occ_p25_ldr'] = ldr_occ_stats['p25']
newrec['occ_p50_ldr'] = ldr_occ_stats['p50']
newrec['occ_p75_ldr'] = ldr_occ_stats['p75']
newrec['occ_p95_ldr'] = ldr_occ_stats['p95']

newrec['occ_mean_pp'] = pp_occ_stats['mean']
newrec['occ_p05_pp'] = pp_occ_stats['p05']
newrec['occ_p25_pp'] = pp_occ_stats['p25']
newrec['occ_p50_pp'] = pp_occ_stats['p50']
newrec['occ_p75_pp'] = pp_occ_stats['p75']
newrec['occ_p95_pp'] = pp_occ_stats['p95']

newrec['pct_waitq_ldr'] = blocked_uncond_stats[('LDR','delay_num_gt_0')]/blocked_uncond_stats[('LDR','delay_count')]
newrec['waitq_ldr_mean'] = blocked_cond_stats[('LDR','delay_mean')]
newrec['waitq_ldr_p95'] = blocked_cond_stats[('LDR','delay_p95')]

newrec['pct_blocked_ldr'] = blocked_uncond_stats[('PP','delay_num_gt_0')]/blocked_uncond_stats[('PP','delay_count')]
newrec['blocked_ldr_mean'] = blocked_cond_stats[('PP','delay_mean')]
newrec['blocked_ldr_p95'] = blocked_cond_stats[('PP','delay_p95')]

print(newrec)


{'occ_mean_pp': 35.9283265629836, 'blocked_ldr_mean': 3.0419013564409974, 'scenario': '1', 'occ_p95_ldr': 11.785155092592589, 'pct_blocked_ldr': 0.051917203936206309, 'occ_mean_ldr': 8.1606601735788704, 'rep': '1', 'waitq_ldr_mean': 2.4537886287572235, 'occ_p05_pp': 27.656700231481484, 'occ_p75_ldr': 9.5715104166666656, 'occ_p95_pp': 44.468702546296292, 'occ_p25_ldr': 6.5687615740740739, 'occ_p25_pp': 32.463605324074081, 'occ_p50_ldr': 8.1587152777777785, 'pct_waitq_ldr': 0.088230284448986548, 'occ_p05_ldr': 4.772981481481481, 'waitq_ldr_p95': 7.2304141622390485, 'occ_p50_pp': 35.783587962962962, 'occ_p75_pp': 39.436145833333327, 'blocked_ldr_p95': 8.8173711671234933}

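A sketch of the append step described above (assumes a results_df accumulated across scenario/rep files; DataFrame.append with a dict was the idiom in pandas of this vintage):

In [ ]:
# Sketch: accumulate one row per (scenario, rep) in a results DataFrame.
results_df = pd.DataFrame()              # or load an existing results file
results_df = results_df.append(newrec, ignore_index=True)
results_df.tail()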
Misc debugging cells


In [4]:
a_start = pd.Timestamp(start_analysis)
a_end = pd.Timestamp(end_analysis)
print(a_start,a_end)


2015-06-24 00:00:00 2016-06-16 00:00:00

In [5]:
# Note: df in these debugging cells is the full (unfiltered) stop log from a
# separate session, not the filtered stops_df used above.
left_PP_df = df[(df['EnteredTS'] < a_start) & (a_start <= df['ExitedTS']) & (df['ExitedTS'] < a_end) & (df['Unit'] == 'PP')]

In [9]:
right_PP_df = df[(a_start <= df['EnteredTS']) & (df['EnteredTS'] < a_end) &  (df['ExitedTS'] >= a_end) & (df['Unit'] == 'PP')]

In [10]:
print(right_PP_df.shape)
right_PP_df[:][['EnteredTS','ExitedTS']]


(37, 17)
Out[10]:
EnteredTS ExitedTS
25283 2016-06-11 09:09:53 2016-06-16 00:54:04
25286 2016-06-14 10:14:50 2016-06-16 01:21:06
25287 2016-06-14 23:06:20 2016-06-16 02:48:32
25294 2016-06-13 09:06:27 2016-06-16 06:08:35
25297 2016-06-15 03:07:51 2016-06-16 06:53:49
25312 2016-06-15 18:30:35 2016-06-16 13:05:38
25320 2016-06-13 21:05:12 2016-06-16 16:17:26
25327 2016-06-13 22:45:53 2016-06-16 18:11:57
25328 2016-06-14 17:21:16 2016-06-16 18:32:38
25342 2016-06-14 19:21:25 2016-06-16 21:59:03
25352 2016-06-15 13:32:11 2016-06-17 02:01:55
25353 2016-06-15 13:49:54 2016-06-17 03:19:07
25356 2016-06-15 00:26:28 2016-06-17 05:26:46
25357 2016-06-15 14:36:07 2016-06-17 05:39:05
25364 2016-06-13 16:04:52 2016-06-17 07:32:43
25370 2016-06-09 08:08:10 2016-06-17 08:06:48
25373 2016-06-15 23:53:28 2016-06-17 10:16:26
25376 2016-06-14 00:11:03 2016-06-17 12:44:17
25377 2016-06-15 13:58:22 2016-06-17 13:37:55
25397 2016-06-14 17:00:10 2016-06-17 19:04:18
25402 2016-06-11 10:37:57 2016-06-17 19:48:32
25409 2016-06-14 10:31:08 2016-06-17 21:34:41
25418 2016-06-09 14:54:37 2016-06-18 00:57:20
25421 2016-06-14 14:30:26 2016-06-18 02:35:50
25454 2016-06-12 11:42:31 2016-06-18 17:39:15
25457 2016-06-12 23:10:09 2016-06-18 18:34:41
25465 2016-06-15 14:39:03 2016-06-18 22:51:33
25474 2016-06-11 20:51:25 2016-06-19 03:34:02
25499 2016-06-11 19:19:52 2016-06-19 15:04:00
25521 2016-06-13 23:29:16 2016-06-19 23:40:32
25533 2016-06-15 14:56:16 2016-06-20 05:24:55
25549 2016-06-14 10:47:13 2016-06-20 10:16:54
25556 2016-06-13 18:49:26 2016-06-20 12:12:00
25626 2016-06-14 22:30:37 2016-06-22 02:12:03
25681 2016-06-13 06:55:22 2016-06-23 03:23:22
25791 2016-06-14 10:23:24 2016-06-24 22:51:52
25965 2016-06-10 00:06:54 2016-06-27 20:40:20

In [39]:
print(left_PP_df.shape)
left_PP_df[:][['EnteredTS','ExitedTS']]


(30, 17)
Out[39]:
EnteredTS ExitedTS
6558 2015-06-21 13:54:53 2015-06-24 00:20:17
6562 2015-06-22 13:42:20 2015-06-24 00:44:04
6567 2015-06-22 01:06:01 2015-06-24 04:27:04
6575 2015-06-21 15:20:06 2015-06-24 09:40:08
6576 2015-06-15 17:39:59 2015-06-24 09:47:58
6583 2015-06-20 11:16:04 2015-06-24 13:46:49
6586 2015-06-22 19:32:53 2015-06-24 16:39:49
6590 2015-06-23 20:59:23 2015-06-24 19:16:10
6592 2015-06-19 08:52:54 2015-06-24 19:54:19
6594 2015-06-19 19:10:27 2015-06-24 20:44:15
6596 2015-06-20 10:29:36 2015-06-24 22:45:08
6598 2015-06-22 00:58:57 2015-06-24 22:53:08
6605 2015-06-22 09:25:36 2015-06-25 00:49:20
6609 2015-06-22 23:26:20 2015-06-25 02:46:38
6611 2015-06-20 15:16:59 2015-06-25 05:04:10
6618 2015-06-14 07:09:52 2015-06-25 06:27:32
6621 2015-06-21 01:42:47 2015-06-25 06:54:31
6628 2015-06-21 21:59:22 2015-06-25 10:09:14
6636 2015-06-23 18:19:03 2015-06-25 16:17:33
6652 2015-06-20 22:15:38 2015-06-25 22:47:47
6664 2015-06-12 17:06:07 2015-06-26 08:25:24
6670 2015-06-23 01:53:41 2015-06-26 09:44:11
6682 2015-06-23 04:38:19 2015-06-26 15:36:45
6699 2015-06-18 11:46:20 2015-06-26 21:07:09
6722 2015-06-23 06:40:50 2015-06-27 05:05:47
6729 2015-06-21 06:01:21 2015-06-27 10:19:00
6735 2015-06-22 10:13:35 2015-06-27 11:54:41
6744 2015-06-23 19:21:20 2015-06-27 17:10:36
6830 2015-06-22 21:55:52 2015-06-29 12:15:50
6892 2015-06-23 23:42:39 2015-06-30 14:15:47