In [1]:
# Set up autoreloading of modules so that I can debug code in external files
# (%autoreload 2 reloads all modules before each cell executes, so edits to the
# local helper modules are picked up without restarting the kernel)
%load_ext autoreload
%autoreload 2
In [2]:
# Third-party stack plus the local hillmaker helper modules.
import pandas as pd
import numpy as np
import matplotlib as mp
import matplotlib.pyplot as plt
# Local modules: bydatetime provides make_bydatetime (used below to build the
# by-datetime bin table); hillpylib provides bin_of_day/bin_of_week helpers.
import bydatetime
import hillpylib as hm
from pandas import Timestamp
# Let's check what version of pandas, numpy and matplotlib we are using
print ("pandas version ", pd.__version__)
print ("numpy version ", np.version.version)
print ("matplotlib version ", mp.__version__)
In [3]:
# Load the short stay stop records, parsing the room entry/exit
# timestamp columns as datetimes
file_stopdata = 'data/ShortStay.csv'
df = pd.read_csv(file_stopdata, parse_dates=['InRoomTS','OutRoomTS'])
df.info()
In [4]:
# Required inputs
# scenario_name is used to build output file names; in/out fields are the
# room entry/exit timestamp columns; cat_fld_name is the category column.
scenario_name = 'sstest_60'
in_fld_name = 'InRoomTS'
out_fld_name = 'OutRoomTS'
cat_fld_name = 'PatType'
# Analysis date range (strings; converted to Timestamps below)
start_analysis = '1/1/1996'
end_analysis = '3/30/1996 23:45'
# Optional inputs
# This next field wasn't in original Hillmaker. Use it to specify the name to use for the overall totals.
# At this point the totals actually aren't being calculated.
tot_fld_name = 'SSU'
bin_size_mins = 60
# Only these patient categories are analyzed
includecats = ['ART','IVT']
## Convert string dates to actual datetimes
# NOTE(review): the *_dt Timestamps and the daynum_to_dayname mapper below do
# not appear to be used elsewhere in this notebook -- candidates for removal.
start_analysis_dt = pd.Timestamp(start_analysis)
end_analysis_dt = pd.Timestamp(end_analysis)
# Mapper from weekday integer to string
daynum_to_dayname = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
In [13]:
# Exploratory: filtered copy containing only the categories of interest
df2 = df[df['PatType'].isin(includecats)]
In [14]:
df2.info()
In [15]:
df2.groupby('PatType').describe()
Out[15]:
In [16]:
# Apply the same category filter to df itself; df2 above was exploration only
# and is not used after this point.
df = df[df['PatType'].isin(includecats)]
In [17]:
df.groupby('PatType').describe()
Out[17]:
In [5]:
# Build the by-datetime table: rows keyed by category and datetime bin, with
# arrivals, departures and occupancy columns (used by the groupby cells below).
bydt_df = bydatetime.make_bydatetime(df,
in_fld_name,
out_fld_name,
cat_fld_name,
start_analysis,
end_analysis,
tot_fld_name,
bin_size_mins)
In [6]:
bydt_df.dtypes
Out[6]:
In [50]:
bydt_df
Out[50]:
In [51]:
# Sum across categories within each datetime bin to get overall totals
bydt_group = bydt_df.groupby(['datetime'])
In [54]:
tot_arrivals = bydt_group.arrivals.sum()
tot_departures = bydt_group.departures.sum()
tot_occ = bydt_group.occupancy.sum()
#bydt_totals = pd.DataFrame(tot_arrivals)
In [84]:
# NOTE(review): this cell is superseded by the next one (In [90]), which repeats
# the same concat and then adds the day/bin columns -- the duplicate could be deleted.
tot_data = [tot_arrivals,tot_departures,tot_occ]
tot_df = pd.concat(tot_data, axis = 1, keys = [s.name for s in tot_data])
In [90]:
# Combine the three totals Series into one frame (columns named after each Series)
tot_data = [tot_arrivals,tot_departures,tot_occ]
tot_df = pd.concat(tot_data, axis = 1, keys = [s.name for s in tot_data])
# Add day-of-week and time-bin columns derived from the datetime index
tot_df['day_of_week'] = tot_df.index.map(lambda x: x.weekday())
tot_df['bin_of_day'] = tot_df.index.map(lambda x: hm.bin_of_day(x,bin_size_mins))
tot_df['bin_of_week'] = tot_df.index.map(lambda x: hm.bin_of_week(x,bin_size_mins))
In [91]:
# Label the totals rows with the overall-totals category name and give them the
# same (category, datetime) MultiIndex shape as bydt_df so they can be
# concatenated onto it below.
tot_df['category'] = tot_fld_name
tot_df.set_index('category', append=True, inplace=True, drop=False)
tot_df = tot_df.reorder_levels(['category', 'datetime'])
# BUG FIX: index.levels[1] returns the *unique, sorted* level values, which are
# not guaranteed to align row-for-row with the frame (it only happens to work
# when the datetimes are already unique and ordered). get_level_values returns
# the per-row datetime labels in frame order.
tot_df['datetime'] = tot_df.index.get_level_values('datetime')
In [ ]:
tot_df
In [94]:
tot_df.info()
In [96]:
# Append the overall-totals rows (category == tot_fld_name) to the per-category table
bydt_df = pd.concat([bydt_df,tot_df])
In [ ]:
bydt_df.tail(n=25)
In [6]:
def get_occstats(group, stub=''):
    """Return a dict of summary statistics for a group of values.

    Parameters
    ----------
    group : pandas Series (or group) supporting count/mean/min/max/std/quantile
        The values (occupancy, arrivals or departures per bin) to summarize.
    stub : str, optional
        Prefix prepended to every statistic name so occupancy/arrival/departure
        summaries can be told apart after unstacking. Default '' leaves the
        bare names, matching this notebook's existing usage.

    Returns
    -------
    dict mapping '<stub><stat>' -> value for count, mean, min, max, stdev and
    a ladder of percentiles (p50 ... p99).
    """
    return {stub+'count': group.count(), stub+'mean': group.mean(),
            stub+'min': group.min(),
            # BUG FIX: 'stdev' was the only key missing the stub prefix, which
            # broke the naming convention whenever a non-empty stub was passed.
            stub+'max': group.max(), stub+'stdev': group.std(),
            stub+'p50': group.quantile(0.5), stub+'p55': group.quantile(0.55),
            stub+'p60': group.quantile(0.6), stub+'p65': group.quantile(0.65),
            stub+'p70': group.quantile(0.7), stub+'p75': group.quantile(0.75),
            stub+'p80': group.quantile(0.8), stub+'p85': group.quantile(0.85),
            stub+'p90': group.quantile(0.9), stub+'p95': group.quantile(0.95),
            stub+'p975': group.quantile(0.975),
            stub+'p99': group.quantile(0.99)}
In [22]:
# Summary statistics by category / day of week / time-of-day bin.
# get_occstats returns a dict per group, so apply() yields a Series with the
# stat names as an extra index level; unstack() pivots them into columns.
bydt_dfgrp2 = bydt_df.groupby(['category','day_of_week','bin_of_day'])
occ_stats = bydt_dfgrp2['occupancy'].apply(get_occstats)
arr_stats = bydt_dfgrp2['arrivals'].apply(get_occstats)
dep_stats = bydt_dfgrp2['departures'].apply(get_occstats)
occ_stats_summary = occ_stats.unstack()
arr_stats_summary = arr_stats.unstack()
dep_stats_summary = dep_stats.unstack()
In [32]:
occ_stats.dtype
Out[32]:
In [33]:
type(occ_stats)
Out[33]:
In [36]:
occ_stats_summary.info()
In [23]:
# Export the by-datetime table and the three stats summaries as CSVs
file_bydt_csv = 'testing/bydate_' + scenario_name + '.csv'
bydt_df.to_csv(file_bydt_csv, index=False)
file_occ_csv = 'testing/occ_stats_' + scenario_name + '.csv'
file_arr_csv = 'testing/arr_stats_' + scenario_name + '.csv'
file_dep_csv = 'testing/dep_stats_' + scenario_name + '.csv'
occ_stats_summary.to_csv(file_occ_csv)
arr_stats_summary.to_csv(file_arr_csv)
dep_stats_summary.to_csv(file_dep_csv)
In [10]:
# Ad hoc exploration: ART records with an in-room time before Jan 3, 1996
ts = pd.Timestamp('19960103 00:00:00')
print(ts)
In [25]:
24000/24
Out[25]:
In [11]:
df_ART = df[(df.PatType == 'ART') & (df.InRoomTS < ts)]
In [12]:
df_ART.info()
In [13]:
df_ART
Out[13]:
In [14]:
bydt_df.head()
Out[14]:
In [15]:
bydt_df[25:50]
Out[15]:
In [11]:
# --- Second scenario: occupancy analysis of a simulation unit stop log ---
import numpy as np
import pandas as pd
from pandas import Timestamp
# NOTE: this rebinds hm (hillpylib earlier in the notebook) to hillmaker
import hillmaker as hm
file_stopdata = 'data/unit_stop_log_Experiment1_Scenario1_Rep1.csv'
scenario_name = 'log_unitocc_test'
# Field names for the stop log (columns created in the conversion cell below)
in_fld_name = 'EnteredTS'
out_fld_name = 'ExitedTS'
cat_fld_name = 'Unit'
start_analysis = '3/24/2015 00:00'
end_analysis = '6/16/2016 00:00'
# Optional inputs
tot_fld_name = 'OBTot'
bin_size_mins = 60
includecats = ['LDR','PP']
In [21]:
# Load the simulation stop log and convert the numeric 'Entered'/'Exited'
# hour offsets into actual timestamps relative to basedate.
stops_df = pd.read_csv(file_stopdata,index_col=0)
basedate = Timestamp('20150215 00:00:00')
# BUG FIX: these apply() calls previously ran against `df` (the ShortStay frame,
# which has no 'Entered'/'Exited' columns and a different index) instead of
# `stops_df`. The .value / round(..., -9) trick rounds each resulting
# timestamp's nanosecond value to the nearest whole second.
stops_df['EnteredTS'] = stops_df.apply(lambda row:
    Timestamp(round((basedate + pd.DateOffset(hours=row['Entered'])).value,-9)), axis=1)
stops_df['ExitedTS'] = stops_df.apply(lambda row:
    Timestamp(round((basedate + pd.DateOffset(hours=row['Exited'])).value,-9)), axis=1)
# Keep only the unit categories of interest
stops_df = stops_df[stops_df[cat_fld_name].isin(includecats)]
In [22]:
# Sanity checks on the converted stop log
stops_df.info()
In [25]:
stops_df[100:125]
Out[25]:
In [29]:
# Pull one stop record to experiment with timestamp arithmetic.
# BUG FIX: .ix is deprecated (and removed in pandas 1.0); .loc performs the
# label-based row lookup that was intended here.
start = stops_df.loc[188]['EnteredTS']
end = stops_df.loc[188]['ExitedTS']
print(start, end)
print(type(start))
# Compare pandas Timestamp/Timedelta arithmetic with raw numpy datetime64
start_str = '2015-02-18 09:25:46'
end_str = '2015-02-19 21:06:03'
start_analysis_timestamp = Timestamp(start_str)
end_analysis_timestamp = Timestamp(end_str)
start_analysis_dt64 = np.datetime64(start_str)
end_analysis_dt64 = np.datetime64(end_str)
print(start_analysis_timestamp, start_analysis_dt64)
num_days_fromts = end_analysis_timestamp - start_analysis_timestamp
num_days_fromdt64 = end_analysis_dt64 - start_analysis_dt64
print(num_days_fromts, num_days_fromdt64)
print(type(num_days_fromts))
print(type(num_days_fromdt64))
In [35]:
# Experiments with truncating a Timestamp to its date (midnight)
print(start)
print(start.date())
start_tsdate = Timestamp(start.date())
print (start_tsdate)
In [36]:
# Elapsed time since midnight as a Timedelta
gap = start - Timestamp(start.date())
print(gap)
print(type(gap))
In [47]:
# Prototype of flooring a timestamp down to the start of its time bin
# (60-minute bins here); this is the logic behind bin_of_day-style helpers.
minutes = 60
dt = start
floor_seconds = 60 * minutes           # bin width, in seconds
dt_date = Timestamp(dt.date())         # midnight of dt's day
delta = dt - dt_date                   # time elapsed since midnight
print(delta)
tot_seconds = delta.total_seconds()
print(tot_seconds)
# Whole bins elapsed since midnight, scaled back up to seconds
n_whole_bins = tot_seconds // floor_seconds
floor_time = n_whole_bins * floor_seconds
print(floor_time)
gap_seconds = tot_seconds - floor_time
# Midnight plus the floored offset gives the bin's starting datetime
print(dt_date + pd.DateOffset(seconds=floor_time))
In [ ]:
#%time hm.run_hillmaker(scenario_name,df,in_fld_name, out_fld_name,cat_fld_name,start_analysis,end_analysis,tot_fld_name,bin_size_mins,categories=includecats,outputpath='./testing')
In [20]:
df.head()
Out[20]:
In [21]:
df.info()
In [36]:
# Run hillmaker with daily (1440-minute) bins and no totals row.
# NOTE(review): this passes `df`, but the configured field names
# (EnteredTS/ExitedTS/Unit) belong to stops_df -- confirm the intended frame.
scenario_name = 'log_unitocc_test_steadystate'
hm.run_hillmaker(scenario_name,df,in_fld_name, out_fld_name,cat_fld_name,
start_analysis,end_analysis,tot_fld_name,1440,
categories=includecats,totals=False,outputpath='./testing')
In [33]:
# Read back the occupancy stats summary produced by run_hillmaker
occ_df = pd.read_csv('testing/occ_stats_summary_log_unitocc_test_steadystate.csv')
In [34]:
occ_df
Out[34]:
In [38]:
bydt_df
Out[38]:
In [1]:
# --- Plotting section (restart point: execution counts begin again at 1) ---
%matplotlib inline
In [2]:
# Re-import everything needed for plotting so this section is self-contained
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
# Load the by-datetime table written by the run_hillmaker call above
bydt_df = pd.read_csv('testing/bydatetime_log_unitocc_test_steadystate.csv')
In [12]:
# Occupancy values for the 'PP' unit only
pp_occ = bydt_df[(bydt_df['category'] == 'PP')]['occupancy']
In [15]:
plt.hist(pp_occ.values,20)
Out[15]:
In [9]:
# Histogram of occupancy, faceted by unit category
g = sns.FacetGrid(bydt_df, col="category", margin_titles=True)
bins = np.linspace(0, 60, 13)
g.map(plt.hist, "occupancy", color="steelblue", bins=bins, lw=0)
Out[9]:
In [ ]: