In [226]:
import os
import pandas as pd
import numpy as np
import seasonal
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gmean
pd.options.mode.chained_assignment = None
# Seaborn Style
sns.set(style="whitegrid", color_codes=True)
#Fix plots inline for notebook
%matplotlib inline
# Data Location within CC DS
data_location = os.path.join("..","data","external")
#######################################################
# Helpful Functions I may need
def getFiscalYear(dt):
year = dt.year
if dt.month>6: year += 1
return year
def wavg(group, weight_column, value_column):
d = group[value_column]
w = group[weight_column]
try:
return (d * w).sum() / w.sum()
except ZeroDivisionError:
return d.mean()
#######################################################
# Create Lists of Columns for Subsetting Data for Analysis
# Create Mapping of Column Names for Cleanliness
#
compset_oar_list = [
'Date',
'Region',
'Occ-Trans.',
'Occ-Grp.',
'Occ-Cont.',
'Occ-Total',
'ADR-Trans.',
'ADR-Grp.',
'ADR-Cont.',
'ADR-Total',
'RevPAR-Trans.',
'RevPAR-Grp.',
'RevPAR-Cont.',
'RevPAR-Total'
]
compset_oar_mapping = {
'Occ-Trans.':'OCC_TRANS',
'Occ-Grp.':'OCC_GRP',
'Occ-Cont.':'OCC_CONT',
'Occ-Total':'OCC_TOTAL',
'ADR-Trans.':'ADR_TRANS',
'ADR-Grp.':'ADR_GRP',
'ADR-Cont.':'ADR_CONT',
'ADR-Total':'ADR_TOTAL',
'RevPAR-Trans.':'RevPAR_TRANS',
'RevPAR-Grp.':'RevPAR_GRP',
'RevPAR-Cont.':'RevPAR_CONT',
'RevPAR-Total':'RevPAR_TOTAL'
}
m_cnty_oar_list = [
'Year',
'Month',
'Occ-Trans.',
'Occ-Grp.',
'Occ-Cont.',
'Occ-Total',
'ADR-Trans.',
'ADR-Grp.',
'ADR-Cont.',
'ADR-Total',
'RevPAR-Trans.',
'RevPAR-Grp.',
'RevPAR-Cont.',
'RevPAR-Total'
]
m_cnty_oar_mapping = {
'Occ-Trans.':'OCC_TRANS',
'Occ-Grp.':'OCC_GRP',
'Occ-Cont.':'OCC_CONT',
'Occ-Total':'OCC_TOTAL',
'ADR-Trans.':'ADR_TRANS',
'ADR-Grp.':'ADR_GRP',
'ADR-Cont.':'ADR_CONT',
'ADR-Total':'ADR_TOTAL',
'RevPAR-Trans.':'RevPAR_TRANS',
'RevPAR-Grp.':'RevPAR_GRP',
'RevPAR-Cont.':'RevPAR_CONT',
'RevPAR-Total':'RevPAR_TOTAL'
}
region_mapping = {
'Santa Barbara/Santa Maria, CA':'Santa Barbara County, CA',
'San Jose/Santa Cruz, CA':'San Jose-Santa Cruz, CA',
'Napa Valley, CA':'Napa County, CA',
}
region_exclude = ['Long Beach/Torrance']
small_group = [
'Napa County, CA',
'Santa Barbara, CA',
'Sonoma County, CA',
'South Lake Tahoe, CA',
'Laguna Beach, CA',
'Newport Beach/Dana Point, CA'
]
large_group = [
'San Diego, CA',
'San Francisco/San Mateo, CA',
'Palm Springs, CA',
'San Jose-Santa Cruz, CA'
]
comp_set_amalgam = [
'Napa County, CA',
'Laguna Beach, CA',
'Palm Springs, CA',
'San Diego, CA',
'Santa Barbara County, CA',
'San Francisco/San Mateo, CA',
'Sonoma County, CA',
'South Lake Tahoe, CA',
'San Jose-Santa Cruz, CA',
'Newport Beach/Dana Point, CA'
]
# Read in Monterey County XLS
# Documentation for Monterey County XLS goes here...
file_path = os.path.join(data_location, "Monterey-County-City-STR-ALL.xlsx")
m_cnty = pd.read_excel(file_path, sheetname = "County of Monterey 05-16 Seg")
# Read in Compset data
file_path = os.path.join(data_location, "Comp-Set-STR-ALL.xlsx")
compset_hm = pd.read_excel(file_path, sheetname = "Comp-Set OCC-ADR-RevPARv2")
compset_econ = pd.read_excel(file_path, sheetname = "Comp-Set S-D-Rv3")
# Read in Budget data
file_path = os.path.join(data_location, "TID_BUDGET.xlsx")
budget = pd.read_excel(file_path, sheetname = "income")
#Helper Data to expand number of years so they are even for excel viz
file_path = os.path.join(data_location, "Region-Years.xlsx")
region_years = pd.read_excel(file_path, sheetname = "Regions")
In [201]:
###################################################
# Process Competitive Set down, We just want total supply so we can weight our annual averages
compset_econ_v1 = compset_econ[['Year','Month','Date','Region','Supply-Total']]
compset_econ_v1['Month'] = compset_econ_v1['Month'].apply(str)
compset_econ_v1['Year'] = compset_econ_v1['Year'].apply(str)
compset_econ_v1['Date'] = compset_econ_v1['Year'] + "-" + compset_econ_v1['Month']
compset_econ_v1['Date'] = pd.to_datetime(compset_econ_v1['Date'], format = '%Y-%m')
compset_econ_v1['Region'] = compset_econ_v1['Region'].str.replace('+','')
compset_econ_v1 = compset_econ_v1.replace({'Region':region_mapping})
compset_total = compset_econ_v1.groupby('Region')[['Region','Year','Supply-Total']].mean()
compset_total = compset_total.reset_index()
###################################################
# Process monterey_county
m_cnty_v1 = m_cnty[m_cnty_oar_list]
m_cnty_v2 = m_cnty_v1.rename(columns=m_cnty_oar_mapping)
m_cnty_v2['Month'] = m_cnty_v2['Month'].apply(str)
m_cnty_v2['Year'] = m_cnty_v2['Year'].apply(str)
m_cnty_v2['Date'] = m_cnty_v2['Year'] + "-" + m_cnty_v2['Month']
m_cnty_v2['Date'] = pd.to_datetime(m_cnty_v2['Date'], format = "%Y-%b")
m_cnty_v2['Month'] = m_cnty_v2['Date'].apply(lambda x: x.month)
m_cnty_v2['Region'] = 'Monterey County, CA'
###################################################
# Process compset data
compset_hm_v1 = compset_hm = compset_hm[compset_oar_list]
compset_hm_v2 = compset_hm_v1.rename(columns=compset_oar_mapping)
#Date & Time Manipulation
compset_hm_v2['Date'] = pd.to_datetime(compset_hm_v2['Date'])
compset_hm_v2['Month'] = compset_hm_v2['Date'].apply(lambda x: str(x.month))
compset_hm_v2['Year'] = compset_hm_v2['Date'].apply(lambda x: str(x.year))
#String Manipulation
compset_hm_v2['Region'] = compset_hm_v2['Region'].str.replace('+','')
In [202]:
###############################################
# Combine datasets for Hotel Metrics
compset_hm_v3 = pd.merge(compset_hm_v2, compset_total, on=['Region'], how = 'right')
tot_set = compset_hm_v3.append(m_cnty_v2)
#tot_set = tot_set.append(m_city_v2) # These datasets differ
# Sort before Calculations (Do I need to do this?)
tot_set = tot_set[(tot_set['Date'] < '2016-07-01') & (tot_set['Date'] > '2009-01-01')]
tot_set = tot_set.sort_values(by=['Date'], ascending = True)
tot_set = tot_set.round(2)
tot_set = tot_set.reset_index()
#This is an IMPORTANT STEP-HERE WE APPLY OUR FISCAL YEAR (Jun-Jul) to the YEAR variable.
# Is this best practices? I don't think so... not sure. need training.
tot_set['Year'] = tot_set['Date'].apply(getFiscalYear)
#String Manipulation, The mappings change over time, so we need to make sure all the names are consistent
tot_set = tot_set.replace({'Region':region_mapping})
tot_set['Month'] = tot_set['Month'].apply(str)
# Drop Duplicates, The City of Monterey is included in the Comp Set as well as the city data, the city data went back further
tot_set = tot_set.drop_duplicates(subset=['Region','Year','Month'], keep='last')
# Regions we want to keep, We only had one period of Long Beach. Exclude it
tot_set = tot_set[~(tot_set['Region'].isin(region_exclude))]
# Let's merge everything together, the comp set data and our data about the county and city
tot_set = pd.merge(tot_set, region_years, on = ['Region','Year'], how = 'right')
In [228]:
##############################################
# Calculate metrics, in this case, simple growth MOM & YOY
# May consider YTD
#Average Daily Rate MOM & YOY & Annual changes
tot_set['ADR_GRP_MOM'] = tot_set.groupby(['Region'])['ADR_GRP'].pct_change(1)
tot_set['ADR_TRANS_MOM'] = tot_set.groupby(['Region'])['ADR_TRANS'].pct_change(1)
tot_set['ADR_TOTAL_MOM'] = tot_set.groupby(['Region'])['ADR_TOTAL'].pct_change(1)
tot_set['ADR_GRP_YOY'] = tot_set.groupby(['Region'])['ADR_GRP'].pct_change(12)
tot_set['ADR_TRANS_YOY'] = tot_set.groupby(['Region'])['ADR_TRANS'].pct_change(12)
tot_set['ADR_TOTAL_YOY'] = tot_set.groupby(['Region'])['ADR_TOTAL'].pct_change(12)
#Occupancy MOM & YOY & Annual
tot_set['OCC_GRP_MOM'] = tot_set.groupby(['Region'])['OCC_GRP'].pct_change(1)
tot_set['OCC_TRANS_MOM'] = tot_set.groupby(['Region'])['OCC_TRANS'].pct_change(1)
tot_set['OCC_TOTAL_MOM'] = tot_set.groupby(['Region'])['OCC_TOTAL'].pct_change(1)
tot_set['OCC_GRP_YOY'] = tot_set.groupby(['Region'])['OCC_GRP'].pct_change(12)
tot_set['OCC_TRANS_YOY'] = tot_set.groupby(['Region'])['OCC_TRANS'].pct_change(12)
tot_set['OCC_TOTAL_YOY'] = tot_set.groupby(['Region'])['OCC_TOTAL'].pct_change(12)
#RevPar MOM & YOY & Annual
tot_set['RevPAR_GRP_MOM'] = tot_set.groupby(['Region'])['RevPAR_GRP'].pct_change(1)
tot_set['RevPAR_TRANS_MOM'] = tot_set.groupby(['Region'])['RevPAR_TRANS'].pct_change(1)
tot_set['RevPAR_TOTAL_MOM'] = tot_set.groupby(['Region'])['RevPAR_TOTAL'].pct_change(1)
tot_set['RevPAR_GRP_YOY'] = tot_set.groupby(['Region'])['RevPAR_GRP'].pct_change(12)
tot_set['RevPAR_TRANS_YOY'] = tot_set.groupby(['Region'])['RevPAR_TRANS'].pct_change(12)
tot_set['RevPAR_TOTAL_YOY'] = tot_set.groupby(['Region'])['RevPAR_TOTAL'].pct_change(12)
tot_set.groupby(['Region','Year'])
Out[228]:
In [214]:
agg_set = pd.DataFrame()
agg_set['ADR_GRP_ANN'] = tot_set.groupby(['Region','Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
agg_set
Out[214]:
In [222]:
##################################################
# Let's visualize this data for output to Tammy & Rob
agg_set = pd.DataFrame()
#########################################################################################
# Now that metrics are calculated lets create the aggregated set for Annual Averages and Rankings
agg_set['ADR_GRP_ANN'] = tot_set.groupby(['Region','Year'])['ADR_GRP_YOY'].mean()
agg_set['ADR_TRANS_ANN'] = tot_set.groupby(['Region','Year'])['ADR_TRANS_YOY'].mean()
agg_set['ADR_TOTAL_ANN'] = tot_set.groupby(['Region','Year'])['ADR_TOTAL_YOY'].mean()
agg_set['OCC_GRP_ANN'] = tot_set.groupby(['Region','Year'])['OCC_GRP_YOY'].mean()
agg_set['OCC_TRANS_ANN'] = tot_set.groupby(['Region','Year'])['OCC_TRANS_YOY'].mean()
agg_set['OCC_TOTAL_ANN'] = tot_set.groupby(['Region','Year'])['OCC_TOTAL_YOY'].mean()
agg_set['RevPAR_GRP_ANN'] = tot_set.groupby(['Region','Year'])['RevPAR_GRP_YOY'].mean()
agg_set['RevPAR_TRANS_ANN'] = tot_set.groupby(['Region','Year'])['RevPAR_TRANS_YOY'].mean()
agg_set['RevPAR_TOTAL_ANN'] = tot_set.groupby(['Region','Year'])['RevPAR_TOTAL_YOY'].mean()
# THIS SECTION IS FOR WEIGHTED AVERAGES
agg_set['ADR_GRP_ANN'] = tot_set.groupby(['Region','Year'])['ADR_GRP_YOY'].mean()
agg_set['ADR_TRANS_ANN'] = tot_set.groupby(['Region','Year'])['ADR_TRANS_YOY'].mean()
agg_set['ADR_TOTAL_ANN'] = tot_set.groupby(['Region','Year'])['ADR_TOTAL_YOY'].mean()
agg_set['OCC_GRP_ANN'] = tot_set.groupby(['Region','Year'])['OCC_GRP_YOY'].mean()
agg_set['OCC_TRANS_ANN'] = tot_set.groupby(['Region','Year'])['OCC_TRANS_YOY'].mean()
agg_set['OCC_TOTAL_ANN'] = tot_set.groupby(['Region','Year'])['OCC_TOTAL_YOY'].mean()
agg_set['RevPAR_GRP_ANN'] = tot_set.groupby(['Region','Year'])['RevPAR_GRP_YOY'].mean()
agg_set['RevPAR_TRANS_ANN'] = tot_set.groupby(['Region','Year'])['RevPAR_TRANS_YOY'].mean()
agg_set['RevPAR_TOTAL_ANN'] = tot_set.groupby(['Region','Year'])['RevPAR_TOTAL_YOY'].mean()
# Actuals Averaged
agg_set['ADR_GRP_AVG'] = tot_set.groupby(['Region','Year'])['ADR_GRP'].mean()
agg_set['ADR_TRANS_AVG'] = tot_set.groupby(['Region','Year'])['ADR_TRANS'].mean()
agg_set['ADR_TOTAL_AVG'] = tot_set.groupby(['Region','Year'])['ADR_TOTAL'].mean()
# Actuals Averaged
agg_set['OCC_GRP_AVG'] = tot_set.groupby(['Region','Year'])['OCC_GRP'].mean()
agg_set['OCC_TRANS_AVG'] = tot_set.groupby(['Region','Year'])['OCC_TRANS'].mean()
agg_set['OCC_TOTAL_AVG'] = tot_set.groupby(['Region','Year'])['OCC_TOTAL'].mean()
#Actuals Averaged
agg_set['RevPAR_GRP_AVG'] = tot_set.groupby(['Region','Year'])['RevPAR_GRP'].mean()
agg_set['RevPAR_TRANS_AVG'] = tot_set.groupby(['Region','Year'])['RevPAR_TRANS'].mean()
agg_set['RevPAR_TOTAL_AVG'] = tot_set.groupby(['Region','Year'])['RevPAR_TOTAL'].mean()
agg_set = agg_set.reset_index()
##########################################################################################
# Subset agg_set so we can amalgamate the comp set growth
comp_agg = pd.DataFrame()
comp_set = tot_set[tot_set['Region'].isin(comp_set_amalgam)]
# Annual Average Daily Rate using Year Over Year Percentages, averaging across comp set Regions
comp_agg['ADR_GRP_ANN'] = comp_set.groupby(['Year'])['ADR_GRP_YOY'].mean()
comp_agg['ADR_TRANS_ANN'] = comp_set.groupby(['Year'])['ADR_TRANS_YOY'].mean()
comp_agg['ADR_TOTAL_ANN'] = comp_set.groupby(['Year'])['ADR_TOTAL_YOY'].mean()
# Annual Occupancy using Year Over Year Percentages, averaging across comp set Regions
comp_agg['OCC_GRP_ANN'] = comp_set.groupby(['Year'])['OCC_GRP_YOY'].mean()
comp_agg['OCC_TRANS_ANN'] = comp_set.groupby(['Year'])['OCC_TRANS_YOY'].mean()
comp_agg['OCC_TOTAL_ANN'] = comp_set.groupby(['Year'])['OCC_TOTAL_YOY'].mean()
# Annual RevPAR using Year Over Year Percentages, averaging across comp set Regions
comp_agg['RevPAR_GRP_ANN'] = comp_set.groupby(['Year'])['RevPAR_GRP_YOY'].mean()
comp_agg['RevPAR_TRANS_ANN'] = comp_set.groupby(['Year'])['RevPAR_TRANS_YOY'].mean()
comp_agg['RevPAR_TOTAL_ANN'] = comp_set.groupby(['Year'])['RevPAR_TOTAL_YOY'].mean()
# THIS SECTION IS FOR WEIGHTED AVERAGES
# Annual Average Daily Rate using Year Over Year Percentages, weighting the data using the total supply of the destination
comp_agg['ADR_GRP_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_GRP_YOY')
comp_agg['ADR_TRANS_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TRANS_YOY')
comp_agg['ADR_TOTAL_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TOTAL_YOY')
# Annual Occupancy using Year Over Year Percentages, weighting the data using the total supply of the destination
comp_agg['OCC_GRP_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
comp_agg['OCC_TRANS_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TRANS_YOY')
comp_agg['OCC_TOTAL_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TOTAL_YOY')
# Annual RevPAR using Year Over Year Percentages, weighting the data using the total supply of the destination
comp_agg['RevPAR_GRP_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_GRP_YOY')
comp_agg['RevPAR_TRANS_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TRANS_YOY')
comp_agg['RevPAR_TOTAL_ANN_WT'] = comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TOTAL_YOY')
# Actuals Averaged
comp_agg['ADR_GRP_AVG'] = comp_set.groupby(['Year'])['ADR_GRP'].mean()
comp_agg['ADR_TRANS_AVG'] = comp_set.groupby(['Year'])['ADR_TRANS'].mean()
comp_agg['ADR_TOTAL_AVG'] = comp_set.groupby(['Year'])['ADR_TOTAL'].mean()
# Actuals Averaged
comp_agg['OCC_GRP_AVG'] = comp_set.groupby(['Year'])['OCC_GRP'].mean()
comp_agg['OCC_TRANS_AVG'] = comp_set.groupby(['Year'])['OCC_TRANS'].mean()
comp_agg['OCC_TOTAL_AVG'] = comp_set.groupby(['Year'])['OCC_TOTAL'].mean()
#Actuals Averaged
comp_agg['RevPAR_GRP_AVG'] = comp_set.groupby([ 'Year'])['RevPAR_GRP'].mean()
comp_agg['RevPAR_TRANS_AVG'] = comp_set.groupby([ 'Year'])['RevPAR_TRANS'].mean()
comp_agg['RevPAR_TOTAL_AVG'] = comp_set.groupby([ 'Year'])['RevPAR_TOTAL'].mean()
comp_agg = comp_agg.reset_index()
comp_agg['Region'] = 'Competitive Set'
In [225]:
########################################################################################
# Small group aggregation. Lets take the small groups from our comp set and re-run the annual averages
small_group_agg = pd.DataFrame()
small_group_set = tot_set[tot_set['Region'].isin(small_group)]
small_group_agg['ADR_GRP_ANN'] = small_group_set.groupby(['Year'])['ADR_GRP_YOY'].mean()
small_group_agg['ADR_TRANS_ANN'] = small_group_set.groupby(['Year'])['ADR_TRANS_YOY'].mean()
small_group_agg['ADR_TOTAL_ANN'] = small_group_set.groupby(['Year'])['ADR_TOTAL_YOY'].mean()
small_group_agg['OCC_GRP_ANN'] = small_group_set.groupby(['Year'])['OCC_GRP_YOY'].mean()
small_group_agg['OCC_TRANS_ANN'] = small_group_set.groupby(['Year'])['OCC_TRANS_YOY',].mean()
small_group_agg['OCC_TOTAL_ANN'] = small_group_set.groupby(['Year'])['OCC_TOTAL_YOY'].mean()
small_group_agg['RevPAR_GRP_ANN'] = small_group_set.groupby(['Year'])[['Date', 'RevPAR_GRP_YOY', 'Region']].mean()
small_group_agg['RevPAR_TRANS_ANN'] = small_group_set.groupby(['Year'])[['Date', 'RevPAR_TRANS_YOY', 'Region']].mean()
small_group_agg['RevPAR_TOTAL_ANN'] = small_group_set.groupby(['Year'])[['Date', 'RevPAR_TOTAL_YOY', 'Region']].mean()
# THIS SECTION IS FOR WEIGHTED AVERAGES
# Annual Average Daily Rate using Year Over Year Percentages, weighting the data using the total supply of the destination
small_group_agg['ADR_GRP_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_GRP_YOY')
small_group_agg['ADR_TRANS_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TRANS_YOY')
small_group_agg['ADR_TOTAL_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TOTAL_YOY')
# Annual Occupancy using Year Over Year Percentages, weighting the data using the total supply of the destination
small_group_agg['OCC_GRP_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
small_group_agg['OCC_TRANS_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TRANS_YOY')
small_group_agg['OCC_TOTAL_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TOTAL_YOY')
# Annual RevPAR using Year Over Year Percentages, weighting the data using the total supply of the destination
small_group_agg['RevPAR_GRP_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_GRP_YOY')
small_group_agg['RevPAR_TRANS_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TRANS_YOY')
small_group_agg['RevPAR_TOTAL_ANN_WT'] = small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TOTAL_YOY')
# Actuals Averaged
small_group_agg['ADR_GRP_AVG'] = small_group_set.groupby(['Year'])['ADR_GRP'].mean()
small_group_agg['ADR_TRANS_AVG'] = small_group_set.groupby(['Year'])['ADR_TRANS'].mean()
small_group_agg['ADR_TOTAL_AVG'] = small_group_set.groupby(['Year'])['ADR_TOTAL'].mean()
# Actuals Averaged
small_group_agg['OCC_GRP_AVG'] = small_group_set.groupby(['Year'])['OCC_GRP'].mean()
small_group_agg['OCC_TRANS_AVG'] = small_group_set.groupby(['Year'])['OCC_TRANS'].mean()
small_group_agg['OCC_TOTAL_AVG'] = small_group_set.groupby(['Year'])['OCC_TOTAL'].mean()
#Actuals Averaged
small_group_agg['RevPAR_GRP_AVG'] = small_group_set.groupby([ 'Year'])['RevPAR_GRP'].mean()
small_group_agg['RevPAR_TRANS_AVG'] = small_group_set.groupby([ 'Year'])['RevPAR_TRANS'].mean()
small_group_agg['RevPAR_TOTAL_AVG'] = small_group_set.groupby([ 'Year'])['RevPAR_TOTAL'].mean()
small_group_agg = small_group_agg.reset_index()
small_group_agg['Region'] = 'Small Group Meetings'
##########################################################################################
# Large group aggregation. Lets take the large groups from our comp set and re-run the annual averages
large_group_agg = pd.DataFrame()
large_group_set = tot_set[tot_set['Region'].isin(large_group)]
large_group_agg['ADR_GRP_ANN'] = large_group_set.groupby(['Year'])['ADR_GRP_YOY'].mean()
large_group_agg['ADR_TRANS_ANN'] = large_group_set.groupby(['Year'])['ADR_TRANS_YOY'].mean()
large_group_agg['ADR_TOTAL_ANN'] = large_group_set.groupby(['Year'])['ADR_TOTAL_YOY'].mean()
large_group_agg['OCC_GRP_ANN'] = large_group_set.groupby(['Year'])['OCC_GRP_YOY'].mean()
large_group_agg['OCC_TRANS_ANN'] = large_group_set.groupby(['Year'])['OCC_TRANS_YOY',].mean()
large_group_agg['OCC_TOTAL_ANN'] = large_group_set.groupby(['Year'])['OCC_TOTAL_YOY'].mean()
large_group_agg['RevPAR_GRP_ANN'] = large_group_set.groupby(['Year'])[['Date','RevPAR_GRP_YOY','Region']].mean()
large_group_agg['RevPAR_TRANS_ANN'] = large_group_set.groupby(['Year'])[['Date','RevPAR_TRANS_YOY','Region']].mean()
large_group_agg['RevPAR_TOTAL_ANN'] = large_group_set.groupby(['Year'])[['Date','RevPAR_TOTAL_YOY','Region']].mean()
# THIS SECTION IS FOR WEIGHTED AVERAGES
# Annual Average Daily Rate using Year Over Year Percentages, weighting the data using the total supply of the destination
large_group_agg['ADR_GRP_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_GRP_YOY')
large_group_agg['ADR_TRANS_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TRANS_YOY')
large_group_agg['ADR_TOTAL_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TOTAL_YOY')
# Annual Occupancy using Year Over Year Percentages, weighting the data using the total supply of the destination
large_group_agg['OCC_GRP_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
large_group_agg['OCC_TRANS_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TRANS_YOY')
large_group_agg['OCC_TOTAL_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TOTAL_YOY')
# Annual RevPAR using Year Over Year Percentages, weighting the data using the total supply of the destination
large_group_agg['RevPAR_GRP_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_GRP_YOY')
large_group_agg['RevPAR_TRANS_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TRANS_YOY')
large_group_agg['RevPAR_TOTAL_ANN_WT'] = large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TOTAL_YOY')
# Actuals Averaged
large_group_agg['ADR_GRP_AVG'] = large_group_set.groupby(['Year'])['ADR_GRP'].mean()
large_group_agg['ADR_TRANS_AVG'] = large_group_set.groupby(['Year'])['ADR_TRANS'].mean()
large_group_agg['ADR_TOTAL_AVG'] = large_group_set.groupby(['Year'])['ADR_TOTAL'].mean()
# Actuals Averaged
large_group_agg['OCC_GRP_AVG'] = large_group_set.groupby(['Year'])['OCC_GRP'].mean()
large_group_agg['OCC_TRANS_AVG'] = large_group_set.groupby(['Year'])['OCC_TRANS'].mean()
large_group_agg['OCC_TOTAL_AVG'] = large_group_set.groupby(['Year'])['OCC_TOTAL'].mean()
#Actuals Averaged
large_group_agg['RevPAR_GRP_AVG'] = large_group_set.groupby([ 'Year'])['RevPAR_GRP'].mean()
large_group_agg['RevPAR_TRANS_AVG'] = large_group_set.groupby([ 'Year'])['RevPAR_TRANS'].mean()
large_group_agg['RevPAR_TOTAL_AVG'] = large_group_set.groupby([ 'Year'])['RevPAR_TOTAL'].mean()
large_group_agg = large_group_agg.reset_index()
large_group_agg['Region'] = 'Large Group Meetings'
################################################################################################
# Concatenate All Frames together for a final frmae for all analysis
column_order = [
'Region',
'Year',
'ADR_GRP_ANN',
'ADR_GRP_ANN_WT',
'ADR_TRANS_ANN',
'ADR_TRANS_ANN_WT',
'ADR_TOTAL_ANN',
'ADR_TOTAL_ANN_WT',
'OCC_GRP_ANN',
'OCC_GRP_ANN_WT',
'OCC_TRANS_ANN',
'OCC_TRANS_ANN_WT',
'OCC_TOTAL_ANN',
'OCC_TOTAL_ANN_WT',
'RevPAR_GRP_ANN',
'RevPAR_GRP_ANN_WT',
'RevPAR_TRANS_ANN',
'RevPAR_TRANS_ANN_WT',
'RevPAR_TOTAL_ANN',
'RevPAR_TOTAL_ANN_WT',
'ADR_GRP_AVG',
'ADR_TRANS_AVG',
'ADR_TOTAL_AVG',
'OCC_GRP_AVG',
'OCC_TRANS_AVG',
'OCC_TOTAL_AVG',
'RevPAR_GRP_AVG',
'RevPAR_TRANS_AVG',
'RevPAR_TOTAL_AVG'
]
frames = [agg_set,comp_agg,small_group_agg,large_group_agg]
final_agg = pd.concat(frames)
final_agg = final_agg[column_order]
final_agg.to_csv('excel_visualizations.csv', index = False)