In [107]:
import os
import pandas as pd
import numpy as np
import seasonal
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gmean
pd.options.mode.chained_assignment = None
# Seaborn Style
sns.set(style="whitegrid", color_codes=True)

#Fix plots inline for notebook
%matplotlib inline

# Data Location within CC DS
data_location = os.path.join("..","data","external")

#######################################################
# Helpful Functions I may need

def getFiscalYear(dt):
    year = dt.year
    if dt.month>6: year += 1
    return year

def wavg(group, weight_column, value_column):
    d = group[value_column]
    w = group[weight_column]
    try:
        return (d * w).sum() / w.sum()
    except ZeroDivisionError:
        return d.mean()

#######################################################
# Create Lists of Columns for Subsetting Data for Analysis
# Create Mapping of Column Names for Cleanliness

compset_oar_list = [
                   'Date',
                   'Region',
                   'Occ-Trans.',
                   'Occ-Grp.',
                   'Occ-Cont.',
                   'Occ-Total',
                   'ADR-Trans.',
                   'ADR-Grp.',
                   'ADR-Cont.',
                   'ADR-Total',
                   'RevPAR-Trans.',
                   'RevPAR-Grp.',
                   'RevPAR-Cont.',
                    'RevPAR-Total'
                    ]

compset_oar_mapping = {
                   'Occ-Trans.':'OCC_TRANS',
                   'Occ-Grp.':'OCC_GRP',
                   'Occ-Cont.':'OCC_CONT',
                   'Occ-Total':'OCC_TOTAL',
                   'ADR-Trans.':'ADR_TRANS',
                   'ADR-Grp.':'ADR_GRP',
                   'ADR-Cont.':'ADR_CONT',
                   'ADR-Total':'ADR_TOTAL',
                   'RevPAR-Trans.':'RevPAR_TRANS',
                   'RevPAR-Grp.':'RevPAR_GRP',
                   'RevPAR-Cont.':'RevPAR_CONT',
                   'RevPAR-Total':'RevPAR_TOTAL'
                    }

m_cnty_oar_list = [
                   'Year',
                   'Month',
                   'Occ-Trans.',
                   'Occ-Grp.',
                   'Occ-Cont.',
                   'Occ-Total',
                   'ADR-Trans.',
                   'ADR-Grp.',
                   'ADR-Cont.',
                   'ADR-Total',
                   'RevPAR-Trans.',
                   'RevPAR-Grp.',
                   'RevPAR-Cont.',
                   'RevPAR-Total'
                    ]

m_cnty_oar_mapping = {
                   'Occ-Trans.':'OCC_TRANS',
                   'Occ-Grp.':'OCC_GRP',
                   'Occ-Cont.':'OCC_CONT',
                   'Occ-Total':'OCC_TOTAL',
                   'ADR-Trans.':'ADR_TRANS',
                   'ADR-Grp.':'ADR_GRP',
                   'ADR-Cont.':'ADR_CONT',
                   'ADR-Total':'ADR_TOTAL',
                   'RevPAR-Trans.':'RevPAR_TRANS',
                   'RevPAR-Grp.':'RevPAR_GRP',
                   'RevPAR-Cont.':'RevPAR_CONT',
                   'RevPAR-Total':'RevPAR_TOTAL'
                     }

region_mapping = { 
                   'Santa Barbara/Santa Maria, CA':'Santa Barbara County, CA',
                   'San Jose/Santa Cruz, CA':'San Jose-Santa Cruz, CA',
                   'Napa Valley, CA':'Napa County, CA',
                 }

region_exclude = ['Long Beach/Torrance']

small_group =      [
                    'Napa County, CA',
                    'Santa Barbara County, CA',
                    'Sonoma County, CA',
                    'South Lake Tahoe, CA',
                    'Laguna Beach, CA',
                    'Newport Beach/Dana Point, CA']

large_group =   [
                 'San Diego, CA',
                 'San Francisco/San Mateo, CA',
                 'Palm Springs, CA',
                 'San Jose-Santa Cruz, CA'
                ]

comp_set_amalgam = [
                    'Napa County, CA',
                    'Laguna Beach, CA',
                    'Palm Springs, CA',
                    'San Diego, CA',
                    'Santa Barbara County, CA',
                    'San Francisco/San Mateo, CA',
                    'Sonoma County, CA',
                    'South Lake Tahoe, CA',
                    'San Jose-Santa Cruz, CA',
                    'Newport Beach/Dana Point, CA']

home = [
            'City of Monterey, CA',
            'Monterey County, CA']

# Read in Monterey County XLS
# Documentation for Monterey County XLS goes here...
file_path = os.path.join(data_location, "Monterey-County-City-STR-ALL.xlsx")
m_cnty = pd.read_excel(file_path, sheetname = "County of Monterey 05-16 Seg")

# Read in Compset data
file_path = os.path.join(data_location, "Comp-Set-STR-ALL.xlsx")
compset_hm = pd.read_excel(file_path, sheetname = "Comp-Set OCC-ADR-RevPARv2")
compset_econ = pd.read_excel(file_path, sheetname = "Comp-Set S-D-Rv3")

# Read in Budget data
file_path = os.path.join(data_location, "TID_BUDGET.xlsx")
budget = pd.read_excel(file_path, sheetname = "income")

# Helper Data to expand number of years so they are even for excel viz
file_path = os.path.join(data_location, "Region-Years.xlsx")
region_years = pd.read_excel(file_path, sheetname = "Regions")

# Supply-Demand-Revenue Columns Mappings
S_D_R_mapping = {'Demand-Trans.':'D-TRANS',
                 'Demand-Grp.':'D-GRP',
                 'Demand-Con.':'D-CON',
                 'Demand-Total':'D-TOTAL',
                 'Revenue-Trans.':'R-TRANS',
                 'Revnue-Grp.':'R-GRP',
                 'Revenue-Con.':'R-CON',
                 'Revenue-Total':'R-TOTAL',
                 'Supply-Total':'S-TOTAL'}

# Supply-Demand-Revenue Column Selection
S_D_R_cols = ['Year',
              'Region',
              'Month',
              'Date',
              'S-TOTAL']

# Column Order for output to excel
column_order = [
                'Region',
                'Year',
                'TOTAL_SUPPLY',
                'GRP_ROOMNIGHTS',
                'TRANS_ROOMNIGHTS',
                'CONT_ROOMNIGHTS',
                'TOTAL_ROOMNIGHTS',
                'GRP_REVENUE',
                'TRANS_REVENUE',
                'CONT_REVENUE',
                'TOTAL_REVENUE',
                'GRP_OCCUPANCY',
                'TRANS_OCCUPANCY',
                'CONT_OCCUPANCY',
                'TOTAL_OCCUPANCY',
                'GRP_ADR',
                'TRANS_ADR',
                'CONT_ADR',
                'TOTAL_ADR',
                'GRP_RevPAR',
                'TRANS_RevPAR',
                'CONT_RevPAR',
                'TOTAL_RevPAR',
                'GRP_OCC_YOY',
                'TRANS_OCC_YOY',
                'CONT_OCC_YOY',
                'TOTAL_OCC_YOY',
                'GRP_ADR_YOY',
                'TRANS_ADR_YOY',
                'CONT_ADR_YOY',
                'TOTAL_ADR_YOY',
                'GRP_RevPAR_YOY',
                'TRANS_RevPAR_YOY',
                'CONT_RevPAR_YOY',
                'TOTAL_RevPAR_YOY']

In [108]:
###################################################
# Process Competitive Set down, We just want total supply so we can weight our annual averages
compset_econ = compset_econ.rename(columns=S_D_R_mapping)
compset_econ_v1 = compset_econ[S_D_R_cols]
compset_econ_v1['Month'] = compset_econ_v1['Month'].apply(str)
compset_econ_v1['Year'] = compset_econ_v1['Year'].apply(str)
compset_econ_v1['Date'] = compset_econ_v1['Year'] + "-" + compset_econ_v1['Month']
compset_econ_v1['Date'] = pd.to_datetime(compset_econ_v1['Date'], format = '%Y-%m')
compset_econ_v1['Region'] = compset_econ_v1['Region'].str.replace('+','')
compset_econ_v1 = compset_econ_v1.replace({'Region':region_mapping})

###################################################
# Process monterey_county
m_cnty_v1 = m_cnty[m_cnty_oar_list]
m_cnty_v2 = m_cnty_v1.rename(columns=m_cnty_oar_mapping)
m_cnty_v2['Month'] = m_cnty_v2['Month'].apply(str)
m_cnty_v2['Year'] = m_cnty_v2['Year'].apply(str)
m_cnty_v2['Date'] = m_cnty_v2['Year'] + "-" + m_cnty_v2['Month']
m_cnty_v2['Date'] = pd.to_datetime(m_cnty_v2['Date'], format = "%Y-%b")
m_cnty_v2['Month'] = m_cnty_v2['Date'].apply(lambda x: x.month)
m_cnty_v2['Region'] = 'Monterey County, CA'

###################################################
# Process compset data
compset_hm_v1 = compset_hm = compset_hm[compset_oar_list]
compset_hm_v2 = compset_hm_v1.rename(columns=compset_oar_mapping)

# Date & Time Manipulation
compset_hm_v2['Date'] = pd.to_datetime(compset_hm_v2['Date'])
compset_hm_v2['Month'] = compset_hm_v2['Date'].apply(lambda x: str(x.month))
compset_hm_v2['Year'] = compset_hm_v2['Date'].apply(lambda x: str(x.year))

# String Manipulation
compset_hm_v2['Region'] = compset_hm_v2['Region'].str.replace('+','')
compset_hm_v2 = compset_hm_v2.replace({'Region':region_mapping})

In [109]:
###############################################
# Combine datasets for Hotel Metrics 
tot_set = compset_hm_v2.append(m_cnty_v2)
tot_set = pd.merge(tot_set, compset_econ_v1, on=['Region','Month','Year','Date'])

# Sort before Calculations (Do I need to do this?)
tot_set = tot_set[(tot_set['Date'] < '2016-07-01') & (tot_set['Date'] > '2009-01-01')]
tot_set = tot_set.sort_values(by=['Date'], ascending = True)
tot_set = tot_set.round(2)
tot_set = tot_set.reset_index()



# This is an IMPORTANT STEP-HERE WE APPLY OUR FISCAL YEAR (Jun-Jul) to the YEAR variable.
# Is this best practices? I don't think so... not sure. need training.
tot_set['Year'] = tot_set['Date'].apply(getFiscalYear)

# String Manipulation, The mappings change over time, so we need to make sure all the names are consistent
tot_set = tot_set.replace({'Region':region_mapping})
tot_set['Month'] = tot_set['Month'].apply(str)

# Drop Duplicates, The City of Monterey is included in the Comp Set as well as the city data, the city data went back further
tot_set = tot_set.drop_duplicates(subset=['Region','Date'], keep='last')

# Regions we want to keep, We only had one period of Long Beach. Exclude it
tot_set = tot_set[~(tot_set['Region'].isin(region_exclude))]

# Let's merge everything together, the comp set data and our data about the county and city
tot_set = pd.merge(tot_set, region_years, on = ['Region','Year'], how = 'right')
tot_set = tot_set.reset_index()

In [110]:
writer = pd.ExcelWriter('output.xlsx')

# This Needs to be Broken out into a Raw Sheet
tot_set_v2 = tot_set[['Region','Year','Month','Date','S-TOTAL',\
                      'OCC_GRP','OCC_TRANS','OCC_CONT','OCC_TOTAL',\
                      'ADR_GRP','ADR_TRANS','ADR_CONT','ADR_TOTAL']]
# Create Excel Sheet and input tot_set_v2 as 'Raw-Data-1'
tot_set_v2.to_excel(writer, 'Raw-Data', index=False)
tot_set_v2['GRP_ROOMNIGHTS'] = (tot_set_v2['OCC_GRP']/100)*tot_set_v2['S-TOTAL']
tot_set_v2['TRANS_ROOMNIGHTS'] = (tot_set_v2['OCC_TRANS']/100)*tot_set_v2['S-TOTAL']
tot_set_v2['CONT_ROOMNIGHTS'] = (tot_set_v2['OCC_CONT']/100)*tot_set_v2['S-TOTAL']
tot_set_v2['TOTAL_ROOMNIGHTS'] = (tot_set_v2['OCC_TOTAL']/100)*tot_set_v2['S-TOTAL']
tot_set_v2['GRP_REVENUE'] = tot_set_v2['ADR_GRP']*tot_set_v2['GRP_ROOMNIGHTS']
tot_set_v2['TRANS_REVENUE'] = tot_set_v2['ADR_TRANS']*tot_set_v2['TRANS_ROOMNIGHTS']
tot_set_v2['CONT_REVENUE'] = tot_set_v2['ADR_CONT']*tot_set_v2['CONT_ROOMNIGHTS']
tot_set_v2['TOTAL_REVENUE'] = tot_set_v2['ADR_TOTAL']*tot_set_v2['TOTAL_ROOMNIGHTS']

# Create Excelk Sheet here and input tot_set_v2 as 'Raw-Data-2' Imputed
tot_set_v2.to_excel(writer, 'Raw-Data Imputed', index=False)

In [111]:
###########################################################################
# Grab the entire competitive set and aggregate  
home_agg = pd.DataFrame()
home_set = tot_set_v2[tot_set_v2['Region'].isin(home)]

# We need the total supply for each year for the entire compset (whatever the compset is defined as)
home_agg['TOTAL_SUPPLY'] = home_set.groupby(['Region','Year'])['S-TOTAL'].sum()

# We need the total group room nights for each year for the entire compset (whatever the compset is defined as)
home_agg['GRP_ROOMNIGHTS'] = home_set.groupby(['Region','Year'])['GRP_ROOMNIGHTS'].sum()
home_agg['TRANS_ROOMNIGHTS'] = home_set.groupby(['Region','Year'])['TRANS_ROOMNIGHTS'].sum()
home_agg['CONT_ROOMNIGHTS'] = home_set.groupby(['Region','Year'])['CONT_ROOMNIGHTS'].sum()
home_agg['TOTAL_ROOMNIGHTS'] = home_set.groupby(['Region','Year'])['TOTAL_ROOMNIGHTS'].sum()

home_agg['GRP_REVENUE'] = home_set.groupby(['Region','Year'])['GRP_REVENUE'].sum()
home_agg['TRANS_REVENUE'] = home_set.groupby(['Region','Year'])['TRANS_REVENUE'].sum()
home_agg['CONT_REVENUE'] = home_set.groupby(['Region','Year'])['CONT_REVENUE'].sum()
home_agg['TOTAL_REVENUE'] = home_set.groupby(['Region','Year'])['TOTAL_REVENUE'].sum()
home_agg = home_agg.reset_index()

##########################################################################
# Compute the KPIs 
home_agg['GRP_OCCUPANCY'] = home_agg['GRP_ROOMNIGHTS']/home_agg['TOTAL_SUPPLY']
home_agg['TRANS_OCCUPANCY'] = home_agg['TRANS_ROOMNIGHTS']/home_agg['TOTAL_SUPPLY']
home_agg['CONT_OCCUPANCY'] = home_agg['CONT_ROOMNIGHTS']/home_agg['TOTAL_SUPPLY']
home_agg['TOTAL_OCCUPANCY'] = home_agg['TOTAL_ROOMNIGHTS']/home_agg['TOTAL_SUPPLY']

home_agg['GRP_ADR'] = home_agg['GRP_REVENUE']/home_agg['GRP_ROOMNIGHTS']
home_agg['TRANS_ADR'] = home_agg['TRANS_REVENUE']/home_agg['TRANS_ROOMNIGHTS']
home_agg['CONT_ADR'] = home_agg['CONT_REVENUE']/home_agg['CONT_ROOMNIGHTS']
home_agg['TOTAL_ADR'] = home_agg['TOTAL_REVENUE']/home_agg['TOTAL_ROOMNIGHTS']

home_agg['GRP_RevPAR'] = home_agg['GRP_REVENUE']/home_agg['TOTAL_SUPPLY']
home_agg['TRANS_RevPAR'] = home_agg['TRANS_REVENUE']/home_agg['TOTAL_SUPPLY']
home_agg['CONT_RevPAR'] = home_agg['CONT_REVENUE']/home_agg['TOTAL_SUPPLY']
home_agg['TOTAL_RevPAR'] = home_agg['TOTAL_REVENUE']/home_agg['TOTAL_SUPPLY']

##########################################################################
# Compute the metrics year over year percent change

home_agg['GRP_OCC_YOY'] = home_agg.groupby(['Region'])['GRP_OCCUPANCY'].pct_change(1)
home_agg['TRANS_OCC_YOY'] = home_agg.groupby(['Region'])['TRANS_OCCUPANCY'].pct_change(1)
home_agg['CONT_OCC_YOY'] = home_agg.groupby(['Region'])['CONT_OCCUPANCY'].pct_change(1)
home_agg['TOTAL_OCC_YOY'] = home_agg.groupby(['Region'])['TOTAL_OCCUPANCY'].pct_change(1)

home_agg['GRP_ADR_YOY'] = home_agg.groupby(['Region'])['GRP_ADR'].pct_change(1)
home_agg['TRANS_ADR_YOY'] = home_agg.groupby(['Region'])['TRANS_ADR'].pct_change(1)
home_agg['CONT_ADR_YOY'] = home_agg.groupby(['Region'])['CONT_ADR'].pct_change(1)
home_agg['TOTAL_ADR_YOY'] = home_agg.groupby(['Region'])['TOTAL_ADR'].pct_change(1)

home_agg['GRP_RevPAR_YOY'] = home_agg.groupby(['Region'])['GRP_RevPAR'].pct_change(1)
home_agg['TRANS_RevPAR_YOY'] = home_agg.groupby(['Region'])['TRANS_RevPAR'].pct_change(1)
home_agg['CONT_RevPAR_YOY'] = home_agg.groupby(['Region'])['CONT_RevPAR'].pct_change(1)
home_agg['TOTAL_RevPAR_YOY'] = home_agg.groupby(['Region'])['TOTAL_RevPAR'].pct_change(1)
home_agg = home_agg[column_order]
#Create Excel Sheet
home_agg.to_excel(writer, 'City-County Monterey Agg', index=False)

In [112]:
def wavg(group, weight_column, value_column):
    d = group[value_column]
    w = group[weight_column]
    try:
        return (d * w).sum() / w.sum()
    except ZeroDivisionError:
        return d.mean()

###########################################################################
# Grab the entire competitive set and aggregate  
comp_agg = pd.DataFrame()
comp_set = tot_set_v2[tot_set_v2['Region'].isin(comp_set_amalgam)]

# We need the total supply for each year for the entire compset (whatever the compset is defined as)
comp_agg['TOTAL_SUPPLY'] = comp_set.groupby(['Year'])['S-TOTAL'].sum()

# We need the total group room nights for each year for the entire compset (whatever the compset is defined as)
comp_agg['GRP_ROOMNIGHTS'] = comp_set.groupby(['Year'])['GRP_ROOMNIGHTS'].sum()
comp_agg['TRANS_ROOMNIGHTS'] = comp_set.groupby(['Year'])['TRANS_ROOMNIGHTS'].sum()
comp_agg['CONT_ROOMNIGHTS'] = comp_set.groupby(['Year'])['CONT_ROOMNIGHTS'].sum()
comp_agg['TOTAL_ROOMNIGHTS'] = comp_set.groupby(['Year'])['TOTAL_ROOMNIGHTS'].sum()

comp_agg['GRP_REVENUE'] = comp_set.groupby(['Year'])['GRP_REVENUE'].sum()
comp_agg['TRANS_REVENUE'] = comp_set.groupby(['Year'])['TRANS_REVENUE'].sum()
comp_agg['CONT_REVENUE'] = comp_set.groupby(['Year'])['CONT_REVENUE'].sum()
comp_agg['TOTAL_REVENUE'] = comp_set.groupby(['Year'])['TOTAL_REVENUE'].sum()
comp_agg = comp_agg.reset_index()
comp_agg['Region'] = 'Competitive Set'

##########################################################################
# Compute the KPIs 
comp_agg['GRP_OCCUPANCY'] = comp_agg['GRP_ROOMNIGHTS']/comp_agg['TOTAL_SUPPLY']
comp_agg['TRANS_OCCUPANCY'] = comp_agg['TRANS_ROOMNIGHTS']/comp_agg['TOTAL_SUPPLY']
comp_agg['CONT_OCCUPANCY'] = comp_agg['CONT_ROOMNIGHTS']/comp_agg['TOTAL_SUPPLY']
comp_agg['TOTAL_OCCUPANCY'] = comp_agg['TOTAL_ROOMNIGHTS']/comp_agg['TOTAL_SUPPLY']

comp_agg['GRP_ADR'] = comp_agg['GRP_REVENUE']/comp_agg['GRP_ROOMNIGHTS']
comp_agg['TRANS_ADR'] = comp_agg['TRANS_REVENUE']/comp_agg['TRANS_ROOMNIGHTS']
comp_agg['CONT_ADR'] = comp_agg['CONT_REVENUE']/comp_agg['CONT_ROOMNIGHTS']
comp_agg['TOTAL_ADR'] = comp_agg['TOTAL_REVENUE']/comp_agg['TOTAL_ROOMNIGHTS']

comp_agg['GRP_RevPAR'] = comp_agg['GRP_REVENUE']/comp_agg['TOTAL_SUPPLY']
comp_agg['TRANS_RevPAR'] = comp_agg['TRANS_REVENUE']/comp_agg['TOTAL_SUPPLY']
comp_agg['CONT_RevPAR'] = comp_agg['CONT_REVENUE']/comp_agg['TOTAL_SUPPLY']
comp_agg['TOTAL_RevPAR'] = comp_agg['TOTAL_REVENUE']/comp_agg['TOTAL_SUPPLY']

##########################################################################
# Compute the metrics year over year percent change

comp_agg['GRP_OCC_YOY'] = comp_agg['GRP_OCCUPANCY'].pct_change(1)
comp_agg['TRANS_OCC_YOY'] = comp_agg['TRANS_OCCUPANCY'].pct_change(1)
comp_agg['CONT_OCC_YOY'] = comp_agg['CONT_OCCUPANCY'].pct_change(1)
comp_agg['TOTAL_OCC_YOY'] = comp_agg['TOTAL_OCCUPANCY'].pct_change(1)

comp_agg['GRP_ADR_YOY'] = comp_agg['GRP_ADR'].pct_change(1)
comp_agg['TRANS_ADR_YOY'] = comp_agg['TRANS_ADR'].pct_change(1)
comp_agg['CONT_ADR_YOY'] = comp_agg['CONT_ADR'].pct_change(1)
comp_agg['TOTAL_ADR_YOY'] = comp_agg['TOTAL_ADR'].pct_change(1)

comp_agg['GRP_RevPAR_YOY'] = comp_agg['GRP_RevPAR'].pct_change(1)
comp_agg['TRANS_RevPAR_YOY'] = comp_agg['TRANS_RevPAR'].pct_change(1)
comp_agg['CONT_RevPAR_YOY'] = comp_agg['CONT_RevPAR'].pct_change(1)
comp_agg['TOTAL_RevPAR_YOY'] = comp_agg['TOTAL_RevPAR'].pct_change(1)
comp_agg = comp_agg[column_order]
#Create Excel Sheet 'Competitive Set Aggregated'
comp_agg.to_excel(writer, 'Comp Set Aggregated', index=False)


###########################################################################
# Grab the small group competitive set and aggregate
small_group_agg = pd.DataFrame()
small_group_set = tot_set_v2[tot_set_v2['Region'].isin(small_group)]

# We need the total supply for each year for the entire compset (whatever the compset is defined as)
small_group_agg['TOTAL_SUPPLY'] = small_group_set.groupby(['Year'])['S-TOTAL'].sum()

# We need the total group room nights for each year for the entire compset (whatever the compset is defined as)
small_group_agg['GRP_ROOMNIGHTS'] = small_group_set.groupby(['Year'])['GRP_ROOMNIGHTS'].sum()
small_group_agg['TRANS_ROOMNIGHTS'] = small_group_set.groupby(['Year'])['TRANS_ROOMNIGHTS'].sum()
small_group_agg['CONT_ROOMNIGHTS'] = small_group_set.groupby(['Year'])['CONT_ROOMNIGHTS'].sum()
small_group_agg['TOTAL_ROOMNIGHTS'] = small_group_set.groupby(['Year'])['TOTAL_ROOMNIGHTS'].sum()

small_group_agg['GRP_REVENUE'] = small_group_set.groupby(['Year'])['GRP_REVENUE'].sum()
small_group_agg['TRANS_REVENUE'] = small_group_set.groupby(['Year'])['TRANS_REVENUE'].sum()
small_group_agg['CONT_REVENUE'] = small_group_set.groupby(['Year'])['CONT_REVENUE'].sum()
small_group_agg['TOTAL_REVENUE'] = small_group_set.groupby(['Year'])['TOTAL_REVENUE'].sum()
small_group_agg = small_group_agg.reset_index()
small_group_agg['Region'] = 'Small Group Set'

##########################################################################
# Compute the KPIs 
small_group_agg['GRP_OCCUPANCY'] = small_group_agg['GRP_ROOMNIGHTS']/small_group_agg['TOTAL_SUPPLY']
small_group_agg['TRANS_OCCUPANCY'] = small_group_agg['TRANS_ROOMNIGHTS']/small_group_agg['TOTAL_SUPPLY']
small_group_agg['CONT_OCCUPANCY'] = small_group_agg['CONT_ROOMNIGHTS']/small_group_agg['TOTAL_SUPPLY']
small_group_agg['TOTAL_OCCUPANCY'] = small_group_agg['TOTAL_ROOMNIGHTS']/small_group_agg['TOTAL_SUPPLY']

small_group_agg['GRP_ADR'] = small_group_agg['GRP_REVENUE']/small_group_agg['GRP_ROOMNIGHTS']
small_group_agg['TRANS_ADR'] = small_group_agg['TRANS_REVENUE']/small_group_agg['TRANS_ROOMNIGHTS']
small_group_agg['CONT_ADR'] = small_group_agg['CONT_REVENUE']/small_group_agg['CONT_ROOMNIGHTS']
small_group_agg['TOTAL_ADR'] = small_group_agg['TOTAL_REVENUE']/small_group_agg['TOTAL_ROOMNIGHTS']

small_group_agg['GRP_RevPAR'] = small_group_agg['GRP_REVENUE']/small_group_agg['TOTAL_SUPPLY']
small_group_agg['TRANS_RevPAR'] = small_group_agg['TRANS_REVENUE']/small_group_agg['TOTAL_SUPPLY']
small_group_agg['CONT_RevPAR'] = small_group_agg['CONT_REVENUE']/small_group_agg['TOTAL_SUPPLY']
small_group_agg['TOTAL_RevPAR'] = small_group_agg['TOTAL_REVENUE']/small_group_agg['TOTAL_SUPPLY']

##########################################################################
# Compute the metrics year over year percent change
small_group_agg['GRP_OCC_YOY'] = small_group_agg['GRP_OCCUPANCY'].pct_change(1)
small_group_agg['TRANS_OCC_YOY'] = small_group_agg['TRANS_OCCUPANCY'].pct_change(1)
small_group_agg['CONT_OCC_YOY'] = small_group_agg['CONT_OCCUPANCY'].pct_change(1)
small_group_agg['TOTAL_OCC_YOY'] = small_group_agg['TOTAL_OCCUPANCY'].pct_change(1)

small_group_agg['GRP_ADR_YOY'] = small_group_agg['GRP_ADR'].pct_change(1)
small_group_agg['TRANS_ADR_YOY'] = small_group_agg['TRANS_ADR'].pct_change(1)
small_group_agg['CONT_ADR_YOY'] = small_group_agg['CONT_ADR'].pct_change(1)
small_group_agg['TOTAL_ADR_YOY'] = small_group_agg['TOTAL_ADR'].pct_change(1)

small_group_agg['GRP_RevPAR_YOY'] = small_group_agg['GRP_RevPAR'].pct_change(1)
small_group_agg['TRANS_RevPAR_YOY'] = small_group_agg['TRANS_RevPAR'].pct_change(1)
small_group_agg['CONT_RevPAR_YOY'] = small_group_agg['CONT_RevPAR'].pct_change(1)
small_group_agg['TOTAL_RevPAR_YOY'] = small_group_agg['TOTAL_RevPAR'].pct_change(1)
small_group_agg = small_group_agg[column_order]
# Create Excel sheet 'Small Group Set Aggregated'
small_group_agg.to_excel(writer, 'Small Group Aggregated', index=False)

###########################################################################
# Grab the large group competitive set and aggregate
large_group_agg = pd.DataFrame()
large_group_set = tot_set_v2[tot_set_v2['Region'].isin(large_group)]

# We need the total supply for each year for the entire compset (whatever the compset is defined as)
large_group_agg['TOTAL_SUPPLY'] = large_group_set.groupby(['Year'])['S-TOTAL'].sum()

# We need the total group room nights for each year for the entire compset (whatever the compset is defined as)
large_group_agg['GRP_ROOMNIGHTS'] = large_group_set.groupby(['Year'])['GRP_ROOMNIGHTS'].sum()
large_group_agg['TRANS_ROOMNIGHTS'] = large_group_set.groupby(['Year'])['TRANS_ROOMNIGHTS'].sum()
large_group_agg['CONT_ROOMNIGHTS'] = large_group_set.groupby(['Year'])['CONT_ROOMNIGHTS'].sum()
large_group_agg['TOTAL_ROOMNIGHTS'] = large_group_set.groupby(['Year'])['TOTAL_ROOMNIGHTS'].sum()

large_group_agg['GRP_REVENUE'] = large_group_set.groupby(['Year'])['GRP_REVENUE'].sum()
large_group_agg['TRANS_REVENUE'] = large_group_set.groupby(['Year'])['TRANS_REVENUE'].sum()
large_group_agg['CONT_REVENUE'] = large_group_set.groupby(['Year'])['CONT_REVENUE'].sum()
large_group_agg['TOTAL_REVENUE'] = large_group_set.groupby(['Year'])['TOTAL_REVENUE'].sum()
large_group_agg = large_group_agg.reset_index()
large_group_agg['Region'] = 'Large Group Set'

##########################################################################
# Compute the KPIs 
large_group_agg['GRP_OCCUPANCY'] = large_group_agg['GRP_ROOMNIGHTS']/large_group_agg['TOTAL_SUPPLY']
large_group_agg['TRANS_OCCUPANCY'] = large_group_agg['TRANS_ROOMNIGHTS']/large_group_agg['TOTAL_SUPPLY']
large_group_agg['CONT_OCCUPANCY'] = large_group_agg['CONT_ROOMNIGHTS']/large_group_agg['TOTAL_SUPPLY']
large_group_agg['TOTAL_OCCUPANCY'] = large_group_agg['TOTAL_ROOMNIGHTS']/large_group_agg['TOTAL_SUPPLY']

large_group_agg['GRP_ADR'] = large_group_agg['GRP_REVENUE']/large_group_agg['GRP_ROOMNIGHTS']
large_group_agg['TRANS_ADR'] = large_group_agg['TRANS_REVENUE']/large_group_agg['TRANS_ROOMNIGHTS']
large_group_agg['CONT_ADR'] = large_group_agg['CONT_REVENUE']/large_group_agg['CONT_ROOMNIGHTS']
large_group_agg['TOTAL_ADR'] = large_group_agg['TOTAL_REVENUE']/large_group_agg['TOTAL_ROOMNIGHTS']

large_group_agg['GRP_RevPAR'] = large_group_agg['GRP_REVENUE']/large_group_agg['TOTAL_SUPPLY']
large_group_agg['TRANS_RevPAR'] = large_group_agg['TRANS_REVENUE']/large_group_agg['TOTAL_SUPPLY']
large_group_agg['CONT_RevPAR'] = large_group_agg['CONT_REVENUE']/large_group_agg['TOTAL_SUPPLY']
large_group_agg['TOTAL_RevPAR'] = large_group_agg['TOTAL_REVENUE']/large_group_agg['TOTAL_SUPPLY']

##########################################################################
# Compute the metrics year over year percent change

large_group_agg['GRP_OCC_YOY'] = large_group_agg['GRP_OCCUPANCY'].pct_change(1)
large_group_agg['TRANS_OCC_YOY'] = large_group_agg['TRANS_OCCUPANCY'].pct_change(1)
large_group_agg['CONT_OCC_YOY'] = large_group_agg['CONT_OCCUPANCY'].pct_change(1)
large_group_agg['TOTAL_OCC_YOY'] = large_group_agg['TOTAL_OCCUPANCY'].pct_change(1)

large_group_agg['GRP_ADR_YOY'] = large_group_agg['GRP_ADR'].pct_change(1)
large_group_agg['TRANS_ADR_YOY'] = large_group_agg['TRANS_ADR'].pct_change(1)
large_group_agg['CONT_ADR_YOY'] = large_group_agg['CONT_ADR'].pct_change(1)
large_group_agg['TOTAL_ADR_YOY'] = large_group_agg['TOTAL_ADR'].pct_change(1)

large_group_agg['GRP_RevPAR_YOY'] = large_group_agg['GRP_RevPAR'].pct_change(1)
large_group_agg['TRANS_RevPAR_YOY'] = large_group_agg['TRANS_RevPAR'].pct_change(1)
large_group_agg['CONT_RevPAR_YOY'] = large_group_agg['CONT_RevPAR'].pct_change(1)
large_group_agg['TOTAL_RevPAR_YOY'] = large_group_agg['TOTAL_RevPAR'].pct_change(1)
large_group_agg = large_group_agg[column_order]
# Create Excel sheet 'Large Group Set Aggregated'
large_group_agg.to_excel(writer, 'Large Group Aggregated', index=False)

In [ ]: