In [226]:
import os
import pandas as pd
import numpy as np
import seasonal
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gmean
pd.options.mode.chained_assignment = None
# Seaborn Style
sns.set(style="whitegrid", color_codes=True)

#Fix plots inline for notebook
%matplotlib inline

# Data Location within CC DS
data_location = os.path.join("..","data","external")

#######################################################
# Helpful Functions I may need

def getFiscalYear(dt):
    year = dt.year
    if dt.month>6: year += 1
    return year

def wavg(group, weight_column, value_column):
    d = group[value_column]
    w = group[weight_column]
    try:
        return (d * w).sum() / w.sum()
    except ZeroDivisionError:
        return d.mean()


#######################################################
# Create Lists of Columns for Subsetting Data for Analysis
# Create Mapping of Column Names for Cleanliness
#


compset_oar_list = [
                   'Date',
                   'Region',
                   'Occ-Trans.',
                   'Occ-Grp.',
                   'Occ-Cont.',
                   'Occ-Total',
                   'ADR-Trans.',
                   'ADR-Grp.',
                   'ADR-Cont.',
                   'ADR-Total',
                   'RevPAR-Trans.',
                   'RevPAR-Grp.',
                   'RevPAR-Cont.',
                    'RevPAR-Total'
                    ]

compset_oar_mapping = {
                   'Occ-Trans.':'OCC_TRANS',
                   'Occ-Grp.':'OCC_GRP',
                   'Occ-Cont.':'OCC_CONT',
                   'Occ-Total':'OCC_TOTAL',
                   'ADR-Trans.':'ADR_TRANS',
                   'ADR-Grp.':'ADR_GRP',
                   'ADR-Cont.':'ADR_CONT',
                   'ADR-Total':'ADR_TOTAL',
                   'RevPAR-Trans.':'RevPAR_TRANS',
                   'RevPAR-Grp.':'RevPAR_GRP',
                   'RevPAR-Cont.':'RevPAR_CONT',
                   'RevPAR-Total':'RevPAR_TOTAL'
                    }

m_cnty_oar_list = [
                   'Year',
                   'Month',
                   'Occ-Trans.',
                   'Occ-Grp.',
                   'Occ-Cont.',
                   'Occ-Total',
                   'ADR-Trans.',
                   'ADR-Grp.',
                   'ADR-Cont.',
                   'ADR-Total',
                   'RevPAR-Trans.',
                   'RevPAR-Grp.',
                   'RevPAR-Cont.',
                   'RevPAR-Total'
                    ]

m_cnty_oar_mapping = {
                   'Occ-Trans.':'OCC_TRANS',
                   'Occ-Grp.':'OCC_GRP',
                   'Occ-Cont.':'OCC_CONT',
                   'Occ-Total':'OCC_TOTAL',
                   'ADR-Trans.':'ADR_TRANS',
                   'ADR-Grp.':'ADR_GRP',
                   'ADR-Cont.':'ADR_CONT',
                   'ADR-Total':'ADR_TOTAL',
                   'RevPAR-Trans.':'RevPAR_TRANS',
                   'RevPAR-Grp.':'RevPAR_GRP',
                   'RevPAR-Cont.':'RevPAR_CONT',
                   'RevPAR-Total':'RevPAR_TOTAL'
                     }

region_mapping = { 
                   'Santa Barbara/Santa Maria, CA':'Santa Barbara County, CA',
                   'San Jose/Santa Cruz, CA':'San Jose-Santa Cruz, CA',
                   'Napa Valley, CA':'Napa County, CA',
                 }

region_exclude = ['Long Beach/Torrance']

small_group =      [
                    'Napa County, CA',
                    'Santa Barbara, CA',
                    'Sonoma County, CA',
                    'South Lake Tahoe, CA',
                    'Laguna Beach, CA',
                    'Newport Beach/Dana Point, CA'
                   ]

large_group =   [
                 'San Diego, CA',
                 'San Francisco/San Mateo, CA',
                 'Palm Springs, CA',
                 'San Jose-Santa Cruz, CA'
                ]

comp_set_amalgam = [
                    'Napa County, CA',
                    'Laguna Beach, CA',
                    'Palm Springs, CA',
                    'San Diego, CA',
                    'Santa Barbara County, CA',
                    'San Francisco/San Mateo, CA',
                    'Sonoma County, CA',
                    'South Lake Tahoe, CA',
                    'San Jose-Santa Cruz, CA',
                    'Newport Beach/Dana Point, CA'
                    ]

# Read in Monterey County XLS
# Documentation for Monterey County XLS goes here...
file_path = os.path.join(data_location, "Monterey-County-City-STR-ALL.xlsx")
m_cnty = pd.read_excel(file_path, sheetname = "County of Monterey 05-16 Seg")

# Read in Compset data
file_path = os.path.join(data_location, "Comp-Set-STR-ALL.xlsx")
compset_hm = pd.read_excel(file_path, sheetname = "Comp-Set OCC-ADR-RevPARv2")
compset_econ = pd.read_excel(file_path, sheetname = "Comp-Set S-D-Rv3")

# Read in Budget data
file_path = os.path.join(data_location, "TID_BUDGET.xlsx")
budget = pd.read_excel(file_path, sheetname = "income")

#Helper Data to expand number of years so they are even for excel viz
file_path = os.path.join(data_location, "Region-Years.xlsx")
region_years = pd.read_excel(file_path, sheetname = "Regions")

In [201]:
###################################################
# Process Competitive Set down, We just want total supply so we can weight our annual averages
compset_econ_v1 = compset_econ[['Year','Month','Date','Region','Supply-Total']]
compset_econ_v1['Month'] = compset_econ_v1['Month'].apply(str)
compset_econ_v1['Year'] = compset_econ_v1['Year'].apply(str)
compset_econ_v1['Date'] = compset_econ_v1['Year'] + "-" + compset_econ_v1['Month']
compset_econ_v1['Date'] = pd.to_datetime(compset_econ_v1['Date'], format = '%Y-%m')
compset_econ_v1['Region'] = compset_econ_v1['Region'].str.replace('+','')
compset_econ_v1 = compset_econ_v1.replace({'Region':region_mapping})
compset_total = compset_econ_v1.groupby('Region')[['Region','Year','Supply-Total']].mean()
compset_total = compset_total.reset_index()

###################################################
# Process monterey_county
m_cnty_v1 = m_cnty[m_cnty_oar_list]
m_cnty_v2 = m_cnty_v1.rename(columns=m_cnty_oar_mapping)
m_cnty_v2['Month'] = m_cnty_v2['Month'].apply(str)
m_cnty_v2['Year'] = m_cnty_v2['Year'].apply(str)
m_cnty_v2['Date'] = m_cnty_v2['Year'] + "-" + m_cnty_v2['Month']
m_cnty_v2['Date'] = pd.to_datetime(m_cnty_v2['Date'], format = "%Y-%b")
m_cnty_v2['Month'] = m_cnty_v2['Date'].apply(lambda x: x.month)
m_cnty_v2['Region'] = 'Monterey County, CA'

###################################################
# Process compset data
compset_hm_v1 = compset_hm = compset_hm[compset_oar_list]
compset_hm_v2 = compset_hm_v1.rename(columns=compset_oar_mapping)

#Date & Time Manipulation
compset_hm_v2['Date'] = pd.to_datetime(compset_hm_v2['Date'])
compset_hm_v2['Month'] = compset_hm_v2['Date'].apply(lambda x: str(x.month))
compset_hm_v2['Year'] = compset_hm_v2['Date'].apply(lambda x: str(x.year))

#String Manipulation
compset_hm_v2['Region'] = compset_hm_v2['Region'].str.replace('+','')

In [202]:
###############################################
# Combine datasets for Hotel Metrics 
compset_hm_v3 = pd.merge(compset_hm_v2, compset_total, on=['Region'], how = 'right')
tot_set = compset_hm_v3.append(m_cnty_v2)
#tot_set = tot_set.append(m_city_v2) # These datasets differ

# Sort before Calculations (Do I need to do this?)
tot_set = tot_set[(tot_set['Date'] < '2016-07-01') & (tot_set['Date'] > '2009-01-01')]
tot_set = tot_set.sort_values(by=['Date'], ascending = True)
tot_set = tot_set.round(2)
tot_set = tot_set.reset_index()

#This is an IMPORTANT STEP-HERE WE APPLY OUR FISCAL YEAR (Jun-Jul) to the YEAR variable.
# Is this best practices? I don't think so... not sure. need training.
tot_set['Year'] = tot_set['Date'].apply(getFiscalYear)

#String Manipulation, The mappings change over time, so we need to make sure all the names are consistent
tot_set = tot_set.replace({'Region':region_mapping})
tot_set['Month'] = tot_set['Month'].apply(str)

# Drop Duplicates, The City of Monterey is included in the Comp Set as well as the city data, the city data went back further
tot_set = tot_set.drop_duplicates(subset=['Region','Year','Month'], keep='last')

# Regions we want to keep, We only had one period of Long Beach. Exclude it
tot_set = tot_set[~(tot_set['Region'].isin(region_exclude))]

# Let's merge everything together, the comp set data and our data about the county and city
tot_set = pd.merge(tot_set, region_years, on = ['Region','Year'], how = 'right')

In [228]:
##############################################
# Calculate metrics, in this case, simple growth MOM & YOY
# May consider YTD

#Average Daily Rate  MOM & YOY & Annual changes
tot_set['ADR_GRP_MOM'] =     tot_set.groupby(['Region'])['ADR_GRP'].pct_change(1)
tot_set['ADR_TRANS_MOM'] =   tot_set.groupby(['Region'])['ADR_TRANS'].pct_change(1)
tot_set['ADR_TOTAL_MOM'] =   tot_set.groupby(['Region'])['ADR_TOTAL'].pct_change(1)

tot_set['ADR_GRP_YOY'] =     tot_set.groupby(['Region'])['ADR_GRP'].pct_change(12)
tot_set['ADR_TRANS_YOY'] =   tot_set.groupby(['Region'])['ADR_TRANS'].pct_change(12)
tot_set['ADR_TOTAL_YOY'] =   tot_set.groupby(['Region'])['ADR_TOTAL'].pct_change(12)

#Occupancy MOM & YOY & Annual
tot_set['OCC_GRP_MOM'] =     tot_set.groupby(['Region'])['OCC_GRP'].pct_change(1)
tot_set['OCC_TRANS_MOM'] =   tot_set.groupby(['Region'])['OCC_TRANS'].pct_change(1)
tot_set['OCC_TOTAL_MOM'] =   tot_set.groupby(['Region'])['OCC_TOTAL'].pct_change(1)

tot_set['OCC_GRP_YOY'] =     tot_set.groupby(['Region'])['OCC_GRP'].pct_change(12)
tot_set['OCC_TRANS_YOY'] =   tot_set.groupby(['Region'])['OCC_TRANS'].pct_change(12)
tot_set['OCC_TOTAL_YOY'] =   tot_set.groupby(['Region'])['OCC_TOTAL'].pct_change(12)

#RevPar MOM & YOY & Annual
tot_set['RevPAR_GRP_MOM'] =     tot_set.groupby(['Region'])['RevPAR_GRP'].pct_change(1)
tot_set['RevPAR_TRANS_MOM'] =   tot_set.groupby(['Region'])['RevPAR_TRANS'].pct_change(1)
tot_set['RevPAR_TOTAL_MOM'] =   tot_set.groupby(['Region'])['RevPAR_TOTAL'].pct_change(1)

tot_set['RevPAR_GRP_YOY'] =     tot_set.groupby(['Region'])['RevPAR_GRP'].pct_change(12)
tot_set['RevPAR_TRANS_YOY'] =   tot_set.groupby(['Region'])['RevPAR_TRANS'].pct_change(12)
tot_set['RevPAR_TOTAL_YOY'] =   tot_set.groupby(['Region'])['RevPAR_TOTAL'].pct_change(12)

tot_set.groupby(['Region','Year'])


Out[228]:
index ADR_CONT ADR_GRP ADR_TOTAL ADR_TRANS Date Month OCC_CONT OCC_GRP OCC_TOTAL ... OCC_TOTAL_MOM OCC_GRP_YOY OCC_TRANS_YOY OCC_TOTAL_YOY RevPAR_GRP_MOM RevPAR_TRANS_MOM RevPAR_TOTAL_MOM RevPAR_GRP_YOY RevPAR_TRANS_YOY RevPAR_TOTAL_YOY
0 49.0 66.98 175.24 154.71 141.85 2009-02-01 2 0.63 19.82 47.79 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 50.0 73.11 154.33 142.05 137.92 2009-03-01 3 0.83 15.67 49.32 ... 0.032015 NaN NaN NaN -0.303685 0.167397 -0.052475 NaN NaN NaN
2 144.0 67.78 153.87 141.49 136.70 2009-04-01 4 0.65 19.26 59.62 ... 0.208840 NaN NaN NaN 0.225300 0.199293 0.204111 NaN NaN NaN
3 145.0 67.43 159.85 147.22 142.88 2009-05-01 5 0.59 16.91 55.92 ... -0.062060 NaN NaN NaN -0.088057 0.011422 -0.024064 NaN NaN NaN
4 146.0 66.60 155.27 141.85 137.27 2009-06-01 6 0.43 17.26 61.27 ... 0.095672 NaN NaN NaN -0.008509 0.089617 0.055630 NaN NaN NaN
5 147.0 64.40 175.33 160.92 155.53 2009-07-01 7 0.39 21.65 72.95 ... 0.190632 NaN NaN NaN 0.416418 0.323470 0.350708 NaN NaN NaN
6 148.0 64.93 219.53 180.32 171.40 2009-08-01 8 0.41 14.48 73.26 ... 0.004249 NaN NaN NaN -0.162803 0.263863 0.125309 NaN NaN NaN
7 149.0 67.51 158.79 155.37 154.68 2009-09-01 9 0.48 21.25 65.17 ... -0.110429 NaN NaN NaN 0.061989 -0.328503 -0.233460 NaN NaN NaN
8 150.0 62.87 162.08 156.27 152.95 2009-10-01 10 0.36 25.29 59.75 ... -0.083167 NaN NaN NaN 0.214222 -0.223843 -0.078017 NaN NaN NaN
9 151.0 66.24 142.42 139.92 139.27 2009-11-01 11 0.34 17.52 47.24 ... -0.209372 NaN NaN NaN -0.391166 -0.215149 -0.291988 NaN NaN NaN
10 152.0 66.35 119.60 121.58 123.13 2009-12-01 12 0.33 12.21 39.78 ... -0.157917 NaN NaN NaN -0.414429 -0.180552 -0.268230 NaN NaN NaN
11 60.0 62.04 108.85 115.21 120.50 2010-01-01 1 0.34 15.20 37.18 ... -0.065359 NaN NaN NaN 0.132101 -0.222421 -0.114327 NaN NaN NaN
12 61.0 68.37 154.61 148.36 145.02 2010-02-01 2 0.35 19.30 47.30 ... 0.272189 -0.026236 0.012075 -0.010253 0.803507 0.537960 0.638189 -0.141336 0.034563 -0.050852
13 62.0 66.95 132.39 131.06 131.03 2010-03-01 3 0.36 18.02 50.62 ... 0.070190 0.149968 -0.017367 0.026358 -0.200469 0.053353 -0.054716 -0.014055 -0.066505 -0.053097
14 156.0 66.82 142.99 143.13 143.97 2010-04-01 4 0.37 23.88 61.04 ... 0.205847 0.239875 -0.073281 0.023818 0.431447 0.253964 0.317003 0.151822 -0.023950 0.035680
15 157.0 68.06 151.62 152.49 153.94 2010-05-01 5 0.38 25.32 63.14 ... 0.034404 0.497339 -0.025761 0.129113 0.124487 0.087769 0.102095 0.420274 0.049727 0.169562
16 65.0 56.69 229.43 195.50 172.56 2010-06-01 6 0.29 28.31 68.73 ... 0.088533 0.640209 -0.079165 0.121756 0.691847 0.201631 0.395368 1.423507 0.157640 0.545967
17 159.0 61.11 162.22 167.35 170.13 2010-07-01 7 0.31 22.43 76.06 ... 0.106649 0.036028 0.047338 0.042632 -0.439723 0.309892 -0.052620 -0.041359 0.145762 0.084334
18 67.0 56.42 186.49 186.63 187.78 2010-08-01 8 0.41 27.37 77.71 ... 0.021693 0.890193 -0.144741 0.060743 0.402858 0.033513 0.139367 0.606356 -0.063062 0.097880
19 68.0 57.21 157.23 162.12 166.09 2010-09-01 9 0.39 24.91 66.25 ... -0.147471 0.172235 -0.057320 0.016572 -0.232713 -0.274560 -0.259395 0.160593 0.012204 0.060735
20 69.0 61.49 156.42 162.02 166.95 2010-10-01 10 0.26 25.08 59.03 ... -0.108981 -0.008304 -0.012023 -0.012050 0.001532 -0.172916 -0.109580 -0.042704 0.078619 0.024422
21 70.0 60.33 145.04 148.38 151.03 2010-11-01 11 0.26 18.01 49.55 ... -0.160596 0.027968 0.064648 0.048899 -0.334438 -0.159822 -0.231179 0.046493 0.154654 0.112405
22 164.0 74.17 110.02 125.75 132.87 2010-12-01 12 0.36 11.27 39.10 ... -0.210898 -0.076986 0.008443 -0.017094 -0.525086 -0.227465 -0.331293 -0.151266 0.088551 0.016539
23 72.0 48.54 110.16 116.72 122.98 2011-01-01 1 0.40 17.35 40.38 ... 0.032737 0.141447 0.044804 0.086068 0.541935 -0.238291 -0.041489 0.155985 0.066334 0.100140
24 166.0 52.26 147.61 147.50 148.99 2011-02-01 2 0.49 20.68 51.12 ... 0.265973 0.071503 0.082791 0.080761 0.596234 0.604459 0.599830 0.023131 0.112441 0.074380
25 167.0 52.87 122.85 129.66 135.82 2011-03-01 3 0.44 21.39 50.88 ... -0.004695 0.187014 -0.099225 0.005136 -0.138925 -0.115643 -0.125066 0.101887 -0.066036 -0.005577
26 168.0 54.44 140.88 142.50 144.39 2011-04-01 4 0.48 21.62 63.18 ... 0.241745 -0.094640 0.116304 0.035059 0.159056 0.503041 0.364711 -0.107791 0.119479 0.030445
27 76.0 78.17 151.36 154.79 158.01 2011-05-01 5 0.44 24.65 61.79 ... -0.022001 -0.026461 -0.019765 -0.021381 0.224557 -0.022256 0.062313 -0.028393 0.006247 -0.006750
28 77.0 60.98 147.83 153.31 156.81 2011-06-01 6 0.57 19.62 65.99 ... 0.067972 -0.306959 0.141291 -0.039866 -0.222520 0.238489 0.057821 -0.553503 0.037112 -0.247023
29 236.0 97.17 204.25 205.60 206.59 2011-04-01 4 0.14 20.14 63.30 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
514 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
515 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
516 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
517 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
518 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
519 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
520 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
521 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
522 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
523 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
524 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
525 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
526 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
527 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
528 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
529 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
530 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
531 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
532 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
533 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
534 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
535 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
536 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
537 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
538 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
539 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
540 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
541 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
542 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
543 NaN NaN NaN NaN NaN NaT NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

544 rows × 36 columns


In [214]:
agg_set = pd.DataFrame()

agg_set['ADR_GRP_ANN'] =     tot_set.groupby(['Region','Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
agg_set


Out[214]:
ADR_GRP_ANN
Region Year
City of Monterey, CA 2008.0 NaN
2009.0 NaN
2010.0 NaN
2011.0 NaN
2012.0 0.000000
2013.0 -0.027222
2014.0 0.078942
2015.0 -0.070274
2016.0 0.003609
Laguna Beach, CA 2008.0 NaN
2009.0 NaN
2010.0 NaN
2011.0 NaN
2012.0 NaN
2013.0 NaN
2014.0 0.000000
2015.0 -0.018031
2016.0 -0.167887
Monterey County, CA 2008.0 NaN
2009.0 0.000000
2010.0 0.092152
2011.0 0.024584
2012.0 0.049005
2013.0 -0.058530
2014.0 0.089135
2015.0 0.004548
2016.0 0.072337
Napa County, CA 2008.0 NaN
2009.0 NaN
2010.0 NaN
... ... ...
San Jose-Santa Cruz, CA 2014.0 0.000000
2015.0 0.000000
2016.0 -0.072095
Santa Barbara County, CA 2008.0 NaN
2009.0 NaN
2010.0 NaN
2011.0 0.000000
2012.0 0.016573
2013.0 0.012705
2014.0 -0.015835
2015.0 NaN
2016.0 NaN
Sonoma County, CA 2008.0 NaN
2009.0 NaN
2010.0 NaN
2011.0 0.000000
2012.0 -0.013598
2013.0 0.137400
2014.0 0.157896
2015.0 0.127197
2016.0 -0.098609
South Lake Tahoe, CA 2008.0 NaN
2009.0 NaN
2010.0 NaN
2011.0 NaN
2012.0 NaN
2013.0 NaN
2014.0 0.000000
2015.0 -0.016826
2016.0 -0.176564

108 rows × 1 columns


In [222]:
##################################################
# Let's visualize this data for output to Tammy & Rob

agg_set = pd.DataFrame()


#########################################################################################
# Now that metrics are calculated lets create the aggregated set for Annual Averages and Rankings

agg_set['ADR_GRP_ANN'] =     tot_set.groupby(['Region','Year'])['ADR_GRP_YOY'].mean()
agg_set['ADR_TRANS_ANN'] =   tot_set.groupby(['Region','Year'])['ADR_TRANS_YOY'].mean()
agg_set['ADR_TOTAL_ANN'] =   tot_set.groupby(['Region','Year'])['ADR_TOTAL_YOY'].mean()

agg_set['OCC_GRP_ANN'] =     tot_set.groupby(['Region','Year'])['OCC_GRP_YOY'].mean()
agg_set['OCC_TRANS_ANN'] =   tot_set.groupby(['Region','Year'])['OCC_TRANS_YOY'].mean()
agg_set['OCC_TOTAL_ANN'] =   tot_set.groupby(['Region','Year'])['OCC_TOTAL_YOY'].mean()

agg_set['RevPAR_GRP_ANN'] =     tot_set.groupby(['Region','Year'])['RevPAR_GRP_YOY'].mean()
agg_set['RevPAR_TRANS_ANN'] =   tot_set.groupby(['Region','Year'])['RevPAR_TRANS_YOY'].mean()
agg_set['RevPAR_TOTAL_ANN'] =   tot_set.groupby(['Region','Year'])['RevPAR_TOTAL_YOY'].mean()

# THIS SECTION IS FOR WEIGHTED AVERAGES
agg_set['ADR_GRP_ANN'] =     tot_set.groupby(['Region','Year'])['ADR_GRP_YOY'].mean()
agg_set['ADR_TRANS_ANN'] =   tot_set.groupby(['Region','Year'])['ADR_TRANS_YOY'].mean()
agg_set['ADR_TOTAL_ANN'] =   tot_set.groupby(['Region','Year'])['ADR_TOTAL_YOY'].mean()

agg_set['OCC_GRP_ANN'] =     tot_set.groupby(['Region','Year'])['OCC_GRP_YOY'].mean()
agg_set['OCC_TRANS_ANN'] =   tot_set.groupby(['Region','Year'])['OCC_TRANS_YOY'].mean()
agg_set['OCC_TOTAL_ANN'] =   tot_set.groupby(['Region','Year'])['OCC_TOTAL_YOY'].mean()

agg_set['RevPAR_GRP_ANN'] =     tot_set.groupby(['Region','Year'])['RevPAR_GRP_YOY'].mean()
agg_set['RevPAR_TRANS_ANN'] =   tot_set.groupby(['Region','Year'])['RevPAR_TRANS_YOY'].mean()
agg_set['RevPAR_TOTAL_ANN'] =   tot_set.groupby(['Region','Year'])['RevPAR_TOTAL_YOY'].mean()



# Actuals Averaged
agg_set['ADR_GRP_AVG'] =     tot_set.groupby(['Region','Year'])['ADR_GRP'].mean()
agg_set['ADR_TRANS_AVG'] =   tot_set.groupby(['Region','Year'])['ADR_TRANS'].mean()
agg_set['ADR_TOTAL_AVG'] =   tot_set.groupby(['Region','Year'])['ADR_TOTAL'].mean()

# Actuals Averaged
agg_set['OCC_GRP_AVG'] =     tot_set.groupby(['Region','Year'])['OCC_GRP'].mean()
agg_set['OCC_TRANS_AVG'] =   tot_set.groupby(['Region','Year'])['OCC_TRANS'].mean()
agg_set['OCC_TOTAL_AVG'] =   tot_set.groupby(['Region','Year'])['OCC_TOTAL'].mean()

#Actuals Averaged
agg_set['RevPAR_GRP_AVG'] =     tot_set.groupby(['Region','Year'])['RevPAR_GRP'].mean()
agg_set['RevPAR_TRANS_AVG'] =   tot_set.groupby(['Region','Year'])['RevPAR_TRANS'].mean()
agg_set['RevPAR_TOTAL_AVG'] =   tot_set.groupby(['Region','Year'])['RevPAR_TOTAL'].mean()
agg_set = agg_set.reset_index()

##########################################################################################
# Subset agg_set so we can amalgamate the comp set growth
comp_agg = pd.DataFrame()
comp_set = tot_set[tot_set['Region'].isin(comp_set_amalgam)]

# Annual Average Daily Rate using Year Over Year Percentages, averaging across comp set Regions
comp_agg['ADR_GRP_ANN'] =     comp_set.groupby(['Year'])['ADR_GRP_YOY'].mean()
comp_agg['ADR_TRANS_ANN'] =   comp_set.groupby(['Year'])['ADR_TRANS_YOY'].mean()
comp_agg['ADR_TOTAL_ANN'] =   comp_set.groupby(['Year'])['ADR_TOTAL_YOY'].mean()

# Annual Occupancy using Year Over Year Percentages, averaging across comp set Regions
comp_agg['OCC_GRP_ANN'] =     comp_set.groupby(['Year'])['OCC_GRP_YOY'].mean()
comp_agg['OCC_TRANS_ANN'] =   comp_set.groupby(['Year'])['OCC_TRANS_YOY'].mean()
comp_agg['OCC_TOTAL_ANN'] =   comp_set.groupby(['Year'])['OCC_TOTAL_YOY'].mean()

# Annual RevPAR using Year Over Year Percentages, averaging across comp set Regions
comp_agg['RevPAR_GRP_ANN'] =     comp_set.groupby(['Year'])['RevPAR_GRP_YOY'].mean()
comp_agg['RevPAR_TRANS_ANN'] =   comp_set.groupby(['Year'])['RevPAR_TRANS_YOY'].mean()
comp_agg['RevPAR_TOTAL_ANN'] =   comp_set.groupby(['Year'])['RevPAR_TOTAL_YOY'].mean()

# THIS SECTION IS FOR WEIGHTED AVERAGES
# Annual Average Daily Rate using Year Over Year Percentages, weighting the data using the total supply of the destination
comp_agg['ADR_GRP_ANN_WT'] =     comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_GRP_YOY')
comp_agg['ADR_TRANS_ANN_WT'] =   comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TRANS_YOY')
comp_agg['ADR_TOTAL_ANN_WT'] =   comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TOTAL_YOY')

# Annual Occupancy using Year Over Year Percentages, weighting the data using the total supply of the destination
comp_agg['OCC_GRP_ANN_WT'] =     comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
comp_agg['OCC_TRANS_ANN_WT'] =   comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TRANS_YOY')
comp_agg['OCC_TOTAL_ANN_WT'] =   comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TOTAL_YOY')

# Annual RevPAR using Year Over Year Percentages, weighting the data using the total supply of the destination
comp_agg['RevPAR_GRP_ANN_WT'] =     comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_GRP_YOY')
comp_agg['RevPAR_TRANS_ANN_WT'] =   comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TRANS_YOY')
comp_agg['RevPAR_TOTAL_ANN_WT'] =   comp_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TOTAL_YOY')

# Actuals Averaged
comp_agg['ADR_GRP_AVG'] =     comp_set.groupby(['Year'])['ADR_GRP'].mean()
comp_agg['ADR_TRANS_AVG'] =   comp_set.groupby(['Year'])['ADR_TRANS'].mean()
comp_agg['ADR_TOTAL_AVG'] =   comp_set.groupby(['Year'])['ADR_TOTAL'].mean()

# Actuals Averaged
comp_agg['OCC_GRP_AVG'] =     comp_set.groupby(['Year'])['OCC_GRP'].mean()
comp_agg['OCC_TRANS_AVG'] =   comp_set.groupby(['Year'])['OCC_TRANS'].mean()
comp_agg['OCC_TOTAL_AVG'] =   comp_set.groupby(['Year'])['OCC_TOTAL'].mean()

#Actuals Averaged
comp_agg['RevPAR_GRP_AVG'] = comp_set.groupby([ 'Year'])['RevPAR_GRP'].mean()
comp_agg['RevPAR_TRANS_AVG'] = comp_set.groupby([ 'Year'])['RevPAR_TRANS'].mean()
comp_agg['RevPAR_TOTAL_AVG'] = comp_set.groupby([ 'Year'])['RevPAR_TOTAL'].mean()
comp_agg = comp_agg.reset_index()

comp_agg['Region'] = 'Competitive Set'

In [225]:
########################################################################################
# Small group aggregation. Lets take the small groups from our comp set and re-run the annual averages

small_group_agg = pd.DataFrame()
small_group_set = tot_set[tot_set['Region'].isin(small_group)]

small_group_agg['ADR_GRP_ANN'] = small_group_set.groupby(['Year'])['ADR_GRP_YOY'].mean()
small_group_agg['ADR_TRANS_ANN'] = small_group_set.groupby(['Year'])['ADR_TRANS_YOY'].mean()
small_group_agg['ADR_TOTAL_ANN'] = small_group_set.groupby(['Year'])['ADR_TOTAL_YOY'].mean()

small_group_agg['OCC_GRP_ANN'] = small_group_set.groupby(['Year'])['OCC_GRP_YOY'].mean()
small_group_agg['OCC_TRANS_ANN'] = small_group_set.groupby(['Year'])['OCC_TRANS_YOY',].mean()
small_group_agg['OCC_TOTAL_ANN'] = small_group_set.groupby(['Year'])['OCC_TOTAL_YOY'].mean()

small_group_agg['RevPAR_GRP_ANN'] = small_group_set.groupby(['Year'])[['Date', 'RevPAR_GRP_YOY', 'Region']].mean()
small_group_agg['RevPAR_TRANS_ANN'] = small_group_set.groupby(['Year'])[['Date', 'RevPAR_TRANS_YOY', 'Region']].mean()
small_group_agg['RevPAR_TOTAL_ANN'] = small_group_set.groupby(['Year'])[['Date', 'RevPAR_TOTAL_YOY', 'Region']].mean()

# THIS SECTION IS FOR WEIGHTED AVERAGES
# Annual Average Daily Rate using Year Over Year Percentages, weighting the data using the total supply of the destination
small_group_agg['ADR_GRP_ANN_WT'] =     small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_GRP_YOY')
small_group_agg['ADR_TRANS_ANN_WT'] =   small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TRANS_YOY')
small_group_agg['ADR_TOTAL_ANN_WT'] =   small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TOTAL_YOY')

# Annual Occupancy using Year Over Year Percentages, weighting the data using the total supply of the destination
small_group_agg['OCC_GRP_ANN_WT'] =     small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
small_group_agg['OCC_TRANS_ANN_WT'] =   small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TRANS_YOY')
small_group_agg['OCC_TOTAL_ANN_WT'] =   small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TOTAL_YOY')

# Annual RevPAR using Year Over Year Percentages, weighting the data using the total supply of the destination
small_group_agg['RevPAR_GRP_ANN_WT'] =     small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_GRP_YOY')
small_group_agg['RevPAR_TRANS_ANN_WT'] =   small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TRANS_YOY')
small_group_agg['RevPAR_TOTAL_ANN_WT'] =   small_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TOTAL_YOY')

# Actuals Averaged
small_group_agg['ADR_GRP_AVG'] = small_group_set.groupby(['Year'])['ADR_GRP'].mean()
small_group_agg['ADR_TRANS_AVG'] = small_group_set.groupby(['Year'])['ADR_TRANS'].mean()
small_group_agg['ADR_TOTAL_AVG'] = small_group_set.groupby(['Year'])['ADR_TOTAL'].mean()

# Actuals Averaged
small_group_agg['OCC_GRP_AVG'] =     small_group_set.groupby(['Year'])['OCC_GRP'].mean()
small_group_agg['OCC_TRANS_AVG'] =   small_group_set.groupby(['Year'])['OCC_TRANS'].mean()
small_group_agg['OCC_TOTAL_AVG'] =   small_group_set.groupby(['Year'])['OCC_TOTAL'].mean()

#Actuals Averaged
small_group_agg['RevPAR_GRP_AVG'] = small_group_set.groupby([ 'Year'])['RevPAR_GRP'].mean()
small_group_agg['RevPAR_TRANS_AVG'] = small_group_set.groupby([ 'Year'])['RevPAR_TRANS'].mean()
small_group_agg['RevPAR_TOTAL_AVG'] = small_group_set.groupby([ 'Year'])['RevPAR_TOTAL'].mean()
small_group_agg = small_group_agg.reset_index()

small_group_agg['Region'] = 'Small Group Meetings'

##########################################################################################
# Large group aggregation. Lets take the large groups from our comp set and re-run the annual averages

large_group_agg = pd.DataFrame()
large_group_set = tot_set[tot_set['Region'].isin(large_group)]

large_group_agg['ADR_GRP_ANN'] = large_group_set.groupby(['Year'])['ADR_GRP_YOY'].mean()
large_group_agg['ADR_TRANS_ANN'] = large_group_set.groupby(['Year'])['ADR_TRANS_YOY'].mean()
large_group_agg['ADR_TOTAL_ANN'] = large_group_set.groupby(['Year'])['ADR_TOTAL_YOY'].mean()

large_group_agg['OCC_GRP_ANN'] = large_group_set.groupby(['Year'])['OCC_GRP_YOY'].mean()
large_group_agg['OCC_TRANS_ANN'] = large_group_set.groupby(['Year'])['OCC_TRANS_YOY',].mean()
large_group_agg['OCC_TOTAL_ANN'] = large_group_set.groupby(['Year'])['OCC_TOTAL_YOY'].mean()

large_group_agg['RevPAR_GRP_ANN'] = large_group_set.groupby(['Year'])[['Date','RevPAR_GRP_YOY','Region']].mean()
large_group_agg['RevPAR_TRANS_ANN'] = large_group_set.groupby(['Year'])[['Date','RevPAR_TRANS_YOY','Region']].mean()
large_group_agg['RevPAR_TOTAL_ANN'] = large_group_set.groupby(['Year'])[['Date','RevPAR_TOTAL_YOY','Region']].mean()

# THIS SECTION IS FOR WEIGHTED AVERAGES
# Annual Average Daily Rate using Year Over Year Percentages, weighting the data using the total supply of the destination
large_group_agg['ADR_GRP_ANN_WT'] =     large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_GRP_YOY')
large_group_agg['ADR_TRANS_ANN_WT'] =   large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TRANS_YOY')
large_group_agg['ADR_TOTAL_ANN_WT'] =   large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'ADR_TOTAL_YOY')

# Annual Occupancy using Year Over Year Percentages, weighting the data using the total supply of the destination
large_group_agg['OCC_GRP_ANN_WT'] =     large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_GRP_YOY')
large_group_agg['OCC_TRANS_ANN_WT'] =   large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TRANS_YOY')
large_group_agg['OCC_TOTAL_ANN_WT'] =   large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'OCC_TOTAL_YOY')

# Annual RevPAR using Year Over Year Percentages, weighting the data using the total supply of the destination
large_group_agg['RevPAR_GRP_ANN_WT'] =     large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_GRP_YOY')
large_group_agg['RevPAR_TRANS_ANN_WT'] =   large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TRANS_YOY')
large_group_agg['RevPAR_TOTAL_ANN_WT'] =   large_group_set.groupby(['Year']).apply(wavg, weight_column = 'Supply-Total', value_column = 'RevPAR_TOTAL_YOY')

# Actuals Averaged
large_group_agg['ADR_GRP_AVG'] = large_group_set.groupby(['Year'])['ADR_GRP'].mean()
large_group_agg['ADR_TRANS_AVG'] = large_group_set.groupby(['Year'])['ADR_TRANS'].mean()
large_group_agg['ADR_TOTAL_AVG'] = large_group_set.groupby(['Year'])['ADR_TOTAL'].mean()

# Actuals Averaged
large_group_agg['OCC_GRP_AVG'] = large_group_set.groupby(['Year'])['OCC_GRP'].mean()
large_group_agg['OCC_TRANS_AVG'] = large_group_set.groupby(['Year'])['OCC_TRANS'].mean()
large_group_agg['OCC_TOTAL_AVG'] = large_group_set.groupby(['Year'])['OCC_TOTAL'].mean()

#Actuals Averaged
large_group_agg['RevPAR_GRP_AVG'] = large_group_set.groupby([ 'Year'])['RevPAR_GRP'].mean()
large_group_agg['RevPAR_TRANS_AVG'] = large_group_set.groupby([ 'Year'])['RevPAR_TRANS'].mean()
large_group_agg['RevPAR_TOTAL_AVG'] = large_group_set.groupby([ 'Year'])['RevPAR_TOTAL'].mean()
large_group_agg = large_group_agg.reset_index()

large_group_agg['Region'] = 'Large Group Meetings'

################################################################################################
# Concatenate All Frames together for a final frmae for all analysis

column_order = [
                'Region',
                'Year',
                'ADR_GRP_ANN',
                'ADR_GRP_ANN_WT',
                'ADR_TRANS_ANN',
                'ADR_TRANS_ANN_WT',
                'ADR_TOTAL_ANN',
                'ADR_TOTAL_ANN_WT',
                'OCC_GRP_ANN',
                'OCC_GRP_ANN_WT',
                'OCC_TRANS_ANN',
                'OCC_TRANS_ANN_WT',
                'OCC_TOTAL_ANN',
                'OCC_TOTAL_ANN_WT',
                'RevPAR_GRP_ANN',
                'RevPAR_GRP_ANN_WT',
                'RevPAR_TRANS_ANN',
                'RevPAR_TRANS_ANN_WT',
                'RevPAR_TOTAL_ANN',
                'RevPAR_TOTAL_ANN_WT',
                'ADR_GRP_AVG',
                'ADR_TRANS_AVG',
                'ADR_TOTAL_AVG',
                'OCC_GRP_AVG',
                'OCC_TRANS_AVG',
                'OCC_TOTAL_AVG',
                'RevPAR_GRP_AVG',
                'RevPAR_TRANS_AVG',
                'RevPAR_TOTAL_AVG'
                ]

frames = [agg_set,comp_agg,small_group_agg,large_group_agg]
final_agg = pd.concat(frames)
final_agg = final_agg[column_order]
final_agg.to_csv('excel_visualizations.csv', index = False)