In [155]:
from __future__ import division

import os.path
import pandas as pd
import numpy as np
import csv
import glob

import matplotlib.pyplot as plt
import seaborn as sns

import datetime
import uuid

from numpy import *
%matplotlib inline

import warnings
# squelch an anaconda "bug" and some python verbosity
# this can move to system wide python if needed
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [156]:
header = ['SPID','SA','UOM','DIR','DATE','RS','NAICS','APCT',
 '0:00',
 '0:15',
 '0:30',
 '0:45',
 '1:00',
 '1:15',
 '1:30',
 '1:45',
 '2:00',
 '2:15',
 '2:30',
 '2:45',
 '3:00',
 '3:15',
 '3:30',
 '3:45',
 '4:00',
 '4:15',
 '4:30',
 '4:45',
 '5:00',
 '5:15',
 '5:30',
 '5:45',
 '6:00',
 '6:15',
 '6:30',
 '6:45',
 '7:00',
 '7:15',
 '7:30',
 '7:45',
 '8:00',
 '8:15',
 '8:30',
 '8:45',
 '9:00',
 '9:15',
 '9:30',
 '9:45',
 '10:00',
 '10:15',
 '10:30',
 '10:45',
 '11:00',
 '11:15',
 '11:30',
 '11:45',
 '12:00',
 '12:15',
 '12:30',
 '12:45',
 '13:00',
 '13:15',
 '13:30',
 '13:45',
 '14:00',
 '14:15',
 '14:30',
 '14:45',
 '15:00',
 '15:15',
 '15:30',
 '15:45',
 '16:00',
 '16:15',
 '16:30',
 '16:45',
 '17:00',
 '17:15',
 '17:30',
 '17:45',
 '18:00',
 '18:15',
 '18:30',
 '18:45',
 '19:00',
 '19:15',
 '19:30',
 '19:45',
 '20:00',
 '20:15',
 '20:30',
 '20:45',
 '21:00',
 '21:15',
 '21:30',
 '21:45',
 '22:00',
 '22:15',
 '22:30',
 '22:45',
 '23:00',
 '23:15',
 '23:30',
 '23:45',
 'ZIPCODE',
 'SUBLAP']

In [20]:
customers = pd.read_csv('../../2016/PGE/D1977_OFFICE_LBNL.csv',delimiter=',')
# customers.drop(customers.columns[[0]], inplace=True, axis=1)

In [122]:
def str2float(x):
    try:
        return("{0:.3f}".format(float(x)))
    except ValueError:
        return 0

Start the analyze the dataset


In [123]:
def CleanMeterData(df, SAID, weather_file, holidays, DREventDays):
    df2 = df[df['SA'] == SAID].sort_index(axis=0)
    df3 = pd.DataFrame(df2[df2.columns[8:104]].stack(),columns=['energykWh'])
    df3.index.names = ['Date','Time']

    dateindex = []
    for datenum in range(len(df3)):
        dateindex.append(df3.index[datenum][0].strftime('%Y-%m-%d')+' '+df3.index[datenum][1])

    df3.index = pd.to_datetime(dateindex, format='%Y-%m-%d %H:%M')
    
    df3['power'] = df3['energykWh'].apply(lambda x: str2float(x))
    df3[['power']] = df3[['power']].astype(float)
    df3.power = df3.power * 4 # convert kWh to kW

    df3['day'] = list(map(lambda x: x.strftime('%Y/%m/%d'),df3.index))
    df3['time'] = list(map(lambda x: x.strftime('%H:%M'),df3.index))
    df3['month'] = list(map(lambda x: x.strftime('%m'),df3.index))
    df3['hour'] = list(map(lambda x: x.strftime('%H'),df3.index))
    df3['month'] = df3.month.astype(int)
    df3['hour'] = df3.hour.astype(int)
    df3['weekday'] = df3.index.weekday
    df3['holiday'] = list(map(lambda x: x.strftime('%Y-%m-%d') in holidays.day.values,df3.index))
    df3['holiday'] = df3.holiday.astype(int)
    df3['DREventDay'] = list(map(lambda x: x.strftime('%Y-%m-%d') in DREventDays.day.values,df3.index))
    df3['DREventDay'] = df3.DREventDay.astype(int)
    
    weather = read_weather(weather_file)
    data = pd.concat([df3,weather],axis=1,join_axes=[df3.index])
    
    data.fillna(method='pad')

    # calculate basepower on weekdays
    data.loc[:,'basepower'] = 0
    data.loc[:,'peakoat'] = 0

    # calculate the daily peak OAT
    PeakOAT = data.groupby(['day'])['oat'].max()

    for day in PeakOAT.index:
        data.loc[(data['day'] == day),'peakoat'] = PeakOAT[day]

    # subset the weekday and weekend power
    df_wd = data.loc[(data['weekday'] >= 0) & (data['weekday'] <= 4) & (data['holiday'] == 0)]
    df_wk = data.loc[(data.weekday.apply(lambda x: x in [5,6])) | (data['holiday'] == 1)]

    # calculate the basepower on weekdays and weekend
    df_wd_base = df_wd.loc[(df_wd['peakoat'] < 45)]
    df_wd_basedaily = df_wd_base.pivot(index='time',columns='day',values='power')

    for tim in df_wd_basedaily.index:
        data.loc[(data['time'] == tim) & (data['weekday'] >= 0) & \
           (data['weekday'] <= 4) & (data['holiday'] == 0),'basepower'] = df_wd_basedaily.mean(axis=1)[tim]
        data.loc[(data['time'] == tim) & (data.weekday.apply(lambda x: x in [5,6])),'basepower'] = df_wd_basedaily.mean(axis=1)[tim]
        data.loc[(data['time'] == tim) & (data['holiday'] == 1),'basepower'] = df_wd_basedaily.mean(axis=1)[tim]

    data.loc[:,'hvac'] = data.power - data.basepower
    
    return data

In [188]:
# consider the holidays and previous DR Event days in 2015
holidays = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/model_input/holiday.csv', usecols=[0])
holidays['date'] = pd.to_datetime(pd.Series(holidays['date']), format='%m/%d/%y')
holidays['day'] = holidays.date.apply(lambda x: x.strftime('%Y-%m-%d'))

DREventDays = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/model_input/DREventDays.csv', usecols=[0])
DREventDays['date'] = pd.to_datetime(pd.Series(DREventDays['date']), format='%m/%d/%y')
DREventDays['day'] = DREventDays.date.apply(lambda x: x.strftime('%Y-%m-%d'))

In [143]:
def zc2cz(zipcode):
    ZipCodeClimateZone = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/model_input/BuildingClimateZonesByZIPCode.csv')
    ClimateZone = ZipCodeClimateZone.loc[ZipCodeClimateZone['ZipCode'] == zc,'ClimateZone'].values
    return "{0:0=2d}".format(int(ClimateZone))

In [144]:
def read_weather(weather_file):
    weather = pd.read_csv(weather_file,header=None)
    weather.columns = ['Date','oat']
    weather.index = pd.to_datetime(weather.Date, format='%m/%d/%Y %H:%M')
    weather = weather.drop(['Date'],axis=1)
    weather = weather.groupby(weather.index).first() # remove duplicated Index in DataFrame
    weather = weather.asfreq('15min', method='pad') # fill missing timeseries data in DataFrame
    return weather

In [209]:
def calculate_utility_TOU(df, tariff, kWShed):
    utility = []
    utility = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/utilityrates/'+'PGE_'+tariff+'.csv', usecols=[1])

    # meter charge
    meter_charge = float("{0:.2f}".format(utility['Secondary'][0]))
    # demand charge
    summer_onpeak_demand = utility['Secondary'][1]
    summer_midpeak_demand = utility['Secondary'][2]
    summer_monthly_demand = utility['Secondary'][3]
    winter_midpeak_demand = utility['Secondary'][4]
    winter_monthly_demand = utility['Secondary'][5]

    # energy charge
    summer_onpeak_energy = utility['Secondary'][6]
    summer_midpeak_energy = utility['Secondary'][7]
    summer_offpeak_energy = utility['Secondary'][8]
    winter_midpeak_energy = utility['Secondary'][9]
    winter_offpeak_energy = utility['Secondary'][10]

    # PDP charge and credits
    cpp_charge = utility['Secondary'][11]
    credits_onpeak_demand = utility['Secondary'][12]
    credits_midpeak_demand = utility['Secondary'][13]

    # add energy charge as new column in dataframe
    try:
        data = df['2015']
        data['energycharge'] = 0

        # weekdays in summer
        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data['weekday'] < 5) & \
                 (data['time'] >= '00:00') & (data['time'] < '08:30'),'energycharge'] = summer_offpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data['weekday'] < 5) & \
                 (data['time'] >= '08:30') & (data['time'] < '12:00'),'energycharge'] = summer_midpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data['weekday'] < 5) & \
                 (data.time >= '12:00') & (data.time < '18:00'),'energycharge'] = summer_onpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data['weekday'] < 5) & \
                 (data['time'] >= '18:00') & (data['time'] < '23:00'),'energycharge'] = summer_midpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data['weekday'] < 5) & \
                 (data['time'] >= '23:00') & (data['time'] <= '23:45'),'energycharge'] = summer_offpeak_energy

        # weekdays in winter
        data.loc[(data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '00:00') & (data.time < '08:30'),'energycharge'] = winter_offpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '08:30') & (data.time < '21:30'),'energycharge'] = winter_midpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '21:30') & (data.time <= '23:45'),'energycharge'] = winter_offpeak_energy    

        # weekend and holidays in summer and winter
        data.loc[((data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                  (data.weekday >= 5)),'energycharge'] = summer_offpeak_energy

        data.loc[((data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                  (data.holiday == 1)),'energycharge'] = summer_offpeak_energy

        data.loc[((data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                  (data.weekday >= 5)),'energycharge'] = winter_offpeak_energy

        data.loc[((data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                  (data.holiday == 1)),'energycharge'] = winter_offpeak_energy

        # Calculate Capacity Reservation Level (CRL), 50% CRL
        CRL = np.mean(data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                           (data.weekday < 5) & \
                           (data.time >= '12:00') & (data.time < '18:00'),'power'].values) * 0.5

        # Calculate energy cost by multipling demand with energy charge 
        data['energycost'] = data.power * data.energycharge / 4

        monthly_cost = []
        monthly_cost = pd.DataFrame(columns = ('onpeak_demand', \
                                               'midpeak_demand',\
                                               'monthly_demand', \
                                               'monthly_demandcharge', \
                                               'monthly_onpeak_demandcharge', \
                                               'monthly_midpeak_demandcharge', \
                                               'monthly_energycharge', \
                                               'monthly_charge', \
                                               'monthly_cpp_credits',\
                                               'monthly_cpp_charge',\
                                               'monthly_CRL_charge',\
                                               'monthly_total_cost'))
        for i in range(1,13,1):
            # Monthly Demand Charges
            if i in [1,2,3,4,11,12]:
                midpeak_demand = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                (data.time >= '08:30') & (data.time < '21:30'),'power'].values)
                monthly_demand = max(data.loc[(data.month == i),'power'].values)
                onpeak_demand = 0
                
                monthly_energycharge = data.loc[(data.month == i),'energycost'].sum()
                monthly_demandcharge = monthly_demand * winter_monthly_demand  
                monthly_onpeak_demandcharge = 0
                monthly_midpeak_demandcharge = midpeak_demand * winter_midpeak_demand 
                monthly_charge = monthly_energycharge + monthly_demandcharge + monthly_onpeak_demandcharge + monthly_midpeak_demandcharge
            elif i in [5,6,7,8,9,10]:
                onpeak_demand = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                (data.time >= '12:00') & (data.time < '18:00'),'power'].values)
                onpeak_energycost = sum(data.loc[(data.month == i) & (data.weekday < 5) & \
                                    (data.time >= '12:00') & (data.time < '18:00'),'power'].values)                    
                midpeak_demand1 = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                    (data.time >= '18:00') & (data.time < '21:30'),'power'].values)
                midpeak_demand2 = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                    (data.time >= '08:30') & (data.time < '12:00'),'power'].values)   
                midpeak_demand = max(midpeak_demand1,midpeak_demand2)                                         
                monthly_demand = max(data.loc[(data.month == i),'power'].values)
                
                monthly_energycharge = data.loc[(data.month == i),'energycost'].sum()
                monthly_demandcharge = monthly_demand * summer_monthly_demand
                monthly_onpeak_demandcharge = onpeak_demand * summer_onpeak_demand  
                monthly_midpeak_demandcharge = midpeak_demand * summer_midpeak_demand 
                monthly_charge = monthly_energycharge + monthly_demandcharge + monthly_onpeak_demandcharge + monthly_midpeak_demandcharge
            else:
                pass
            # PDP Credits
            monthly_cpp_credits = 0
            monthly_cpp_charge = 0
            monthly_CRL_charge = 0

            # monthly cost
            monthly_total_cost = monthly_charge + monthly_cpp_credits + monthly_CRL_charge

            # Summary of Energy and Demand Charges
            monthly_cost.loc[i] = [onpeak_demand,\
                                   midpeak_demand,\
                                   monthly_demand, \
                                   monthly_demandcharge, \
                                   monthly_onpeak_demandcharge, \
                                   monthly_midpeak_demandcharge, \
                                   monthly_energycharge, \
                                   monthly_charge, \
                                   monthly_cpp_credits,\
                                   monthly_cpp_charge,\
                                   monthly_CRL_charge,\
                                   monthly_total_cost]
        annualcost = monthly_cost.monthly_total_cost.sum()
    except ValueError:
        print('MeterData is Not Available')
        annualcost = 0
    monthly_cost.to_csv('/Users/ryin/Dropbox/LBNL/PGE/utilitycost_PGE_'+tariff+'.csv')

    return annualcost

In [210]:
def calculate_utility_PDP(df, tariff, kWShed):
    utility = []
    utility = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/utilityrates/'+'PGE_'+tariff+'.csv', usecols=[1])

    # meter charge
    meter_charge = float("{0:.2f}".format(utility['Secondary'][0]))
    # demand charge
    summer_onpeak_demand = utility['Secondary'][1]
    summer_midpeak_demand = utility['Secondary'][2]
    summer_monthly_demand = utility['Secondary'][3]
    winter_midpeak_demand = utility['Secondary'][4]
    winter_monthly_demand = utility['Secondary'][5]

    # energy charge
    summer_onpeak_energy = utility['Secondary'][6]
    summer_midpeak_energy = utility['Secondary'][7]
    summer_offpeak_energy = utility['Secondary'][8]
    winter_midpeak_energy = utility['Secondary'][9]
    winter_offpeak_energy = utility['Secondary'][10]

    # PDP charge and credits
    cpp_charge = utility['Secondary'][11]
    credits_onpeak_demand = utility['Secondary'][12]
    credits_midpeak_demand = utility['Secondary'][13]
    
    
    # add energy charge as new column in dataframe
    try:
        data = df['2015']
        data['energycharge'] = 0

        # weekdays in summer
        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '00:00') & (data.time < '08:30'),'energycharge'] = summer_offpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '08:30') & (data.time < '12:00'),'energycharge'] = summer_midpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '12:00') & (data.time < '18:00'),'energycharge'] = summer_onpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '18:00') & (data.time < '23:00'),'energycharge'] = summer_midpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '23:00') & (data.time <= '23:45'),'energycharge'] = summer_offpeak_energy
        # add the PDP charge during the DR Event Hours
        data.loc[(data.DREventDay == 1) & \
                 (data.time >= '14:00') & (data.time < '18:00'),'energycharge'] = summer_onpeak_energy + cpp_charge
        # weekdays in winter
        data.loc[(data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '00:00') & (data.time < '08:30'),'energycharge'] = winter_offpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '08:30') & (data.time < '21:30'),'energycharge'] = winter_midpeak_energy

        data.loc[(data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                 (data.weekday < 5) & \
                 (data.time >= '21:30') & (data.time <= '23:45'),'energycharge'] = winter_offpeak_energy    

        # weekend and holidays in summer and winter
        data.loc[((data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                  (data.weekday >= 5)),'energycharge'] = summer_offpeak_energy

        data.loc[((data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                  (data.holiday == 1)),'energycharge'] = summer_offpeak_energy

        data.loc[((data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                  (data.weekday >= 5)),'energycharge'] = winter_offpeak_energy

        data.loc[((data.month.apply(lambda x: (x in [1,2,3,4,11,12]))) & \
                  (data.holiday == 1)),'energycharge'] = winter_offpeak_energy

        # Calculate Capacity Reservation Level (CRL), 50% CRL
        CRL = np.mean(data.loc[(data.month.apply(lambda x: (x in [5,6,7,8,9,10]))) & \
                           (data.weekday < 5) & \
                           (data.time >= '12:00') & (data.time < '18:00'),'power'].values) * 0.5
        data.loc[(data.DREventDay == 1) & \
                 (data.time >= '14:00') & (data.time < '18:00'),'power'] = data.loc[(data.DREventDay == 1) & \
                 (data.time >= '14:00') & (data.time < '18:00'),'power'].values - kWShed

        # Calculate energy cost by multipling demand with energy charge 
        data['energycost'] = data.power * data.energycharge / 4

        monthly_cost = []
        monthly_cost = pd.DataFrame(columns = ('onpeak_demand', \
                                               'midpeak_demand',\
                                               'monthly_demand', \
                                               'monthly_demandcharge', \
                                               'monthly_onpeak_demandcharge', \
                                               'monthly_midpeak_demandcharge', \
                                               'monthly_energycharge', \
                                               'monthly_charge', \
                                               'monthly_cpp_credits',\
                                               'monthly_cpp_charge',\
                                               'monthly_CRL_charge',\
                                               'monthly_total_cost'))
        for i in range(1,13,1):
            # Monthly Demand Charges
            if i in [1,2,3,4,11,12]:
                midpeak_demand = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                (data.time >= '08:30') & (data.time < '21:30'),'power'].values)

                monthly_demand = max(data.loc[data['month'] == i,'power'].values)
                onpeak_demand = 0
                
                monthly_energycharge = sum(data.loc[data['month'] == i,'energycost'].values)
                monthly_demandcharge = monthly_demand * winter_monthly_demand  
                monthly_onpeak_demandcharge = 0
                monthly_midpeak_demandcharge = midpeak_demand * winter_midpeak_demand 
                monthly_charge = monthly_energycharge + monthly_demandcharge + monthly_onpeak_demandcharge + monthly_midpeak_demandcharge
                monthly_cpp_credits = 0
                monthly_cpp_charge = 0
                monthly_CRL_charge = 0
            elif i in [5,6,7,8,9,10]:
                onpeak_demand = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                (data.time >= '12:00') & (data.time < '18:00'),'power'].values)
                onpeak_energycost = sum(data.loc[(data.month == i) & (data.weekday < 5) & \
                                    (data.time >= '12:00') & (data.time < '18:00'),'power'].values)                    
                midpeak_demand1 = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                    (data.time >= '18:00') & (data.time < '21:30'),'power'].values)
                midpeak_demand2 = max(data.loc[(data.month == i) & (data.weekday < 5) & \
                                    (data.time >= '08:30') & (data.time < '12:00'),'power'].values)   
                midpeak_demand = max(midpeak_demand1,midpeak_demand2)                                         
                monthly_demand = max(data.loc[(data.month == i),'power'].values)
                
                monthly_energycharge = sum(data.loc[data['month'] == i,'energycost'].values)
                monthly_demandcharge = monthly_demand * summer_monthly_demand
                monthly_onpeak_demandcharge = onpeak_demand * (summer_onpeak_demand + credits_onpeak_demand)  
                monthly_midpeak_demandcharge = midpeak_demand * (summer_midpeak_demand + credits_midpeak_demand)
                monthly_charge = monthly_energycharge + monthly_demandcharge + monthly_onpeak_demandcharge + monthly_midpeak_demandcharge

            # PDP Credits
                monthly_cpp_credits = onpeak_demand * credits_onpeak_demand + midpeak_demand * credits_midpeak_demand
                monthly_cpp_charge = sum(data.loc[(data.month == i) & (data.DREventDay == 1) & \
                                        (data.time >= '14:00') & (data.time < '18:00'),'energycost'].values)
                monthly_CRL_charge = 0
            else:
                pass
            # monthly cost
            monthly_total_cost = monthly_charge + monthly_cpp_credits + monthly_CRL_charge

            # Summary of Energy and Demand Charges
            monthly_cost.loc[i] = [onpeak_demand, midpeak_demand, monthly_demand, monthly_demandcharge, 
                                   monthly_onpeak_demandcharge, monthly_midpeak_demandcharge, 
                                   monthly_energycharge, monthly_charge, monthly_cpp_credits,
                                   monthly_cpp_charge, monthly_CRL_charge, monthly_total_cost]
        annualcost = monthly_cost.monthly_total_cost.sum()
    except ValueError:
        print('MeterData is Not Available')
        annualcost = 0
    monthly_cost.to_csv('/Users/ryin/Dropbox/LBNL/PGE/utilitycost_PGE_'+tariff+'.csv')
#     data.to_csv('/Users/ryin/Dropbox/LBNL/PGE/utilitycost_PGE_Meter'+tariff+'.csv')

    return annualcost

2. Generate all the basic metric from meter data and weather data


In [205]:
def calculateCustMetrics(df, customers, SAID, cz):
    
    df_wd = df.loc[(df['weekday'] >= 0) & (df['weekday'] <= 4) & (df['holiday'] == 0)]
    df_cssb = df_wd.loc[(df_wd['month'] >= 5) & (df_wd['month'] <= 10),['power','hour','time','oat']]
    CSSB = df_cssb.groupby(['time'])['power'].mean()

    hotDays = df_wd.groupby(['day'])['oat'].max().sort_values(ascending=False)[0:10]
    hotDaysPower = df.loc[saidData['day'].isin(hotDays.index),['power', 'oat','time','day']]
    hotDaysHVAC = df.loc[saidData['day'].isin(hotDays.index),['hvac', 'oat','time','day']]

    custHVAC = hotDaysHVAC.pivot(index='time',columns='day',values='hvac')
    custHVAC.columns = ['hotDayHVAC0','hotDayHVAC1','hotDayHVAC2','hotDayHVAC3','hotDayHVAC4',
                        'hotDayHVAC5','hotDayHVAC6','hotDayHVAC7','hotDayHVAC8','hotDayHVAC9']
    custHVAC['hotDayHVAC'] = hotDaysHVAC.groupby(['time'])['hvac'].mean()

    custMetrics = hotDaysPower.pivot(index='time',columns='day',values='power')
    custMetrics.columns = ['hotDay0','hotDay1','hotDay2','hotDay3','hotDay4',
                           'hotDay5','hotDay6','hotDay7','hotDay8','hotDay9']
    custMetrics['cssb'] = df_cssb.groupby(['time'])['power'].mean()
    custMetrics['LoadVariability_kW'] = df_cssb.groupby(['time'])['power'].std()
    custMetrics['LoadVariability_Pct'] = custMetrics['LoadVariability_kW'] / custMetrics['cssb'] 
    custMetrics['HotDaysLoadVariability_kW'] = hotDaysPower.groupby(['time'])['power'].std()
    custMetrics['HotDaysLoadVariability_Pct'] = custMetrics['HotDaysLoadVariability_kW'] / hotDaysPower.groupby(['time'])['power'].mean() 
    custMetrics['hotDay'] = hotDaysPower.groupby(['time'])['power'].mean()
    custMetrics['peakDay'] = df.loc[df['day'] == df.power.idxmax().strftime('%Y/%m/%d'),'power'].values

    DR_Equations = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/DREquations/DREquations_CZ'+str(cz)+'.csv')
    DR_Equations.index = DR_Equations.DR_Controls

    custOAT = hotDaysPower.pivot(index='time',columns='day',values='oat')
    custOAT.columns = ['hotDayOAT0','hotDayOAT1','hotDayOAT2','hotDayOAT3','hotDayOAT4','hotDayOAT5','hotDayOAT6',
                       'hotDayOAT7','hotDayOAT8','hotDayOAT9']
    custOAT['cssb_oat'] = df_cssb.groupby(['time'])['oat'].mean()
    custOAT['hotDay_oat'] = hotDaysPower.groupby(['time'])['oat'].mean()
    custOAT['peakDay_oat'] = df.loc[df['day'] == df.power.idxmax().strftime('%Y/%m/%d'),'oat'].values
    custOAT['hour'] = df.loc[df['day'] == df.power.idxmax().strftime('%Y/%m/%d'),'hour'].values
    custOAT['NoPrecool_Reset2F_alpha'] = np.nan
    custOAT['NoPrecool_Reset2F_beta'] = np.nan
    custOAT['NoPrecool_Reset4F_alpha'] = np.nan
    custOAT['NoPrecool_Reset4F_beta'] = np.nan
    custOAT['NoPrecool_Reset6F_alpha'] = np.nan
    custOAT['NoPrecool_Reset6F_beta'] = np.nan
    custOAT['Precool2F_Reset2F_alpha'] = np.nan
    custOAT['Precool2F_Reset2F_beta'] = np.nan
    custOAT['Precool2F_Reset4F_alpha'] = np.nan
    custOAT['Precool2F_Reset4F_beta'] = np.nan
    custOAT['Precool2F_Reset6F_alpha'] = np.nan
    custOAT['Precool2F_Reset6F_beta'] = np.nan

    for hour in range(14,18):
        custOAT.loc[custOAT['hour'] == hour, ['NoPrecool_Reset2F_alpha','NoPrecool_Reset2F_beta']] = \
            DR_Equations.loc[(DR_Equations['hour'] == hour) & (DR_Equations['precool'] == 0) & (DR_Equations['reset'] == 2), ['alpha_2','beta_2']].values
        custOAT.loc[custOAT['hour'] == hour, ['NoPrecool_Reset4F_alpha','NoPrecool_Reset4F_beta']] = \
            DR_Equations.loc[(DR_Equations['hour'] == hour) & (DR_Equations['precool'] == 0) & (DR_Equations['reset'] == 4), ['alpha_2','beta_2']].values
        custOAT.loc[custOAT['hour'] == hour, ['NoPrecool_Reset6F_alpha','NoPrecool_Reset6F_beta']] = \
            DR_Equations.loc[(DR_Equations['hour'] == hour) & (DR_Equations['precool'] == 0) & (DR_Equations['reset'] == 6), ['alpha_2','beta_2']].values

        custOAT.loc[custOAT['hour'] == hour, ['Precool2F_Reset2F_alpha','Precool2F_Reset2F_beta']] = \
            DR_Equations.loc[(DR_Equations['hour'] == hour) & (DR_Equations['precool'] == 2) & (DR_Equations['reset'] == 2), ['alpha_2','beta_2']].values
        custOAT.loc[custOAT['hour'] == hour, ['Precool2F_Reset4F_alpha','Precool2F_Reset4F_beta']] = \
            DR_Equations.loc[(DR_Equations['hour'] == hour) & (DR_Equations['precool'] == 2) & (DR_Equations['reset'] == 4), ['alpha_2','beta_2']].values
        custOAT.loc[custOAT['hour'] == hour, ['Precool2F_Reset6F_alpha','Precool2F_Reset6F_beta']] = \
            DR_Equations.loc[(DR_Equations['hour'] == hour) & (DR_Equations['precool'] == 2) & (DR_Equations['reset'] == 6), ['alpha_2','beta_2']].values

    result = pd.concat([custMetrics, custOAT, custHVAC], axis=1)

    usrInput = customers[customers['sa_id'] == SAID]
    usrOutput = pd.DataFrame()

    usrOutput['SAID'] = usrInput.sa_id
    usrOutput['UUID'] = uuid.uuid4()
    usrOutput.index = usrOutput['UUID']
    usrOutput['Peak_kW'] = df.power.max()
    usrOutput['Peak_kW_timestamp'] = df.power.idxmax()
    
    usrOutput['NAICS'] = usrInput.sa_naics_cd.values
    usrOutput['NAICS_desc'] = usrInput.naics_desc.values
    usrOutput['cty'] = usrInput.cty_nm.values
    usrOutput['ZIPCODE'] = usrInput.zip_5_dgt.values
    usrOutput['SUBLAP'] = 'SLAP_PGSN-APND'
    usrOutput['PGE_CZ'] = cz
    usrOutput['CEC_CZ'] = cz
    usrOutput['AMP'] = usrInput.AMP.values
    usrOutput['PDP'] = usrInput.PDP.values
    usrOutput['DBP'] = usrInput.DBP.values
    usrOutput['CBP'] = usrInput.CBP.values
    
#     # Peak Day
#     usrOutput['DR_Capacity_Precool0F_Reset2F'] = np.mean(result.peakDay * (result.NoPrecool_Reset2F_beta + result.peakDay_oat * result.NoPrecool_Reset2F_alpha)/100)
#     usrOutput['DR_Capacity_Precool0F_Reset4F'] = np.mean(result.peakDay * (result.NoPrecool_Reset4F_beta + result.peakDay_oat * result.NoPrecool_Reset4F_alpha)/100)
#     usrOutput['DR_Capacity_Precool0F_Reset6F'] = np.mean(result.peakDay * (result.NoPrecool_Reset6F_beta + result.peakDay_oat * result.NoPrecool_Reset6F_alpha)/100)
#     usrOutput['DR_Capacity_Precool2F_Reset2F'] = np.mean(result.peakDay * (result.Precool2F_Reset2F_beta + result.peakDay_oat * result.Precool2F_Reset2F_alpha)/100)
#     usrOutput['DR_Capacity_Precool2F_Reset4F'] = np.mean(result.peakDay * (result.Precool2F_Reset4F_beta + result.peakDay_oat * result.Precool2F_Reset4F_alpha)/100)
#     usrOutput['DR_Capacity_Precool2F_Reset6F'] = np.mean(result.peakDay * (result.Precool2F_Reset6F_beta + result.peakDay_oat * result.Precool2F_Reset6F_alpha)/100)
    # Hot Day
    usrOutput['DR_Capacity_Precool0F_Reset2F'] = np.mean(result.hotDay * (result.NoPrecool_Reset2F_beta + result.hotDay_oat * result.NoPrecool_Reset2F_alpha)/100)
    usrOutput['DR_Capacity_Precool0F_Reset4F'] = np.mean(result.hotDay * (result.NoPrecool_Reset4F_beta + result.hotDay_oat * result.NoPrecool_Reset4F_alpha)/100)
    usrOutput['DR_Capacity_Precool0F_Reset6F'] = np.mean(result.hotDay * (result.NoPrecool_Reset6F_beta + result.hotDay_oat * result.NoPrecool_Reset6F_alpha)/100)
    usrOutput['DR_Capacity_Precool2F_Reset2F'] = np.mean(result.hotDay * (result.Precool2F_Reset2F_beta + result.hotDay_oat * result.Precool2F_Reset2F_alpha)/100)
    usrOutput['DR_Capacity_Precool2F_Reset4F'] = np.mean(result.hotDay * (result.Precool2F_Reset4F_beta + result.hotDay_oat * result.Precool2F_Reset4F_alpha)/100)
    usrOutput['DR_Capacity_Precool2F_Reset6F'] = np.mean(result.hotDay * (result.Precool2F_Reset6F_beta + result.hotDay_oat * result.Precool2F_Reset6F_alpha)/100)

    usrOutput['DR_Capacity_CycleOnOff_30Pct'] = np.mean(result['14:00':'17:45'].hotDayHVAC) * 0.3
    usrOutput['DR_Capacity_CycleOnOff_50Pct'] = np.mean(result['14:00':'17:45'].hotDayHVAC) * 0.5
    usrOutput['DR_Capacity_CycleOnOff_100Pct'] = np.mean(result['14:00':'17:45'].hotDayHVAC) * 1.0
    usrOutput['HVAC_Building_Ratio'] = usrOutput['DR_Capacity_CycleOnOff_100Pct'] / result.hotDay.max()
    usrOutput['CycleShed_Building_Ratio'] = usrOutput['DR_Capacity_CycleOnOff_50Pct'] / result.hotDay.max()
    usrOutput['ResetShed_Building_Ratio'] = usrOutput['DR_Capacity_Precool2F_Reset4F'] / result.hotDay.max()
    
    usrOutput['LoadVariability_kW'] = np.mean(result['14:00':'17:45'].LoadVariability_kW)
    usrOutput['LoadVariability_Pct'] = np.mean(result['14:00':'17:45'].LoadVariability_Pct)
    usrOutput['HotDaysLoadVariability_kW'] = np.mean(result['14:00':'17:45'].HotDaysLoadVariability_kW)
    usrOutput['HotDaysLoadVariability_Pct'] = np.mean(result['14:00':'17:45'].HotDaysLoadVariability_Pct)
    # Calculate the Annual Utility Bill base on 2015 Meter Data
    kWshed = usrOutput['DR_Capacity_CycleOnOff_50Pct'].values
    if df.power.max() < 75:
        usrOutput['CustSize'] = 'Small'
        usrOutput['Tariff'] = 'A-1'
        usrOutput['TOU_UtilityCost'] = calculate_utility_TOU(df,'A-1',0)
        usrOutput['PDP_UtilityCost_NoADR'] = calculate_utility_PDP(df,'A-1_PDP',0)
        usrOutput['PDP_UtilityCost_ADR'] = calculate_utility_PDP(df,'A-1_PDP',kWshed)

    elif (df.power.max() >= 75) & (df.power.max() < 200):
        usrOutput['CustSize'] = 'Small/Medium'
        usrOutput['Tariff'] = 'A-6'
        usrOutput['TOU_UtilityCost'] = calculate_utility_TOU(df,'A-6',0)
        usrOutput['PDP_UtilityCost_NoADR'] = calculate_utility_PDP(df,'A-6_PDP',0)
        usrOutput['PDP_UtilityCost_ADR'] = calculate_utility_PDP(df,'A-6_PDP',kWshed)

    elif (df.power.max() >= 200) & (df.power.max() < 500):
        usrOutput['CustSize'] = 'Medium'
        usrOutput['Tariff'] = 'A-10'
        usrOutput['TOU_UtilityCost'] = calculate_utility_TOU(df,'A-10',0)
        usrOutput['PDP_UtilityCost_NoADR'] = calculate_utility_PDP(df,'A-10_PDP',0)
        usrOutput['PDP_UtilityCost_ADR'] = calculate_utility_PDP(df,'A-10_PDP',kWshed)

    elif df.power.max() >= 500:
        usrOutput['CustSize'] = 'Large'
        usrOutput['Tariff'] = 'E-19'
        usrOutput['TOU_UtilityCost'] = calculate_utility_TOU(df,'E-19',0)
        usrOutput['PDP_UtilityCost_NoADR'] = calculate_utility_PDP(df,'E-19_PDP',0)
        usrOutput['PDP_UtilityCost_ADR'] = calculate_utility_PDP(df,'E-19_PDP',kWshed)
    
    usrOutput['PDP_TOU_Ratio'] = usrOutput['PDP_UtilityCost_NoADR'] / usrOutput['TOU_UtilityCost']
    usrOutput['TOU2PDP_Benefit'] = usrOutput['TOU_UtilityCost'] - usrOutput['PDP_UtilityCost_NoADR']
    usrOutput['TOU2PDP_ADR_Benefit'] = usrOutput['TOU_UtilityCost'] - usrOutput['PDP_UtilityCost_ADR']
    usrOutput['PDP_ADR_Benefit'] = usrOutput['PDP_UtilityCost_NoADR'] - usrOutput['PDP_UtilityCost_ADR']
    usrOutput['PDP_ADR_Benefit_Pct'] = usrOutput['PDP_ADR_Benefit'] / usrOutput['PDP_UtilityCost_NoADR']
    
    return result, usrOutput

In [203]:
ZipCodeGroup = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/ADRGEOOfficeZips.csv')

In [208]:
for zc in ZipCodeGroup[ZipCodeGroup['City']=='Stockton']['Zip5'][0:1]:
    df = pd.read_csv('/Users/ryin/Dropbox/LBNL/PGE/stockton/'+str(zc)+'.csv',header=None)
    df.drop(df.columns[[0]], inplace=True, axis=1)
    df.columns = header

    df.DATE = pd.to_datetime(df.DATE, format='%m/%d/%Y')
    df.index = pd.to_datetime(df.DATE, format='%m/%d/%Y')
    
    cz = zc2cz(zc)  # convert ZipCode to CEC Climate Zone
    with open('/Users/ryin/Dropbox/LBNL/PGE/stockton/CustSummary_'+str(zc)+'.csv','w',newline='') as csvfile:
        writer = csv.writer(csvfile,delimiter=',')
        csvheader = ['UUID','SAID','UUID','Peak_kW','Peak_kW_timestamp','NAICS',
                     'NAICS_desc','cty','ZIPCODE','SUBLAP','PGE_CZ','CEC_CZ','AMP','PDP','DBP','CBP',
                     'DR_Capacity_Precool0F_Reset2F','DR_Capacity_Precool0F_Reset4F','DR_Capacity_Precool0F_Reset6F',
                     'DR_Capacity_Precool2F_Reset2F','DR_Capacity_Precool2F_Reset4F','DR_Capacity_Precool2F_Reset6F',
                     'DR_Capacity_CycleOnOff_30Pct','DR_Capacity_CycleOnOff_50Pct','DR_Capacity_CycleOnOff_100Pct',
                     'HVAC_Building_Ratio','CycleShed_Building_Ratio','ResetShed_Building_Ratio','LoadVariability_kW',
                     'LoadVariability_PCT','HotDaysLoadVariability_kW',
                     'HotDaysLoadVariability_PCT','CustSize','Tariff','TOU_UtilityCost','PDP_UtilityCost_NoADR',
                     'PDP_UtilityCost_ADR','PDP_TOU_Ratio','TOU2PDP_Benefit','TOU2PDP_ADR_Benefit','PDP_ADR_Benefit',
                     'PDP_ADR_Benefit_Pct']
        writer.writerow(csvheader)

    for SAID in set(df.SA):
        weatherInput = '/Users/ryin/Dropbox/LBNL/PGE/stockton/stockon/KCASTOCK9.temperatureinterpolated.csv'
        saidData = CleanMeterData(df, SAID, weatherInput, holidays, DREventDays)
        metrics = pd.DataFrame()
        custSummary = pd.DataFrame()
        metrics, custSummary = calculateCustMetrics(saidData, customers, SAID, cz)
        metrics.to_csv('/Users/ryin/Dropbox/LBNL/PGE/stockton/CustMetrics_'+str(zc)+'.csv',mode='w',sep=',',header=True)
        custSummary.to_csv('/Users/ryin/Dropbox/LBNL/PGE/stockton/CustSummary_'+str(zc)+'.csv',mode='a',sep=',',header=False)
        print('Customer ' + str(SAID)+' is Completed.')


/Users/ryin/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
/Users/ryin/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:29: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/ryin/anaconda/lib/python3.5/site-packages/pandas/core/indexing.py:465: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
/Users/ryin/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:62: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/ryin/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:30: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/ryin/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:65: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Customer 838288097.0 is Completed.
Customer 1605680066.0 is Completed.
Customer 2041448326.0 is Completed.
Customer 398583910.0 is Completed.
Customer 1152913159.0 is Completed.
Customer 2051150602.0 is Completed.
Customer 1659186381.0 is Completed.
MeterData is Not Available
MeterData is Not Available
MeterData is Not Available
Customer 1556177005.0 is Completed.
MeterData is Not Available
MeterData is Not Available
MeterData is Not Available
Customer 1670765005.0 is Completed.
Customer 891901840.0 is Completed.
Customer 1627410353.0 is Completed.
Customer 48035508.0 is Completed.
Customer 1738264661.0 is Completed.
Customer 1502410805.0 is Completed.
Customer 729786169.0 is Completed.
Customer 323154585.0 is Completed.
Customer 1868800378.0 is Completed.
Customer 1856089211.0 is Completed.
Customer 60120025.0 is Completed.
Customer 1462405501.0 is Completed.
Customer 2117937756.0 is Completed.

In [ ]: