12-752 Course Project

Xiaowen Gu (xiaoweng), Kenan Zhang (kenanz)


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
%matplotlib inline

1. Load Data

1.1 Energy Data of Gates


In [3]:
gatesDateConverter = lambda d : dt.datetime.strptime(d,'%m/%d/%Y %H:%M')
gates_elect = np.genfromtxt('pointData_gates.csv',delimiter=",",names=True,
            dtype=[dt.datetime,'f8'],converters={0: gatesDateConverter})

In [4]:
print gates_elect['Time'][0],gates_elect['Time'][-1]
print gates_elect['Time'][1]-gates_elect['Time'][0]
print np.max(np.diff(gates_elect['Time'])),np.min(np.diff(gates_elect['Time']))


2014-12-10 00:02:00 2015-11-10 23:55:00
0:15:00
1 day, 6:47:00 -1 day, 23:15:00

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(gates_elect['Time'],gates_elect['Value'],'r')
plt.xlabel('Time Stamp')
plt.ylabel('Electricity Consumption')
plt.title('GHC Electricity Consumption')


Out[ ]:
<matplotlib.text.Text at 0x1093cd790>

As is shown in the figure, the load data is cumulated energy consumption from Dec 2014 to Dec 2015 with some time missing. The regular time interval is 15min.

1.2 Occupancy Data of Gates


In [ ]:
occDateConverter = lambda d : dt.datetime.strptime(d,'%d-%b-%Y %H:%M:%S')
occ_all = np.genfromtxt('occ_clean.csv',delimiter=",",
                        dtype=[('timestamp', type(dt.datetime.now)),('id','f8'),('occ', 'f8')],
                        converters={0: occDateConverter}, skip_header=1)

In [ ]:
print "First sample is Office %d with occupancy %d on %s"%(occ_all['id'][0],occ_all['occ'][0],occ_all['timestamp'][0])
print occ_all['timestamp'][1]-occ_all['timestamp'][0]

In [ ]:
sample_office=occ_all[np.where(occ_all['id']==1)]

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(sample_office['timestamp'],sample_office['occ'],'r')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Occupancy of Office 1')
plt.ylim(0,1.5)

As is shown in the figure, generally the occupancy data is from Sep 2014 to Dec 2015 with some time missing. And the time interval is around 20min but not regular.

1.3 Energy Data of Other Building


In [ ]:
bakerDateConverter = lambda d : dt.datetime.strptime(d,'%m/%d/%Y %H:%M')
baker_elect = np.genfromtxt('baker_energy.csv',delimiter=",",names=True,
            dtype=[dt.datetime,'f8'],converters={0: bakerDateConverter})

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(baker_elect['Time'],gates_elect['BakerHall'],'r')
plt.xlabel('Time Stamp')
plt.ylabel('Electricity Consumption')
plt.title('Baker Hall Electricity Consumption')

1.4 Temperature Data


In [ ]:
temperatureDateConverter = lambda d : dt.datetime.strptime(d,'%Y-%m-%d %H:%M:%S')
temperature = np.genfromtxt('temperature.csv',delimiter=",",
                            dtype=[('timestamp', type(dt.datetime.now)),('tempF', 'f8')],
                            converters={0: temperatureDateConverter}, skip_header=1)

In [ ]:
plt.plot(temperature['timestamp'])

In [ ]:
print "The minimum difference between any two consecutive timestamps is: " + str(np.min(np.diff(temperature['timestamp'])))
print "The maximum difference between any two consecutive timestamps is: " + str(np.max(np.diff(temperature['timestamp'])))

As there is no gap, change time interval to 15 minutes.


In [ ]:
new_temperature = temperature[0:-1:3]

In [ ]:
print "First timestamp is on \t{}. \nLast timestamp is on \t{}.".format(temperature['timestamp'][0], temperature['timestamp'][-1])

2. Clean Data

2.1 Energy Data

Calculate the electricity consumption for each interval


In [ ]:
gates_elect_value = []
for i in range (1, len(gates_elect['Value'])):
    gates_elect_value.append(gates_elect['Value'][i]-gates_elect['Value'][i-1])

In [ ]:
print(len(gates_elect_value))
print(len(gates_elect['Value']))

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(gates_elect['Time'][1:],gates_elect_value)
plt.xlabel('Time')
plt.ylabel('Electricity [kWh]')
plt.ylim(-500,3000)

In [ ]:
gates_elect_value = np.array(gates_elect_value)
value = np.where((gates_elect_value>10)*(gates_elect_value<200))

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(gates_elect['Time'][1:][value],gates_elect_value[value],'-b')
plt.xlabel('Time')
plt.ylabel('Electricity [kWh]')
plt.ylim(80,200)

In [ ]:
print "Power data from {0} to {1}.\nTemperature data from {2} to {3}".format(gates_elect['Time'][1:][value][0], gates_elect['Time'][1:][value][-1],new_temperature['timestamp'][2:][0], new_temperature['timestamp'][2:][-1])

In [ ]:
new_temperature = new_temperature[0:-24]

In [ ]:
newElectValues = interp(gates_elect['Time'][1:][value], gates_elect_value[value], new_temperature['timestamp'][2:])

In [ ]:
toposix = lambda d: (d - dt.datetime(1970,1,1,0,0,0)).total_seconds()

timestamp_in_seconds = map(toposix,new_temperature['timestamp'])
timestamps = new_temperature['timestamp'][2:]
temp_values = new_temperature['tempF'][2:]
elect_values = newElectValues
print(timestamps[0],timestamps[-1])

In [ ]:
len(temp_values)==len(elect_values)

In [ ]:
weekday = map(lambda t: t.weekday(), timestamps)
weekday = np.array(weekday)
weekends = np.where(weekday>=5) ## Note that depending on how you do this, the result could be a tuple of ndarrays.
weekdays = np.where(weekday<5)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(timestamps[weekdays[0]],elect_values[weekdays[0]],'--b')
plt.figure(figsize=(15,5))
plt.plot(timestamps[weekdays[0]], temp_values[weekdays[0]], '--r')

2.2 Occupancy Data

Find the latest starting time and the earliest ending time

In [ ]:
def startEndOffice(id_office,occ):
    n=len(id_office)
    startEndList=np.empty([n,2],dtype=type(dt.datetime.now))
    for i in range(n):
        office=occ[np.where(occ['id']==id_office[i])]
        startEndList[i,:]=np.array([office['timestamp'][0],office['timestamp'][-1]])  
    return startEndList

Test startEndOffice function


In [ ]:
sample_start_end_office=startEndOffice([1],sample_office)
print min(sample_start_end_office[:,0])
print max(sample_start_end_office[:,1])

Find and print the latest starting time and the earliest ending time of all offices


In [ ]:
id_office=np.unique(occ_all['id'])

In [ ]:
start_end_office=startEndOffice(id_office,occ_all)
print max(start_end_office[:,0]),min(start_end_office[:,1])

In [ ]:
date_1=dt.datetime(2014,10,28)
date_2=dt.datetime(2015,12,10)
print date_1.weekday(),date_2.weekday()

The latest starting date is 2014-10-28 Tue; the earliest ending date is 2015-12-10 Thu

Initiate the Time Series of Building Occupancy

In [ ]:
def generateTimeSeries(start,end,step):
    #generat a time series with input start, end and step
    #skip weekends
    ts = []
    t=start
    while t <= end:
        if t.weekday() < 5:
            ts.append(t)
        t += step
    return ts

Test generateTimeSeries function


In [ ]:
start=dt.datetime(2015,12,3)
end=dt.datetime(2015,12,5,23,45)
step = dt.timedelta(minutes=15)
print start,end,step
ts_test=generateTimeSeries(start,end,step)
print ts_test[0],ts_test[-1]

Generate an occupancy time series with starting time 2014-11-03 00:00:00 Mon, ending time 2015-12-04 23:45:00 Fri and time step 15min without weekends.


In [ ]:
start=dt.datetime(2014,11,3,0,0)
end=dt.datetime(2015,12,4,23,45)
step = dt.timedelta(minutes=15)
ts_occ=generateTimeSeries(start,end,step)
print ts_occ[0],ts_occ[-1]
print np.shape(ts_occ)

3. Prepare Data

3.1 Harmonize Time Series


In [ ]:
def interp(tP, P, tT):
    # This function assumes that the input is an numpy.ndarray of datetime objects

    # Most useful interpolation tools don't work well with datetime objects
    # so we convert all datetime objects into the number of seconds elapsed
    # since 1/1/1970 at midnight (also called the UNIX Epoch, or POSIX time):
    
    toposix = lambda d: (d - dt.datetime(1970,1,1,0,0,0)).total_seconds()
    tP = map(toposix, tP)
    tT = map(toposix, tT)
    
    # Now we interpolate
    from scipy.interpolate import interp1d
    f = interp1d(tP, P,'linear')
    
    return f(tT)

3.2 Calculate Occupancy Level

Test occupancy interpolation


In [ ]:
sample_interp=interp(sample_office['timestamp'],sample_office['occ'],ts_occ)
print np.shape(sample_interp)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(ts_occ,sample_interp,'r')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Interpolated Occupancy of Office 1')
plt.ylim(0,1.5)

In [ ]:
def interpOcc(id_office,occ_all,ts_occ):
    occ=np.zeros(len(ts_occ))
    for i in range(len(id_office)):
        office=occ_all[np.where(occ_all['id']==id_office[i])]
        occ=np.add(occ,interp(office['timestamp'],office['occ'],ts_occ))
    occ_interp=np.ndarray(shape=(len(ts_occ)),dtype=[('timestamp',dt.datetime),('occ',float)])
    occ_interp['timestamp']=ts_occ
    occ_interp['occ']=np.divide(occ,len(id_office))
    return occ_interp

Test interpOcc function


In [ ]:
sample_office_2=occ_all[(occ_all['id']==1)|(occ_all['id']==2)]
print np.shape(sample_office_2)

In [ ]:
sample_interp_2=interpOcc([1,2],sample_office_2,ts_occ)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(ts_occ,sample_interp_2['occ'],'r')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Interpolated Occupancy of Office 1 & 2')
plt.ylim(0,1.5)

Interpolate occupancy data and calculate occupancy level


In [ ]:
occ_interp=interpOcc(id_office,occ_all,ts_occ)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(ts_occ,occ_interp['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Interpolated GHC Occupancy Level')
plt.ylim(0,1)

As is shown in the figure, there is missing data in Aug and Sep. Besides, the occupancy level was relatively low during the Jan and late Jun due to the winter and summer break. Therefore, we further zoom into spring and fall semester to determine the time period used in our study.

3.3 Crop Study Period

According the academic calendar of CMU, Fall 2014 ended by Dec 12 while Spring 2015 started from Jan 12. Therefore, we firstly plot the occupancy from Nov 3 to Dec 12.


In [ ]:
t_1=dt.datetime(2014,12,13)
occ_1=occ_interp[np.where(occ_interp['timestamp']<t_1)]
print np.shape(occ_1)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(occ_1['timestamp'],occ_1['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy Level Fall 2014 (Nov 3 to Dec 12)')
plt.ylim(0,1)

Similarly, we plot spring and summer semester of 2015, specifically from Jan 12 to May 15 and from May 18 to Aug 7. Fall 2015 started from Aug 31.


In [ ]:
t_2=dt.datetime(2015,1,12)
t_3=dt.datetime(2015,5,16)
t_4=dt.datetime(2015,5,18)
t_5=dt.datetime(2015,8,8)
t_6=dt.datetime(2015,8,31)
occ_2=occ_interp[np.where((occ_interp['timestamp']>=t_2)&(occ_interp['timestamp']<t_3))]
occ_3=occ_interp[np.where((occ_interp['timestamp']>=t_4)&(occ_interp['timestamp']<t_5))]
occ_4=occ_interp[np.where(occ_interp['timestamp']>=t_6)]
print np.shape(occ_2),np.shape(occ_3),np.shape(occ_4)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(occ_2['timestamp'],occ_2['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy Level Spring 2015 (Jan 12 to May 15)')
plt.ylim(0,1)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(occ_3['timestamp'],occ_3['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy Level Summer 2015 (May 18 to Aug 7)')
plt.ylim(0,1)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(occ_4['timestamp'],occ_4['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy Level Fall 2015 (Aug 31 to Dec 4)')
plt.ylim(0,1)

Based on figures above, we choose Nov 3 to Dec 12 2014 (6w), Jan 12 to Apr 3 (12w), Apr 20 to May 15 (4w), Jun 22 to Jul 17 (4w) and Sep 28 to Dec 4 (10w), 36 weeks in total, to build our models.

Although we choose discrete time periods, for each of them the number of weeks is even thus we could split the traning set and test set week by week.

Define start and end arrays

In [ ]:
start=np.array([dt.datetime(2014,11,3),dt.datetime(2015,1,12),dt.datetime(2015,4,20),dt.datetime(2015,6,22),dt.datetime(2015,9,28)])
end=np.array([dt.datetime(2014,12,13),dt.datetime(2015,4,4),dt.datetime(2015,5,16),dt.datetime(2015,7,17),dt.datetime(2015,12,5)])
print start
print end
Crop data

In [ ]:
def cropData(data,ts,start,end):
    n=np.shape(start)[0]
    crop=np.array([],dtype=int)
    for i in range(n):
        r=np.where((data['timestamp']>=start[i])&(data['timestamp']<end[i]))
        crop=np.append(crop,r[0])
    return data[crop]

Test cropData function


In [ ]:
sample_crop=cropData(sample_interp_2,ts_occ,start,end)
print np.shape(sample_crop)
print sample_crop['timestamp'][0],sample_crop['timestamp'][-1]

Crop the occupancy data


In [ ]:
occ_crop=cropData(occ_interp,ts_occ,start,end)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(occ_crop['timestamp'],occ_crop['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Cropped GHC Occupancy Level')
plt.ylim(0,1)

3.4 Harmonize data for temperature, electricity & occupancy


In [ ]:
new_temp = temp_values[weekdays[0]][0:22080]
new_elect = elect_values[weekdays[0]][0:22080]
new_timestamp = timestamps[weekdays[0]][0:22080]
temp = []
temp.extend(new_temp[2206:7966])
temp.extend(new_temp[8926:10846])
temp.extend(new_temp[13246:14686])
temp.extend(new_temp[19966:21886])

elect = []
elect.extend(new_elect[2206:7966])
elect.extend(new_elect[8926:10846])
elect.extend(new_elect[13246:14686])
elect.extend(new_elect[19966:21886])

tstamp = []
tstamp.extend(new_timestamp[2206:7966])
tstamp.extend(new_timestamp[8926:10846])
tstamp.extend(new_timestamp[13246:14686])
tstamp.extend(new_timestamp[19966:21886])

4. Building Energy Model without Occupancy

4.1 Design Matrix with/without Occupancy


In [ ]:
def Tc(temperature, T_bound):
    # The return value will be a matrix with as many rows as the temperature
    # array, and as many columns as len(T_bound) [assuming that 0 is the first boundary]
    
    Tc_matrix = np.zeros((len(temperature), len(T_bound)))
    for i in range (len(temperature)):
        temp = temperature[i]
        if temp > T_bound[0]:
            Tc_matrix[i,0] = T_bound[0]
        else:
            Tc_matrix[i,0] = temp
        for j in range (1, len(T_bound)):
            if temp > T_bound[j]:
                Tc_matrix[i,j] = T_bound[j] - T_bound[j-1]
            elif temp > T_bound[j-1]:
                Tc_matrix[i,j] = temp - T_bound[j-1]               
    return Tc_matrix

In [ ]:
def DesignMatrix(temperature, timestamps_num, temp_bounds):
    #timestamps_num is the number of data points for a week
    #The total number of data points will be cleaned before calling this 
    #function to ensure len(temperature) is divisible by timestamps_num.
    weeks_num = len(temperature)//timestamps_num
    I = np.identity(timestamps_num)
    tiled_I = np.tile(I, (weeks_num,1))
    T = Tc(temperature, temp_bounds)        
    X = np.concatenate((tiled_I, T), axis = 1)
    return np.matrix(X)

def DesignMatrix2(temperature, timestamps_num, temp_bounds, occupancy):
    weeks_num = len(temperature)//timestamps_num
    I = np.identity(timestamps_num)
    tiled_I = np.tile(I, (weeks_num,1))
    T = Tc(temperature, temp_bounds)        
    X = np.concatenate((tiled_I, T), axis = 1)
    new_X = np.concatenate((X, occupancy), axis = 1)
    return np.matrix(new_X)

4.2 Coefficient Estimation


In [ ]:
def beta_hat(X, elect_values):
    X_T = np.transpose(X)
    b = np.linalg.inv(X_T*X) * X_T * elect_values
    return b

4.3 Define Training and Test Data


In [ ]:
new_temp = temp_values[weekdays[0]][0:22080]
new_elect = elect_values[weekdays[0]][0:22080]
new_timestamp = timestamps[weekdays[0]][0:22080]

train_temp = []
test_temp = []
for i in range(23):
    train_temp.extend(new_temp[960*i:960*i+480])
    test_temp.extend(new_temp[960*i+480:960*i+960])

train_elect = []
test_elect = []
for i in range(23):
    train_elect.extend(new_elect[960*i:960*i+480])
    test_elect.extend(new_elect[960*i+480:960*i+960])
    
train_timestamps = []
test_timestamps = []
for i in range(23):
    train_timestamps.extend(new_timestamp[960*i:960*i+480])
    test_timestamps.extend(new_timestamp[960*i+480:960*i+960])
print(train_timestamps[0],test_timestamps[-1])

In [ ]:
train_T_bound = (40,50,60,70,80,90)
test_T_bound = (40,50,60,70,80,90)

In [ ]:
train_X = DesignMatrix(train_temp,480, train_T_bound)

In [ ]:
train_elect_trans = np.transpose(np.matrix(train_elect))
betahat = beta_hat(train_X, train_elect_trans)

In [ ]:
test_X = DesignMatrix(test_temp,480, test_T_bound)

In [ ]:
predict_y = test_X * betahat

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(predict_y[1000:2000], 'r-', label = "Predicted")
plt.plot(test_elect[1000:2000], 'b-', label = "Actual")
plt.ylim(100,200)
plt.xlabel("Time",size=15)
plt.ylabel("Electricity [kWh]",size=15)
plt.legend(loc="upper left")
plt.title('Original Model')
T-test & R-square

In [ ]:
X = test_X
Y = np.matrix(test_elect).T
y_bar = np.matrix([[np.mean(Y)]*len(Y)]).T

In [ ]:
R_squared = 1-((Y-X*betahat).T*(Y-X*betahat))/((Y-y_bar).T*(Y-y_bar))
print R_squared[0,0]

In [ ]:
from scipy.stats import t

In [ ]:
n = 11040
p = 486
t.isf((1-0.95)/2,n-p)

In [ ]:
MSE = (Y-X*betahat).T*(Y-X*betahat)/(n-p)
S_betahat_sqr = np.multiply(MSE, np.linalg.inv(X.T*X))
S_betahatk_sqr = S_betahat_sqr.diagonal()
S_betahatk = np.sqrt(S_betahatk_sqr.T)

In [ ]:
T = []
for i in range(486):
    T.append(np.array(betahat[i,:])[0][0]/np.array(S_betahatk[i,:])[0][0])
def rej_coef(T):
    rej_list = []
    for i in range(len(T)):
        if T[i] > t.isf((1-0.95)/2,n-p) or T[i] < -t.isf((1-0.95)/2,n-p):
            rej_list.append(T[i])
    return rej_list

In [ ]:
len(rej_coef(T))

The model has 486 coefficients therefore H0 hypothesis of all vairables is rejected

5. Occupancy Pattern Model

5.1 Define State of Occupancy Level

Define occupany state based on the pattern of real occupancy level

To define the occupancy state, we firstly plot the occupancy level of a sample week and a sample


In [ ]:
def weekdayArray(data):
    weekday = map(lambda t: t.weekday(), data['timestamp'])
    return np.asarray(weekday)

In [ ]:
def hourArray(data):
    hour = map(lambda t: t.hour, data['timestamp'])
    return np.asarray(hour)

In [ ]:
def minuteArray(data):
    minute = map(lambda t: t.minute, data['timestamp'])
    return np.asarray(minute)

Sample week occupancy data


In [ ]:
week_start=np.where((weekdayArray(occ_crop)==0)&(hourArray(occ_crop)==0)&(minuteArray(occ_crop)==0))[0]
print np.shape(week_start)
sample_week=occ_crop[week_start[0]:week_start[1]]
print np.shape(sample_week)

In [ ]:
day_start=np.where((hourArray(sample_week)==0)&(minuteArray(sample_week)==0))[0]
print np.shape(day_start)
sample_day=occ_crop[day_start[0]:day_start[1]]
print np.shape(sample_day)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(sample_week['timestamp'],sample_week['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy Level of Sample Week')
plt.ylim(0,1)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(sample_day['timestamp'],sample_day['occ'],'b')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy Level of Sample Day')
plt.ylim(0,1)

As is shown in the figures, there is little difference among weekdays, and the range of daily occupancy is between 0.1 to 0.8. Therefore, we define the occupancy states as [0.1,0.3,0.6,0.8].


In [ ]:
state=np.array([0.1,0.3,0.6,0.8])
print state

We also plot the sample day occupancy level and the occupancy state to validate our assumption


In [ ]:
def plotOccState(occ,state):
    n=np.shape(occ)[0]
    plt.figure(figsize=(15,5))
    plt.plot(occ['timestamp'],occ['occ'],'b')
    for i in state:
        s=i*np.ones(n)
        plt.plot(occ['timestamp'],s,'r')
    plt.xlabel('Time Stamp')
    plt.ylabel('Occupancy')
    plt.title('Occupancy Level vs Occupancy State')
    plt.ylim(0,1)

In [ ]:
plotOccState(sample_day,state)
Build occupancy state Matrix

Calculate the difference between real occupancy and occupancy state


In [ ]:
occ_data=np.matrix(occ_crop['occ']).T
n_data=len(occ_data)
n_state=len(state)
print np.shape(occ_data)
diff=occ_data*np.ones((1,n_state))-np.ones((n_data,1))*state
diff=np.abs(diff)
print diff.shape
print diff[1:10]

Assign real occupancy level to a single state and build occupancy state matrix


In [ ]:
occ_mtx=np.zeros((n_data,n_state))
ind=np.argmin(diff,1)
for i in range(occ_data.shape[0]):
    occ_mtx[i,ind[i]]=1
print occ_mtx.shape

In [ ]:
occ_state=occ_mtx*state
occ_state=np.sum(occ_state,1)
print np.shape(occ_state)
sample_week_state=occ_state[:len(sample_week)]
plt.figure(figsize=(15,5))
plt.plot(sample_week['timestamp'],sample_week['occ'],'b',label='occupancy')
plt.plot(sample_week['timestamp'],sample_week_state,'r',label='occupancy state')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy of Sample Week')
plt.ylim(0,1)
plt.legend()
Reshape the occupancy matrix (# time_indicator, # state, # week)

Check if there is missing data in certain week


In [ ]:
for i in range(1,len(week_start)):
    if (week_start[i]-week_start[i-1])!=5*len(sample_day):
        print i-1,week_start[i]-week_start[i-1]

The 25th week only has 384 sample, thus we need to discard this week.


In [ ]:
crop=range(week_start[25],week_start[26])
print len(crop)
occ_mtx=np.delete(occ_mtx,crop,0)
occ_crop_new=np.delete(occ_crop,crop,0)
print np.shape(occ_mtx)
print np.shape(occ_crop_new)

Now reshape the occupancy matrix


In [ ]:
n_day=(len(week_start)-1)*5
print n_day
occ_mtx=np.reshape(occ_mtx,(n_day,len(sample_day),n_state))
print occ_mtx.shape

Test the occupancy matrix is reshaped correctly


In [ ]:
test_mtx=occ_mtx[0,:,:]
test_mtx=test_mtx*state
test_mtx=np.sum(test_mtx,1)
plt.figure(figsize=(15,5))
plt.plot(sample_day['timestamp'],sample_day['occ'],'b',label='occupancy')
plt.plot(sample_day['timestamp'],test_mtx,'r',label='occupancy state')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('GHC Occupancy of Sample Week')
plt.ylim(0,1)
plt.legend()

5.2 Define Transition Matrices

Initiate transition matrices (# state, # state, # time_indicator)

In [ ]:
n_t=len(sample_day)
print n_t
tran_mtx=np.zeros((n_state*n_t,n_state))
tran_mtx=np.reshape(tran_mtx,(n_state,n_state,n_t))
print tran_mtx.shape
Calculate parameters of each transition matrix

In [ ]:
def T(occ_pre,occ_pri):
    n_state=occ_pre.shape[1]
    tran_mtx=np.zeros((n_state,n_state))
    for i in range(n_state):
        for j in range(n_state):
            ind_pri=np.where(occ_pri[:,i]==1)
            if len(ind_pri[0])==0:
                tran_mtx[i,j]=0
            else: 
                pri_pre=occ_pre[ind_pri[0]]
                ind_pre=np.where(pri_pre[:,j]==1)
                prob=len(ind_pre[0])/float(len(ind_pri[0]))
                tran_mtx[i,j]=prob
    return tran_mtx

Test T function


In [ ]:
occ_pre=np.reshape(occ_mtx[:,1,:],(n_day,n_state))
occ_pri=np.reshape(occ_mtx[:,0,:],(n_day,n_state))
#print occ_pre
#print occ_pri
print T(occ_pre,occ_pri)

Calculate transition matrices of each time interval


In [ ]:
for i in range(n_t-1):
    occ_pre=np.reshape(occ_mtx[:,i+1,:],(n_day,n_state))
    occ_pri=np.reshape(occ_mtx[:,i,:],(n_day,n_state)) 
    tran_mtx[:,:,i]=T(occ_pre,occ_pri)

In [ ]:
print tran_mtx.shape
print tran_mtx[:,:,0]

Print one transition matrix


In [ ]:
print "Transition matrix of %d:%d"%(sample_day['timestamp'][40].hour,sample_day['timestamp'][40].minute)
print tran_mtx[:,:,40]

Plot transition probability from 0.1 to 0.3 and from 0.8 to 0.5


In [ ]:
prob_1=tran_mtx[0,1,:]
prob_2=tran_mtx[3,2,:]
print np.shape(prob_1)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(sample_day['timestamp'],prob_1,'b',label='prob 0.1 to 0.3')
plt.plot(sample_day['timestamp'],prob_2,'r',label='prob 0.8 to 0.6')
plt.xlabel('Time Stamp')
plt.ylabel('Probability')
plt.title('Transition Probability')
plt.ylim(0,1)
plt.legend()

As is showen in the figure, the occupancy is very likely to increase around 9am, and to decrease around 12pm, 5pm and 9pm, which correctly reponds to the daily schedule

5.3 Blended Transition Matrices

Mix of transition matrices

In [ ]:
def T_mix(tran_mtx):
    n_state=tran_mtx.shape[0]
    n_t=tran_mtx.shape[2]    
    tran_mix=np.zeros((n_state,n_state,n_t))
    for i in range(n_t):  
        mtx=np.zeros((n_state,n_state))
        for j in range(n_t):
            b=beta(i,j,n_t)
            mtx=mtx+b*tran_mtx[:,:,j]
        tran_mix[:,:,i]=mtx
    return tran_mix

In [ ]:
test_mix=T_mix(tran_mtx[:,:,:2])
print np.shape(test_mix)
print test_mix[:,:,0]
Weight of each transition matrix

In [ ]:
def beta(c1,c2,K):
    a=np.zeros(K)
    for i in range(K):
        a[i]=alpha(c1-i)
    return a[c2]/sum(a)
Value of each time slot

In [ ]:
def alpha(x):
    a=10.0
    d=3.0
    theta1=theta(2*a/d*(x+d/2))
    theta2=theta(2*a/d*(x-d/2))
    return theta1-theta2
Sigmoid function

In [ ]:
def theta(x):
    return 1/(1+np.exp(-x))

Calculate blended transition matrices


In [ ]:
tran_blend=T_mix(tran_mtx)
print np.shape(tran_blend)

Plot increase and decrease transition probabilities again


In [ ]:
prob_1=tran_blend[0,1,:]
prob_2=tran_blend[1,2,:]
prob_3=tran_blend[2,3,:]
prob_4=tran_blend[1,0,:]
prob_5=tran_blend[2,1,:]
prob_6=tran_blend[3,2,:]
print np.shape(prob_1)

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(sample_day['timestamp'],prob_1,'b',label='prob 0.1 to 0.3')
plt.plot(sample_day['timestamp'],prob_2,'r',label='prob 0.3 to 0.6')
plt.plot(sample_day['timestamp'],prob_3,'y',label='prob 0.6 to 0.8')
plt.plot(sample_day['timestamp'],prob_4,'g',label='prob 0.3 to 0.1')
plt.plot(sample_day['timestamp'],prob_4,'c',label='prob 0.6 to 0.3')
plt.plot(sample_day['timestamp'],prob_4,'m',label='prob 0.8 to 0.6')
plt.xlabel('Time Stamp')
plt.ylabel('Probability')
plt.title('Blended Transition Probability')
plt.ylim(0,1)
plt.legend()

6. Occupancy Pattern Simulation

6.1 Simulate Occupancy


In [ ]:
def occSim(n_state,n_day,tran_mix):    
    #init occ_sim=[# time indicator, # state, # week]
    n_t=tran_mix.shape[2]
    occ_sim=np.zeros((n_day,n_t,n_state))
    #init occ state
    for i in range(n_day):  
        occ_pri=0
        occ_sim[i,0,occ_pri]=1
        for j in range(1,n_t):
            occ_pre=markovSim(tran_mix[occ_pri,:,j],n_state)
            occ_sim[i,j,occ_pre]=1
            occ_pri=occ_pre
    return occ_sim

In [ ]:
def markovSim(pdf,n):
    cdf=np.zeros(n)
    cdf[0]=pdf[0]
    for i in range(1,n):
        cdf[i]=cdf[i-1]+pdf[i]
    r=np.random.random()
    for j in range(n):
        if r<cdf[j]:
            break
    return j

Test markovSim function


In [ ]:
pdf=tran_blend[:,:,30][0,:]
print pdf
print markovSim(pdf,4)

Test occSim function


In [ ]:
test_sim=occSim(4,1,tran_blend)
print np.shape(test_sim)

In [ ]:
test_sim=np.sum(test_sim[0,:,:]*state,1)
plt.figure(figsize=(15,5))
plt.plot(sample_day['timestamp'],sample_day['occ'],'b',label='occupancy')
plt.plot(sample_day['timestamp'],test_sim,'r',label='simulation')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Test Occupancy Simulation')
plt.ylim(0,1)
plt.legend()

Simulate occupancy


In [ ]:
N=100
occ_0=0 #first state
occ_sim=np.zeros((n_t*n_day,n_state))
for i in range(N):
    sim=occSim(n_state,n_day,tran_blend)    
    sim=np.reshape(sim,(n_t*n_day,n_state))
    occ_sim=occ_sim+sim

Pring the first day occupancy simulation result


In [ ]:
print occ_sim[:96]

Calculate the simulated occupancy level


In [ ]:
occ_sim=occ_sim*1.0/N
occ_sim=np.sum(occ_sim*state,1)

6.2 Compare Simulated Occupancy with Ground Truth


In [ ]:
sample_week_sim=occ_sim[:len(sample_week)]
plt.figure(figsize=(15,5))
plt.plot(sample_week['timestamp'],sample_week['occ'],'b',label='occupancy')
plt.plot(sample_week['timestamp'],sample_week_sim,'r',label='simulation')
plt.xlabel('Time Stamp')
plt.ylabel('Occupancy')
plt.title('Simulated Occupancy of Sample Week')
plt.ylim(0,1)
plt.legend()

As is shown in the figure, the simulated occupancy perferctly matches the real occupancy. Therefore, we replace the real occupancy with the simulation result in the energy prediction model in the next section. To do this, we need to crop the occupancy to match the energy data.

6.3 Crop Simulated Occupancy

The energy data used in this study has 4 time period: Jan 12 to Apr 3, Apr 20 to May 15, Jun 22 to Jul 17, and Sep 28 to Dec 4. The time series of occupancy basically overlp with them, while we still need to cut off some data. Simply, we only need to shorten the occupancy time series.


In [ ]:
print np.where(occ_crop_new['timestamp']==dt.datetime(2015,1,12))
print np.where(occ_crop_new['timestamp']==dt.datetime(2015,10,26))

In [ ]:
occ_sim_crop=np.ndarray(shape=(13920-2880),
    dtype=[('timestamp',dt.datetime),('occ',float)])
occ_sim_crop['timestamp']=occ_crop_new['timestamp'][2880:13920]
occ_sim_crop['occ']=occ_sim[2880:13920]

In [ ]:
print np.shape(occ_sim_crop)

In [ ]:
plt.plot(occ_sim_crop['timestamp'])

Also, remember we have cropped a week data.


In [ ]:
crop_week=occ_crop[week_start[25]:week_start[26]]
print np.shape(crop_week)
print 'The week cropped is from ',crop_week['timestamp'][0],' to ',crop_week['timestamp'][-1]

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(occ_crop_new['occ'][2880:13920][1000:2000],'b',label='occupancy')
plt.plot(occ_sim_crop['occ'][1000:2000],'r',label='simulation')
plt.xlabel('Day')
plt.ylabel('Occupancy')
plt.title('Comparison of Real and Simulated Occupancy')
plt.ylim(0,1)
plt.legend()

7. Energy Prediction Model with Occupancy

7.1 Harmonize Energy and Occupancy Data


In [ ]:
new_temp2 = temp[0:10560]
new_elect2 = elect[0:10560] 
new_occup = occ_sim_crop['occ'][0:10560]
new_tstamp = tstamp[0:10560]

train_temp2 = []
test_temp2 = []
for i in range(11):
    train_temp2.extend(new_temp2[960*i:960*i+480])
    test_temp2.extend(new_temp2[960*i+480:960*i+960])

train_elect2 = []
test_elect2 = []
for i in range(11):
    train_elect2.extend(new_elect2[960*i:960*i+480])
    test_elect2.extend(new_elect2[960*i+480:960*i+960])

train_occup = []
test_occup = []
for i in range(11):
    train_occup.extend(new_occup[960*i:960*i+480])
    test_occup.extend(new_occup[960*i+480:960*i+960])

train_tstamp2 = []
test_tstamp2 = []
for i in range(11):
    train_tstamp2.extend(new_tstamp[960*i:960*i+480])
    test_tstamp2.extend(new_tstamp[960*i+480:960*i+960])

train_occup2 = (np.matrix(train_occup)).T
test_occup2 = (np.matrix(test_occup)).T

7.2 Fit Building Energy Model with Real Occupancy


In [ ]:
occup2 = occ_crop_new['occ'][2880:13920]
new_occup2 = occup2[0:10560]

train_occup2 = []
test_occup2 = []
for i in range(11):
    train_occup2.extend(new_occup2[960*i:960*i+480])
    test_occup2.extend(new_occup2[960*i+480:960*i+960])

train_occup3 = (np.matrix(train_occup2)).T
test_occup3 = (np.matrix(test_occup2)).T

train_X3 = DesignMatrix2(train_temp2, 480, train_T_bound, train_occup3)
train_elect_trans2 = (np.matrix(train_elect2)).T
betahat3 = beta_hat(train_X3, train_elect_trans2)
test_X3 = DesignMatrix2(test_temp2, 480, test_T_bound, test_occup3)
predict_y3 = test_X3 * betahat3

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(predict_y3[1000:2000], 'r-', label = "Predicted")
plt.plot(test_elect2[1000:2000], 'b-', label = "Actual")
plt.ylim(100,200)
plt.xlabel("Time",size=15)
plt.ylabel("Electricity [kWh]",size=15)
plt.legend(loc="upper left")

In [ ]:
X3 = test_X3
Y3 = np.matrix(test_elect2).T
y_bar3 = np.matrix([[np.mean(Y3)]*len(Y3)]).T
R_squared3 = 1-((Y3-X3*betahat3).T*(Y3-X3*betahat3))/((Y3-y_bar3).T*(Y3-y_bar3))
R_squared3[0,0]

In [ ]:
MSE3 = (Y3-X3*betahat3).T*(Y3-X3*betahat3)/(n2-p2)
S_betahat_sqr3 = np.multiply(MSE3, linalg.inv(X3.T*X3))
S_betahatk_sqr3 = S_betahat_sqr3.diagonal()
S_betahatk3 = np.sqrt(S_betahatk_sqr3.T)
left3 = np.array(betahat3 - t.isf((1-0.95)/2, n2-p2)*S_betahatk3)
right3 = np.array(betahat3 + t.isf((1-0.95)/2, n2-p2)*S_betahatk3)
CI3 = []
for i in range(487):
    CI3.append((left3[i][0], right3[i][0]))
CI3

In [ ]:
T3 = []
for i in range(487):
    T3.append(np.array(betahat3[i,:])[0][0]/np.array(S_betahatk3[i,:])[0][0])

def rej_coef3(T):
    rej_list = []
    for i in range(len(T)):
        if T[i] > t.isf((1-0.95)/2,n2-p2) or T[i] < -t.isf((1-0.95)/2,n2-p2):
            rej_list.append(T[i])
        else:
            print(T[i],i)
    return rej_list

In [ ]:
rej_coef3(T3)

7.3 Fit Building Energy Model with Simulated Occupancy


In [ ]:
train_X2 = DesignMatrix2(train_temp2, 480, train_T_bound, train_occup2)

In [ ]:
train_elect_trans2 = (np.matrix(train_elect2)).T
betahat2 = beta_hat(train_X2, train_elect_trans2)

In [ ]:
test_X2 = DesignMatrix2(test_temp2, 480, test_T_bound, test_occup2)

In [ ]:
predict_y2 = test_X2 * betahat2

In [ ]:
plt.figure(figsize=(15,5))
plt.plot(predict_y2[1000:2000], 'r-', label = "Predicted")
plt.plot(test_elect2[1000:2000], 'b-', label = "Actual")
plt.ylim(100,200)
plt.xlabel("Time",size=15)
plt.ylabel("Electricity [kWh]",size=15)
plt.legend(loc="upper left")
T-test and R-square

In [ ]:
X2 = test_X2
Y2 = np.matrix(test_elect2).T
y_bar2 = np.matrix([[np.mean(Y2)]*len(Y2)]).T
R_squared2 = 1-((Y2-X2*betahat2).T*(Y2-X2*betahat2))/((Y2-y_bar2).T*(Y2-y_bar2))
R_squared2[0,0]

In [ ]:
n2 = 5280
p2 = 487
t.isf((1-0.95)/2,n2-p2)

In [ ]:
MSE2 = (Y2-X2*betahat2).T*(Y2-X2*betahat2)/(n2-p2)
S_betahat_sqr2 = np.multiply(MSE2, np.linalg.inv(X2.T*X2))
S_betahatk_sqr2 = S_betahat_sqr2.diagonal()
S_betahatk2 = np.sqrt(S_betahatk_sqr2.T)

In [ ]:
T2 = []
for i in range(487):
    T2.append(np.array(betahat2[i,:])[0][0]/np.array(S_betahatk2[i,:])[0][0])

In [ ]:
def rej_coef2(T):
    rej_list = []
    for i in range(len(T)):
        if T[i] > t.isf((1-0.95)/2,n2-p2) or T[i] < -t.isf((1-0.95)/2,n2-p2):
            rej_list.append(T[i])
        else:
            print(T[i],i)
    return rej_list

In [ ]:
len(rej_coef2(T2))

Unfortunately, occupancy variale is rejected

7.5 Relationship between Occupancy and Energy Consumption


In [ ]:
fig, ax1 = plt.subplots(figsize=(15,5))

ax2 = ax1.twinx()
ax2.plot(new_tstamp[1000:2000],new_occup[1000:2000], 'r-',label = 'Occupany')
ax1.plot(new_tstamp[1000:2000],new_elect2[1000:2000], 'b-',label = 'Electricity')

ax1.set_xlabel('Time')
ax1.set_ylabel('Electricity [kWh]')
ax2.set_ylabel('Occupancy')
ax1.legend()
ax2.legend(loc="upper left")
plt.title('Building Eletricity Consumption vs Occupancy')

plt.show()

As is shown in the figure, occupancy and energy use are correlated.


In [ ]: