Load the data, clean it, combine the separate series into one table, and split it into training and test sets.


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#outdoor air temperature
oa = pd.read_csv("../data/oa_temp_utc_f.csv");
oa.columns = ['time', 'oa']
oa.set_index("time", drop = True, inplace = True);

oa.index = pd.to_datetime(oa.index)
oa = oa.replace('/', np.nan)
oa['oa'] = oa['oa'].astype(float)
oa = oa.interpolate(method = "time")

#relative humidity
rh = pd.read_csv("../data/rh_utc_perct.csv");
rh.columns = ['time', 'rh']
rh.set_index("time", drop = True, inplace = True);

rh.index = pd.to_datetime(rh.index)
rh = rh.replace('/', np.nan)
rh['rh'] = rh['rh'].astype(float)
rh = rh.interpolate(method = "time")

#solar radiation
sr = pd.read_csv("../data/total_solar_utc_btuh-ft2.csv");
sr.columns = ['time', 'sr'];
sr.set_index('time', drop = True, inplace = True);

sr.index = pd.to_datetime(sr.index)
sr = sr.replace('/', np.nan)
sr['sr'] = sr['sr'].astype(float)
sr = sr.interpolate(method = "time")
#wind speed
ws = pd.read_csv("../data/wind_speed_utc_mph.csv");
ws.columns = ['time', 'ws'];
ws.set_index('time', drop = True, inplace = True);

ws.index = pd.to_datetime(ws.index)
ws = ws.replace('/', np.nan)
ws['ws'] = ws['ws'].astype(float)
ws = ws.interpolate(method = "time")
#damper position of room 107n
dp = pd.read_csv("../data/107n_damper_utc_0to7or10.csv");
dp.columns = ['time', 'dp'];
dp.set_index('time', drop = True, inplace = True); 

dp.index = pd.to_datetime(dp.index)
dp = dp.replace('/', np.nan)
dp['dp'] = dp['dp'].astype(float)
dp = dp.interpolate(method = "time")
#some damper data is on a 0-10 scale and some on a 0-7 scale, so rescale the 0-7 portion to 0-10
dpUpper = dp.loc['2014-06-26 00:10:00':'2015-02-02 14:40:00',:];
dpLower = dp.loc['2015-02-02 14:50:00':,:]
dpLower = dpLower.multiply(10.0/7.0, axis='columns');
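#e.g. a full-open reading of 7.0 on the 0-7 scale maps to 7.0*10.0/7.0 = 10.0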

frames = [dpUpper, dpLower]
dp = pd.concat(frames)

#supply air temperature of room 107n
st = pd.read_csv("../data/107n_vavtemp_utc_f.csv");
st.columns = ['time', 'st'];
st.set_index('time', drop = True, inplace = True);

st.index = pd.to_datetime(st.index)
st = st.replace('/', np.nan)
st['st'] = st['st'].astype(float)
st = st.interpolate(method = "time");

#indoor air temperature of 107n
at = pd.read_csv("../data/107n_temp_utc_f.csv");
at.columns = ['time', 'at'];
at.set_index('time', drop = True, inplace = True);

at.index = pd.to_datetime(at.index)
at = at.replace('/', np.nan)
at['at'] = at['at'].astype(float)
at = at.interpolate(method = "time");

#merge all series together, then convert the original UTC timestamps to local (US/Eastern) time
allDataRaw = (oa.merge(rh,how = 'inner',left_index = True, right_index = True)
              .merge(sr,how = 'inner',left_index = True, right_index = True)
              .merge(ws,how = 'inner',left_index = True, right_index = True)
              .merge(dp,how = 'inner',left_index = True, right_index = True)
              .merge(st,how = 'inner',left_index = True, right_index = True)
              .merge(at,how = 'inner',left_index = True, right_index = True));
import pytz
eastern = pytz.timezone('US/Eastern');
origIndex = allDataRaw.index;
newTimeIndex = origIndex.tz_localize(pytz.utc).tz_convert(eastern);
allDataRaw.index = newTimeIndex;

#add month, weekday, hour of day information
allDataRaw['month'] = allDataRaw.index.month;
allDataRaw['weekday'] = allDataRaw.index.weekday;
allDataRaw['hour'] = allDataRaw.index.hour;

#separate one year for training, one year for testing
#(.copy() avoids pandas' SettingWithCopyWarning in the next cell)
train = allDataRaw.loc['2014-11-01 00:00:00':'2015-11-01 00:00:00',:].copy();
test = allDataRaw.loc['2015-11-01 00:10:00':'2016-11-01 00:10:00',:].copy();
train


Out[1]:
oa rh sr ws dp st at month weekday hour
time
2014-11-01 00:00:00-04:00 42.700000 86.589980 0.704026 0.000000 0.000000 62.440970 68.200000 11 5 0
2014-11-01 00:10:00-04:00 42.710010 86.600000 1.244994 0.000000 0.000000 62.341324 68.100000 11 5 0
2014-11-01 00:20:00-04:00 42.800000 86.600000 1.115994 0.000000 0.000000 63.057070 68.100000 11 5 0
2014-11-01 00:30:00-04:00 42.900000 86.500000 0.900000 0.062532 0.000000 64.773110 68.100000 11 5 0
2014-11-01 00:40:00-04:00 42.900000 86.410010 0.560000 0.000000 0.000000 66.950250 67.946335 11 5 0
2014-11-01 00:50:00-04:00 42.900000 86.500000 0.584016 0.000000 0.000000 68.192020 67.900000 11 5 0
2014-11-01 01:00:00-04:00 43.000000 86.510025 1.250954 0.000000 0.000000 68.516280 67.900000 11 5 1
2014-11-01 01:10:00-04:00 42.989980 86.510020 0.489018 1.250000 0.000000 68.658485 67.900000 11 5 1
2014-11-01 01:20:00-04:00 42.900000 86.600000 0.605010 1.124740 0.000000 68.700000 67.900000 11 5 1
2014-11-01 01:30:00-04:00 42.900000 86.500000 1.080000 0.125264 0.000000 68.758470 67.900000 11 5 1
2014-11-01 01:40:00-04:00 42.800000 86.720010 0.644996 0.000000 0.000000 68.741170 67.900000 11 5 1
2014-11-01 01:50:00-04:00 42.900000 86.900000 0.550990 0.000000 0.000000 68.700000 67.900000 11 5 1
2014-11-01 02:00:00-04:00 42.889980 86.700000 0.760000 0.000000 0.000000 68.758446 67.900000 11 5 2
2014-11-01 02:10:00-04:00 42.700000 86.410030 0.830000 0.000000 0.000000 68.741540 67.900000 11 5 2
2014-11-01 02:20:00-04:00 42.700000 86.410000 0.944005 0.000000 0.000000 68.700000 67.900000 11 5 2
2014-11-01 02:30:00-04:00 42.700000 86.400000 0.650981 0.000000 0.000000 68.700000 67.900000 11 5 2
2014-11-01 02:40:00-04:00 42.600000 86.500000 0.764005 0.000000 0.000000 68.641610 67.900000 11 5 2
2014-11-01 02:50:00-04:00 42.600000 86.500000 0.700000 0.000000 0.000000 68.600000 67.900000 11 5 2
2014-11-01 03:00:00-04:00 42.600000 86.189990 0.614994 0.000000 0.000000 68.600000 67.900000 11 5 3
2014-11-01 03:10:00-04:00 42.600000 86.000000 0.640979 0.000000 0.000000 68.600000 67.900000 11 5 3
2014-11-01 03:20:00-04:00 42.500000 85.300000 0.980000 0.000000 0.000000 68.600000 67.900000 11 5 3
2014-11-01 03:30:00-04:00 42.500000 85.289990 0.899011 0.000000 0.000000 68.600000 67.900000 11 5 3
2014-11-01 03:40:00-04:00 42.400000 85.210020 1.314989 0.000000 10.000000 68.659294 67.900000 11 5 3
2014-11-01 03:50:00-04:00 42.300000 85.100000 0.610000 0.000000 10.000000 68.640690 67.900000 11 5 3
2014-11-01 04:00:00-04:00 42.110000 85.100000 0.860003 0.000000 10.000000 68.600000 67.900000 11 5 4
2014-11-01 04:10:00-04:00 42.200000 85.110020 1.399976 0.563868 10.000000 68.541730 67.900000 11 5 4
2014-11-01 04:20:00-04:00 42.200000 84.989990 0.570008 0.000000 10.000000 68.500000 67.900000 11 5 4
2014-11-01 04:30:00-04:00 42.100000 84.689990 0.790976 0.000000 10.000000 68.441360 67.900000 11 5 4
2014-11-01 04:40:00-04:00 42.100000 84.600000 0.660000 0.000000 10.000000 68.458694 67.900000 11 5 4
2014-11-01 04:50:00-04:00 42.110012 82.689980 0.715007 0.000000 10.000000 68.440674 67.900000 11 5 4
... ... ... ... ... ... ... ... ... ... ...
2015-10-31 19:10:00-04:00 56.484997 44.500000 0.800000 1.062479 0.000000 72.188810 68.700000 10 5 19
2015-10-31 19:20:00-04:00 56.385000 44.515003 0.800000 0.843843 0.000000 72.088700 68.700000 10 5 19
2015-10-31 19:30:00-04:00 56.315002 44.984997 0.781005 3.218771 0.000000 71.988945 68.700000 10 5 19
2015-10-31 19:40:00-04:00 56.500000 45.000000 1.162498 1.062441 0.000000 71.888540 68.700000 10 5 19
2015-10-31 19:50:00-04:00 56.500000 45.115000 1.108995 0.656396 0.000000 71.788830 68.700000 10 5 19
2015-10-31 20:00:00-04:00 56.784996 45.184998 0.968506 4.125077 0.000000 71.688830 68.600000 10 5 20
2015-10-31 20:10:00-04:00 56.700000 45.200000 0.828998 1.625041 0.000000 71.600000 68.600000 10 5 20
2015-10-31 20:20:00-04:00 56.800000 45.100000 0.961005 0.000000 0.000000 71.588870 68.600000 10 5 20
2015-10-31 20:30:00-04:00 56.800000 45.100000 1.320000 2.345005 0.000000 71.489150 68.600000 10 5 20
2015-10-31 20:40:00-04:00 56.800000 45.100000 0.750000 3.125000 0.428571 71.400000 68.600000 10 5 20
2015-10-31 20:50:00-04:00 56.800000 45.200000 0.801494 0.656385 0.428571 71.389150 68.600000 10 5 20
2015-10-31 21:00:00-04:00 56.700000 45.315002 1.040000 0.281312 0.428571 71.289000 68.500000 10 5 21
2015-10-31 21:10:00-04:00 56.500000 45.800000 0.563551 0.000000 0.428571 71.200000 68.500000 10 5 21
2015-10-31 21:20:00-04:00 56.300000 46.300000 0.955003 0.000000 0.000000 71.189200 68.500000 10 5 21
2015-10-31 21:30:00-04:00 56.200000 46.415005 0.882498 0.000000 0.000000 71.089480 68.500000 10 5 21
2015-10-31 21:40:00-04:00 56.100000 47.584995 0.928993 0.000000 0.000000 71.000000 68.500000 10 5 21
2015-10-31 21:50:00-04:00 56.400000 47.000000 0.728039 0.000000 0.000000 70.988650 68.500000 10 5 21
2015-10-31 22:00:00-04:00 56.084995 48.175015 0.982498 0.000000 0.000000 70.888560 68.492330 10 5 22
2015-10-31 22:10:00-04:00 55.824993 54.635014 1.020000 0.000000 0.000000 70.800000 68.400000 10 5 22
2015-10-31 22:20:00-04:00 54.613335 58.473330 0.301331 0.000000 0.000000 70.777040 68.400000 10 5 22
2015-10-31 22:30:00-04:00 53.700000 61.900000 0.560000 0.000000 0.000000 70.577410 68.400000 10 5 22
2015-10-31 22:40:00-04:00 53.059990 66.266680 0.902003 0.000000 0.000000 70.388985 68.400000 10 5 22
2015-10-31 22:50:00-04:00 53.213337 65.259995 1.043337 0.000000 0.000000 70.277630 68.400000 10 5 22
2015-10-31 23:00:00-04:00 52.586662 69.153340 1.073332 0.000000 0.000000 70.088684 68.400000 10 5 23
2015-10-31 23:10:00-04:00 52.513336 67.446655 0.921330 0.000000 0.000000 69.988830 68.400000 10 5 23
2015-10-31 23:20:00-04:00 52.100000 72.040050 0.568689 0.000000 0.000000 69.888830 68.392650 10 5 23
2015-10-31 23:30:00-04:00 52.213337 70.606650 0.731331 0.000000 0.000000 69.777630 68.300000 10 5 23
2015-10-31 23:40:00-04:00 51.944275 70.700000 0.577379 5.717234 0.000000 69.588670 68.300000 10 5 23
2015-10-31 23:50:00-04:00 54.015003 65.539990 0.666001 6.968762 0.000000 69.488686 68.300000 10 5 23
2015-11-01 00:00:00-04:00 53.200000 65.200000 0.840000 0.127669 0.000000 69.388830 68.300000 11 6 0

52561 rows × 10 columns
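
All seven series above follow one identical read/clean/interpolate pattern, so a small helper could replace the repetition (a sketch only; loadSeries is not part of the original notebook, and the example path mirrors the cells above):

def loadSeries(path, name):
    #read one series, index it by time, convert the '/' missing-value
    #markers to NaN, and fill gaps by time-based interpolation
    df = pd.read_csv(path);
    df.columns = ['time', name];
    df.set_index('time', drop = True, inplace = True);
    df.index = pd.to_datetime(df.index);
    df = df.replace('/', np.nan);
    df[name] = df[name].astype(float);
    return df.interpolate(method = "time");

#e.g. oa = loadSeries("../data/oa_temp_utc_f.csv", 'oa')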


In [2]:
#Move at (the target y) to be the last column of each dataframe
at = train['at'];
train.drop(labels=['at'], axis=1,inplace = True)
train.insert(len(train.columns), 'at', at)

at = test['at'];
test.drop(labels=['at'], axis=1,inplace = True)
test.insert(len(test.columns), 'at', at)
test


Out[2]:
oa rh sr ws dp st month weekday hour at
time
2015-11-01 00:10:00-04:00 55.811670 57.576660 1.297827 1.322937 0.0 69.288820 11 6 0 68.292000
2015-11-01 00:20:00-04:00 56.600000 54.911670 0.565834 3.864502 0.0 69.188680 11 6 0 68.200000
2015-11-01 00:30:00-04:00 56.611477 54.788520 0.654262 7.161926 0.0 69.088810 11 6 0 68.200000
2015-11-01 00:40:00-04:00 58.188522 54.500000 0.586068 8.391342 0.0 69.011314 11 6 0 68.208000
2015-11-01 00:50:00-04:00 60.210003 48.429990 0.995001 3.937762 0.0 69.100000 11 6 0 68.200000
2015-11-01 01:00:00-04:00 61.089996 44.900000 0.920994 4.249959 0.0 69.111040 11 6 1 68.200000
2015-11-01 01:10:00-04:00 60.576656 43.900000 0.465836 2.020896 0.0 69.222664 11 6 1 68.200000
2015-11-01 01:20:00-04:00 59.188330 44.123337 0.890000 5.697937 0.0 69.388670 11 6 1 68.200000
2015-11-01 01:30:00-04:00 58.076660 47.135010 0.840000 4.739687 0.0 69.288690 11 6 1 68.200000
2015-11-01 01:40:00-04:00 57.700000 47.435013 0.990503 7.500000 0.0 69.188810 11 6 1 68.200000
2015-11-01 01:50:00-04:00 57.900000 47.088330 0.905834 5.479141 0.0 69.097170 11 6 1 68.200000
2015-11-01 01:00:00-05:00 57.900000 47.000000 1.063660 10.520546 0.0 69.072174 11 6 1 68.200000
2015-11-01 01:10:00-05:00 57.888332 48.811670 0.454165 7.176813 0.0 69.047190 11 6 1 68.200000
2015-11-01 01:20:00-05:00 57.611668 49.176662 0.532170 3.937438 0.0 69.022200 11 6 1 68.200000
2015-11-01 01:30:00-05:00 57.600000 49.551690 0.913664 4.302073 0.0 68.997210 11 6 1 68.200000
2015-11-01 01:40:00-05:00 57.600000 53.300000 0.554165 6.729125 0.0 68.972220 11 6 1 68.200000
2015-11-01 01:50:00-05:00 57.488330 53.300000 0.565835 4.781188 0.0 68.947230 11 6 1 68.200000
2015-11-01 02:00:00-05:00 57.288330 54.400000 0.620502 7.750124 0.0 68.922240 11 6 2 68.200000
2015-11-01 02:10:00-05:00 56.300000 60.035010 1.199497 14.156187 0.0 68.888960 11 6 2 68.200000
2015-11-01 02:20:00-05:00 55.700000 63.646680 1.224165 7.614438 0.0 68.800000 11 6 2 68.191990
2015-11-01 02:30:00-05:00 55.400000 66.223335 0.280000 5.958255 0.0 68.778040 11 6 2 68.100000
2015-11-01 02:40:00-05:00 54.900000 68.111670 0.620503 3.604125 0.0 68.589130 11 6 2 68.100000
2015-11-01 02:50:00-05:00 54.600000 69.400000 0.735333 4.781219 0.0 68.500000 11 6 2 68.100000
2015-11-01 03:00:00-05:00 54.300000 70.746670 1.146336 2.427073 0.0 68.477740 11 6 3 68.100000
2015-11-01 03:10:00-05:00 54.000000 73.311676 1.437827 5.958250 0.0 68.289116 11 6 3 68.100000
2015-11-01 03:20:00-05:00 53.976660 74.011670 0.620503 1.656191 0.0 68.200000 11 6 3 68.100000
2015-11-01 03:30:00-05:00 53.800000 74.711670 1.181670 6.437375 0.0 68.188700 11 6 3 68.100000
2015-11-01 03:40:00-05:00 53.810444 74.910446 0.655377 7.994441 0.0 68.088960 11 6 3 68.100000
2015-11-01 03:50:00-05:00 53.700000 75.822960 0.654261 6.444604 0.0 67.977450 11 6 3 68.100000
2015-11-01 04:00:00-05:00 53.800000 76.600000 0.894005 1.125375 0.0 67.789130 11 6 4 68.100000
... ... ... ... ... ... ... ... ... ... ...
2016-10-31 19:20:00-04:00 48.200000 56.900000 0.410000 0.000000 0.0 69.572000 10 0 19 71.600000
2016-10-31 19:30:00-04:00 48.243324 54.510070 0.539344 0.000000 0.0 69.307680 10 0 19 71.599330
2016-10-31 19:40:00-04:00 47.600000 56.156677 0.600650 0.000000 0.0 69.007180 10 0 19 71.499330
2016-10-31 19:50:00-04:00 47.100000 57.500000 0.278342 0.000000 0.0 68.772606 10 0 19 71.400000
2016-10-31 20:00:00-04:00 46.800000 57.556683 0.510000 0.000000 0.0 68.508400 10 0 20 71.300000
2016-10-31 20:10:00-04:00 46.900000 57.543320 0.211011 0.000000 0.0 68.336160 10 0 20 71.200000
2016-10-31 20:20:00-04:00 46.656670 58.656670 0.630661 0.000000 0.0 68.175970 10 0 20 71.200000
2016-10-31 20:30:00-04:00 46.800000 58.513350 0.150000 0.000000 0.0 67.977390 10 0 20 71.100000
2016-10-31 20:40:00-04:00 46.756683 58.626736 0.459357 0.000000 0.0 67.774060 10 0 20 71.099335
2016-10-31 20:50:00-04:00 46.500000 59.113350 0.600657 0.708423 0.0 67.571690 10 0 20 71.000000
2016-10-31 21:00:00-04:00 46.400000 58.640068 0.397329 0.000000 0.0 67.438240 10 0 21 70.900000
2016-10-31 21:10:00-04:00 46.300000 58.643314 0.418982 0.000000 0.0 67.275300 10 0 21 70.899330
2016-10-31 21:20:00-04:00 46.100000 58.943314 0.202675 0.000000 0.0 67.137276 10 0 21 70.800000
2016-10-31 21:30:00-04:00 45.943330 58.886658 0.291003 0.000000 0.0 67.036450 10 0 21 70.700000
2016-10-31 21:40:00-04:00 44.743324 61.413350 0.370000 0.000000 0.0 66.871980 10 0 21 70.699326
2016-10-31 21:50:00-04:00 44.600000 61.500000 0.357326 0.000000 0.0 66.673386 10 0 21 70.600000
2016-10-31 22:00:00-04:00 44.943314 60.200000 0.108982 0.000000 0.0 66.536150 10 0 22 70.599330
2016-10-31 22:10:00-04:00 43.943317 64.256680 0.132031 0.000000 0.0 66.376480 10 0 22 70.500000
2016-10-31 22:20:00-04:00 43.243317 66.000000 0.551659 0.000000 0.0 66.242240 10 0 22 70.400000
2016-10-31 22:30:00-04:00 43.300000 66.400000 0.227995 0.000000 0.0 66.143050 10 0 22 70.400000
2016-10-31 22:40:00-04:00 43.044983 63.944984 0.452492 0.000000 0.0 65.983696 10 0 22 70.400000
2016-10-31 22:50:00-04:00 43.800000 61.200000 0.209043 0.000000 0.0 65.776405 10 0 22 70.400000
2016-10-31 23:00:00-04:00 43.779984 62.795036 0.402498 0.000000 0.0 65.700000 10 0 23 70.300000
2016-10-31 23:10:00-04:00 42.955940 66.000000 0.147970 0.000000 0.0 65.637310 10 0 23 70.300000
2016-10-31 23:20:00-04:00 43.800000 62.000000 0.280975 0.000000 0.0 65.473580 10 0 23 70.200000
2016-10-31 23:30:00-04:00 44.100000 61.655754 0.402302 0.000000 0.0 65.336876 10 0 23 70.199326
2016-10-31 23:40:00-04:00 43.456690 64.373240 0.650000 0.000000 0.0 65.238236 10 0 23 70.100000
2016-10-31 23:50:00-04:00 43.686653 64.026690 0.477331 0.000000 0.0 65.138695 10 0 23 70.000000
2016-11-01 00:00:00-04:00 43.375790 64.059960 0.214004 0.000000 0.0 65.040290 11 1 0 70.000000
2016-11-01 00:10:00-04:00 42.733345 66.400000 0.534995 0.000000 0.0 64.941410 11 1 0 70.000000

52705 rows × 10 columns


In [3]:
#The error metric
def chouErrorMetric(pred, real):
    """
    Reference:
    Jui Sheng Chou and Dac Khuong Bui. Modeling heating and cooling
    loads by artificial intelligence for energy-efficient building design.
    Energy and Buildings, 82:437–446, 2014.
    
    pred: numpy array of the predicted value
    real: numpy array of the real value
    """
    n = pred.shape[0];
    rmse = (
            (np.sum((pred-real)**2.0)/n)**0.5
            );
    
    mae = (
            np.sum(np.absolute((pred-real)))/n
            );
    
    r = (
         (np.sum(np.multiply(real,pred))*n 
        - np.sum(real)*np.sum(pred))/
        (np.sqrt(n*np.sum(real**2)-(np.sum(real)**2))*
         np.sqrt(n*np.sum(pred**2)-(np.sum(pred)**2)))
            );
    
    r_normal = (1-r)/2.0 #map r from [-1,1] to [0,1]; 0 (i.e. r=1) is best
    errorList = [rmse, mae, r_normal];
    print ("error list",errorList);
    avgError = sum(errorList)/3.0;
    return avgError,errorList;
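
As a quick sanity check of the metric (a hypothetical toy example, not from the recorded run), a perfect prediction yields rmse = 0, mae = 0, and r = 1, so r_normal = 0 and the averaged error is 0:

pred = np.array([1.0, 2.0, 3.0]);
real = np.array([1.0, 2.0, 3.0]);
avgError, errorList = chouErrorMetric(pred, real);#prints "error list [0.0, 0.0, 0.0]"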

In [4]:
#min-max normalization
def normalize(data):
    """
    min max normalization
    data: np array, n*(m+1), n is sample number,
          m is feature number, plus 1 for the true value
    return: (np array 1*(m+1) for the max-min for each column,
             np array 1*(m+1) for the minimum value for each colmn)
    """
    colNum = data.shape[1];
    division = [];
    minVList = [];
    for i in range(colNum):
        col = data[:, i];
        maxV = (np.max(col));
        minV = (np.min(col));
        division.append(maxV-minV);
        minVList.append(minV);
        data[:,i] = data[:,i] - minV;

    division = np.array(division);
    minVList = np.array(minVList);
    return (np.true_divide(data, division[None,:]), division, minVList);
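
The returned ranges and minima exist so that predictions can be mapped back to physical units later; the inverse transform (a one-line sketch mirroring the testRest_norm*normDivision_test[-1]+normMinList_test[-1] expressions used below; denormalize is not defined in the original notebook) is:

def denormalize(dataNorm, division, minVList):
    #invert the column-wise min-max scaling
    return dataNorm*division[None,:] + minVList[None,:];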

In [5]:
#Lazy learning

In [6]:
#lazy learning with non-weighted average kernel
class lazyLearning():
    
    def __init__(self, train, k):

        """
        perform lazy learning with a uniform (non-weighted) average
        kernel and Euclidean distance.
        inputs: 
        train: numpy array, n*(m+1),  
               each row is a sample (total n samples),
               each col is a feature (total m features), 
               the last col is the real y
        k: k-nn in lazy learning
        """
        self.trainFeat = train[:,0:-1];
        self.trainY = train[:,-1];
        self.k = k;
    
    def predict(self,test):
        """
        test: numpy array 1*m, m is the number of features
        return the predicted value
        """
        eclideanDist = np.linalg.norm(self.trainFeat - test, axis = 1);
        eclideanDistSort = eclideanDist.argsort(axis = 0);
        eclideanDistSort = eclideanDistSort[0:self.k];
        knneighbours = ([self.trainY[index] for 
                         index in eclideanDistSort]);
        yt = np.mean(knneighbours);
        return yt;

#cross validation

def getCrossValidationData(trainArray, fold):
    """
    get the data list for cross validation
    trainArray: raw train data, numpy array n*(m+1)
                where n is sample size, m is feature
                size, plus 1 for the true result
    fold: fold value
    return tuple ([trainNpArray_1, trainNpArray_2,...]
                 ,[testNpArray_1, testNpArray_2,...])
    """
    trainLength = trainArray.shape[0];
    np.random.shuffle(trainArray);#shuffle the rows in place
    cvTrainList = [];
    cvTestList = [];
    for foldK in range(fold):
        foldStart = int(foldK*(trainLength)/fold);
        foldEnd = int(foldStart + trainLength/fold);
        if foldEnd > trainLength:
            foldEnd = trainLength;
        cvTest = trainArray[foldStart:foldEnd,:];
        cvTrain = np.delete(trainArray
                            ,range(foldStart, foldEnd, 1)
                            ,axis = 0);
        cvTrainList.append(cvTrain);
        cvTestList.append(cvTest);
    return (cvTrainList, cvTestList);


def crossValidateForChioces(choiceList, trainArray, mlMethod,
                            fold = 10):
    """
    Perform cross validation to select model.
    choiceList: 1D list of candidate numeric values (e.g. values of k)
    trainArray: np array n*(m+1) for the training
    mlMethod: ml class
    fold: cross validation fold
    return: cross validation error using the chouErrorMetric
            over the choice list
    """
    cvTrainList, cvTestList = (
        getCrossValidationData(trainArray, fold));
    print ('Performing cross validation to find the best k...');
    cvErrorOverK = [];
    for k in choiceList:
        print ('Testing k=', k, ' ...')
        error_k = [];
        for fold_i in range(len(cvTrainList)):
            print ('Testing k=',k,' for fold=',fold_i,' ...')
            pred_fold_i = [];
            cvTrain_i = cvTrainList[fold_i];
            cvTest_i = cvTestList[fold_i];
            model = mlMethod(cvTrain_i,k);
            for testSample_i in range(len(cvTest_i)):
                pred_fold_i.append(model
                          .predict(cvTest_i[testSample_i,0:-1]));
            pred_fold_i = np.array(pred_fold_i);
            error_fold_i = chouErrorMetric(pred_fold_i,cvTest_i[:,-1]);
            print ('Error metric for k=',k,' fold=', fold_i,
              ' is ', error_fold_i);
            error_k.append(error_fold_i[0]);
        meanError_k = np.mean(np.array(error_k));
        print ('Cross validated error metric for k=',k,
          ' is ',meanError_k);
        cvErrorOverK.append(meanError_k);
    return cvErrorOverK
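
A minimal check of the pieces above (a hypothetical toy dataset, not from the recorded run): with k = 2, the prediction for a query point is the mean y of its two nearest training rows.

toyTrain = np.array([[0.0, 0.0, 1.0],
                     [0.1, 0.0, 2.0],
                     [5.0, 5.0, 9.0]]);#two features plus y
toyModel = lazyLearning(toyTrain, 2);
print (toyModel.predict(np.array([0.0, 0.1])));#mean of 1.0 and 2.0 -> 1.5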

In [7]:
#Perform cross validation to choose K for LL-Avg
trainArray = train.as_matrix();#non-normalized data
trainArrayNorm, normDivision, normMinList = (
    normalize(train.as_matrix()));#normalized data
llKChoiceList = range(5,100,5);
cvErrorOverK = crossValidateForChioces(
                llKChoiceList
                ,trainArrayNorm
                ,lazyLearning
                ,fold = 10
                );


Performing cross validation to find the best k...
Testing k= 5  ...
Testing k= 5  for fold= 0  ...
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-7-5911334e1fdb> in <module>()
      9                 ,trainArrayNorm
     10                 ,lazyLearning
---> 11                 ,fold = 10
     12                 );

<ipython-input-6-c3b2fcbf6a6e> in crossValidateForChioces(choiceList, trainArray, mlMethod, fold)
     90             for testSample_i in range(len(cvTest_i)):
     91                 pred_fold_i.append(model
---> 92                           .predict(cvTest_i[testSample_i,0:-1]));
     93             pred_fold_i = np.array(pred_fold_i);
     94             error_fold_i = chouErrorMetric(pred_fold_i,cvTest_i[:,-1]);

<ipython-input-6-c3b2fcbf6a6e> in predict(self, test)
     25         """
     26         eclideanDist = np.linalg.norm(self.trainFeat - test, axis = 1);
---> 27         eclideanDistSort = eclideanDist.argsort(axis = 0);
     28         eclideanDistSort = eclideanDistSort[0:self.k];
     29         knneighbours = ([self.trainY[index] for 

KeyboardInterrupt: 
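
The run above was interrupted by hand: each prediction scans all ~47,000 cross-validation training rows, so the full 10-fold sweep over 19 values of k needs millions of such scans. A tree-based neighbour search would cut this substantially; the sketch below uses scikit-learn's KNeighborsRegressor, which with uniform weights computes the same average-of-k-neighbours prediction, though it is not what the notebook actually ran:

from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor(n_neighbors = 5, weights = 'uniform',
                          algorithm = 'ball_tree');
knn.fit(trainArrayNorm[:,0:-1], trainArrayNorm[:,-1]);#build the tree once
pred = knn.predict(trainArrayNorm[0:10,0:-1]);#then each query is cheap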

In [8]:
#plot error metric over K for LL-Avg
print (cvErrorOverK)

plt.plot(llKChoiceList,cvErrorOverK,
            color="b",
            label="Lazy Learning with Uniform Average Kernel");
plt.xlabel(
            "Choice of K",
            fontsize = 14
            );
plt.ylabel(
            "Cross Validation Error (Error Metric)",
            fontsize = 14
            );
plt.legend();
plt.show();


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-5c2f52e87b47> in <module>()
      1 #plot error metric over K for LL-Avg
----> 2 print (cvErrorOverK)
      3 
      4 plt.plot(llKChoiceList,cvErrorOverK,
      5             color="b",

NameError: name 'cvErrorOverK' is not defined

In [ ]:
#LL-Avg: Test over the next year's data
testArray = test.as_matrix();
optK = llKChoiceList[np.argmin(np.array(cvErrorOverK))];

modelData = train.as_matrix();
modelDataLen = modelData.shape[0];
allData_test = np.append(modelData, testArray, 0); #model data + test array
#normalize both model data and test data
#to the same scale
allData_test,normDivision_test,normMinList_test = (
                                                normalize (allData_test));
#modelData: the dataset pool for LL
#testArray: the held-out dataset for the testing
modelData = allData_test[0:modelDataLen,:];
testArray = allData_test[modelDataLen:,:];

llTest = lazyLearning(modelData, optK);

testRest_norm = [];
for sample_i in range(len(testArray)):
    testRest_norm.append(llTest
                    .predict(testArray[sample_i,0:-1]));
testRest_norm = np.array(testRest_norm);
(testError_norm, detail) = chouErrorMetric(testRest_norm, testArray[:,-1]);
print ("Test error metric over the test data for LL-Avg is ",testError_norm,".");

#plot test data, pred vs. true (normalized)
plt.plot(testRest_norm
         ,testArray[:,-1]
         ,'.'
         ,color="b"
         ,label="Lazy Learning with Uniform Average Kernel");

plt.xlabel(
            "Predicted Value for the Test Data (Min-max Normalized)",
            fontsize = 12
            );
plt.ylabel(
            "True Value for the Test Data (Min-max Normalized)",
            fontsize = 12
            );
plt.legend();
plt.show();
#plot test data: 2000 consecutive predicted and true values from each season over the time index (unnormalized)
testRest_unNorm = testRest_norm*normDivision_test[-1]+normMinList_test[-1];
testRest_unNorm = testRest_unNorm.reshape((1,len(testRest_unNorm)));
trueRest_unNorm = testArray[:,-1]*normDivision_test[-1]+normMinList_test[-1];
trueRest_unNorm = trueRest_unNorm.reshape((1,len(trueRest_unNorm)));
testNtrueRest_unNorm = np.append(testRest_unNorm, trueRest_unNorm, axis = 0);

plt.plot(testNtrueRest_unNorm[0,0:2000]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,0:2000]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-01-01 to 2015-01-14",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Uniform Average Kernel");
plt.legend();
plt.show();

plt.plot(testNtrueRest_unNorm[0,11233:13233]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,11233:13233]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-03-20 to 2015-04-02",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Uniform Average Kernel");
plt.legend();
plt.show();

plt.plot(testNtrueRest_unNorm[0,21745:23745]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,21745:23745]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-06-01 to 2015-06-14",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Uniform Average Kernel");
plt.legend();
plt.show();
    
plt.plot(testNtrueRest_unNorm[0,42049:44049]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,42049:44049]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-10-20 to 2015-11-02",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Uniform Average Kernel");
plt.legend(loc='lower left');
plt.show();

In [ ]:
def makeLaggedData(lagOrder, rawData):
    """
    add lagged features to the original dataset
    lagOrder: 1D list of int, one lag order per column of the raw data;
        e.g. [2,1,1] means: for the first column (the first feature) of
            rawData, add the 1- and 2-step lagged values as new features;
            for the second column (the original second feature), add the
            1-step lagged value as a new feature; likewise for the third.
    rawData: numpy array, n*(m+1)
    return: new dataset with each column's lagged features inserted
            before it, and the first max(lagOrder) rows dropped
    """
    colNum = rawData.shape[1];
    rowNum = rawData.shape[0];
    autoRegNum = len(lagOrder);
    returnedData = [];
    maxRegOrder = max(lagOrder);
    for i in range(colNum):
        
        if i < autoRegNum:
            autoRegOrder = lagOrder[i];
            for j in range(0,autoRegOrder):
                appendToHead = np.zeros(j+1);
                returnedData.append(
                        np.append(appendToHead, 
                            rawData[0:rowNum-j-1,i]));
        returnedData.append(rawData[:,i]);
    
    returnedData = np.array(returnedData).transpose();
    returnedData = returnedData[maxRegOrder:,:];
    return returnedData;

#lazy learning with linear regression as the local model
class lazyLearning_localLinear():
    
    def __init__(self, train, k):

        """
        perform lazy learning with a local linear regression kernel
        and Euclidean distance.
        inputs: 
        train: numpy array, n*(m+1),  
               each row is a sample (total n samples),
               each col is a feature (total m features), 
               the last col is the real y
        k: k-nn in lazy learning
        """
        self.trainFeat = train[:,0:-1];
        self.trainY = train[:,-1];
        self.k = k;
    
    def predict(self,test):
        """
        test: numpy array 1*m, m is the number of features
        return the predicted value (scalar)
        """
        eclideanDist = np.linalg.norm(self.trainFeat - test, axis = 1);
        eclideanDistSort = eclideanDist.argsort(axis = 0);
        eclideanDistSort = eclideanDistSort[0:self.k];
        knneighboursY = np.array([self.trainY[index] for 
                         index in eclideanDistSort]).reshape(self.k,1);
        m = self.trainFeat.shape[1];
        #k*m matrix of the k nearest neighbours' features
        knneighboursX = self.trainFeat[eclideanDistSort,:];
        from sklearn import linear_model
        regr = linear_model.LinearRegression(n_jobs = -1);
        regr.fit(knneighboursX, knneighboursY)
        test = test.reshape(1,m);
        return regr.predict(test)[0,0];
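
To make the lagging concrete (a hypothetical toy array, not from the recorded run): with lagOrder = [1] on a two-column array, the 1-step lagged first column is inserted before the original, and the first max(lagOrder) row is dropped because it has no complete history.

toy = np.array([[1.0, 10.0],
                [2.0, 20.0],
                [3.0, 30.0],
                [4.0, 40.0]]);#one feature plus y
print (makeLaggedData([1], toy));
#rows: [1, 2, 20], [2, 3, 30], [3, 4, 40]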

In [ ]:
#Perform cross validation to choose k for lazy learning with
#local linear regression
trainArray = train.as_matrix();#non-normalized data
trainArrayNorm, normDivision, normMinList = (
    normalize(train.as_matrix()));#normalized data

llKChoiceList = range(100,500,100);
cvErrorOverK = crossValidateForChioces(
                llKChoiceList
                ,trainArrayNorm
                ,lazyLearning_localLinear
                ,fold = 10
                );

In [ ]:
cvErrorOverK

In [ ]:
#ll with uniform average kernel, but with lagged features
#Perform cross validation
trainArray = train.as_matrix();#non-normalized data
trainArrayNorm, normDivision, normMinList = (
    normalize(train.as_matrix()));#normalized data
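#2-step lags for the oa, sr, and at columns; 0 for all other columns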
lagOrder = [2,0,2,0,0,0,0,0,0,2];
trainArrayNormLagged = makeLaggedData(lagOrder, trainArrayNorm)
llKChoiceList = range(5,50,5);
cvErrorOverK = crossValidateForChioces(
                llKChoiceList
                ,trainArrayNormLagged
                ,lazyLearning
                ,fold = 10
                );

In [ ]:
cvErrorOverK_0 = cvErrorOverK

In [ ]:
#Test over the test dataset for LL-LR
testArray = test.as_matrix();
optK = 100;

modelData = train.as_matrix();
modelDataLen = modelData.shape[0];
allData_test = np.append(modelData, testArray, 0); #model data + test array
allData_test,normDivision_test,normMinList_test = (
                                                normalize (allData_test));
modelData = allData_test[0:modelDataLen,:];
testArray = allData_test[modelDataLen:,:];

llTest = lazyLearning_localLinear(modelData, optK);

testRest_norm = [];
for sample_i in range(len(testArray)):
    testRest_norm.append(llTest
                    .predict(testArray[sample_i,0:-1]));
testRest_norm = np.array(testRest_norm);

(testError_norm, detail) = chouErrorMetric(testRest_norm, testArray[:,-1]);
print ("Test error metric over the test data is ",testError_norm,".");
print (detail)

#plot test data, pred vs. true (normalized)
plt.plot(testRest_norm
         ,testArray[:,-1]
         ,'.'
         ,color="b"
         ,label="Lazy Learning with Linear Regression Kernel");

plt.xlabel(
            "Predicted Value for the Test Data (Min-max Normalized)",
            fontsize = 10
            );
plt.ylabel(
            "True Value for the Test Data (Min-max Normalized)",
            fontsize = 10
            );
plt.axis([0,1,0,1])
plt.legend();
plt.show();
#plot test data: 2000 consecutive predicted and true values from each season over the time index (unnormalized)
testRest_unNorm = testRest_norm*normDivision_test[-1]+normMinList_test[-1];
testRest_unNorm = testRest_unNorm.reshape((1,len(testRest_unNorm)));
trueRest_unNorm = testArray[:,-1]*normDivision_test[-1]+normMinList_test[-1];
trueRest_unNorm = trueRest_unNorm.reshape((1,len(trueRest_unNorm)));
testNtrueRest_unNorm = np.append(testRest_unNorm, trueRest_unNorm, axis = 0);

plt.plot(testNtrueRest_unNorm[0,0:2000]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,0:2000]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-01-01 to 2015-01-14",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Linear Regression Kernel");
plt.legend();
plt.show();

plt.plot(testNtrueRest_unNorm[0,11233:13233]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,11233:13233]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-03-20 to 2015-04-02",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Linear Regression Kernel");
plt.legend();
plt.show();

plt.plot(testNtrueRest_unNorm[0,21745:23745]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,21745:23745]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-06-01 to 2015-06-14",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Linear Regression Kernel");
plt.legend();
plt.show();
    
plt.plot(testNtrueRest_unNorm[0,42049:44049]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,42049:44049]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-10-20 to 2015-11-02",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("Lazy Learning with Linear Regression Kernel");
plt.legend(loc='lower left');
plt.show();

In [ ]:
#Test over the test dataset for LL-Avg-Lag
cvErrorOverK = [0.006261511172007253,
                0.0078319748087286763,
                0.0092544725368474655,
                0.010491375591088101,
                0.011582034373530298,
                0.012577092631859799,
                0.013481123234175307,
                0.014271828395324013,
                0.014997619668965751]#copied from the previous computation

testArray = test.as_matrix();
optK = llKChoiceList[np.argmin(np.array(cvErrorOverK))];

modelData = train.as_matrix();
modelDataLen = modelData.shape[0];
allData_test = np.append(modelData, testArray, 0); #model data + test array
#normalize both model data and test data to the same scale
allData_test,normDivision_test,normMinList_test = (
                                                normalize (allData_test));
lagOrder = [2,0,2,0,0,0,0,0,0,2];#2-step lags for oa, sr, and at
allData_test = makeLaggedData(lagOrder, allData_test);
#makeLaggedData drops the first max(lagOrder) rows, so shift the split
#boundary to keep the model/test partition aligned with the original split
modelData = allData_test[0:modelDataLen - max(lagOrder),:];
testArray = allData_test[modelDataLen - max(lagOrder):,:];

llTest = lazyLearning(modelData, optK);

testRest_norm = [];
#perform the test.
#Because the lagged indoor air temperatures (T_{k-1} and T_{k-2},
#where k is the time step being predicted) are inputs to the
#model, prediction is performed recursively over the test set:
#the first test sample's T_{k-1} and T_{k-2} are the real
#measured values; after that, each sample's T_{k-1} is the
#prediction made for the previous step, and its T_{k-2} is
#carried over from the previous sample's T_{k-1}, and so on
#through the whole test set.
lagOrderT = lagOrder[-1];
test_sample_i = testArray[0,0:-1];
for sample_i in range(len(testArray)):
    pred_i = llTest.predict(test_sample_i);
    testRest_norm.append(pred_i);
    if sample_i + 1 < len(testArray):
        test_sample_i_new = testArray[sample_i+1,0:-1];
        sampleFeatM = len(test_sample_i_new);
        test_sample_i_new[sampleFeatM-lagOrderT] = pred_i;
        for lagOrderT_i in range(1,lagOrderT):
            test_sample_i_new[sampleFeatM - lagOrderT 
                              + lagOrderT_i] = test_sample_i[sampleFeatM 
                                                             - lagOrderT + lagOrderT_i - 1]
        test_sample_i = test_sample_i_new;
        
testRest_norm = np.array(testRest_norm);

In [ ]:
#continue...
(testError_norm, detail) = chouErrorMetric(testRest_norm, testArray[:,-1]);
print ("Test error metric over the test data is ",testError_norm,".");
print (detail)

#plot test data, pred vs. true (normalized)
plt.plot(testRest_norm
         ,testArray[:,-1]
         ,'.'
         ,color="b"
         ,label="LL with Average Kernel - Lagged Features");
plt.axis([0,1,0,1]);

plt.xlabel(
            "Predicted Value for the Test Data (Min-max Normalized)",
            fontsize = 10
            );
plt.ylabel(
            "True Value for the Test Data (Min-max Normalized)",
            fontsize = 10
            );
plt.legend();
plt.show();
#plot test data, samples from each season
testRest_unNorm = testRest_norm*normDivision_test[-1]+normMinList_test[-1];
testRest_unNorm = testRest_unNorm.reshape((1,len(testRest_unNorm)));
trueRest_unNorm = testArray[:,-1]*normDivision_test[-1]+normMinList_test[-1];
trueRest_unNorm = trueRest_unNorm.reshape((1,len(trueRest_unNorm)));
testNtrueRest_unNorm = np.append(testRest_unNorm, trueRest_unNorm, axis = 0);

plt.plot(testNtrueRest_unNorm[0,0:2000]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,0:2000]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-01-01 to 2015-01-14",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("LL with Average Kernel - Lagged Features");
plt.legend();
plt.show();

plt.plot(testNtrueRest_unNorm[0,11000:13000]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,11000:13000]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-03-19 to 2015-04-01",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("LL with Average Kernel - Lagged Features");
plt.legend(fontsize = 12);
plt.show();

plt.plot(testNtrueRest_unNorm[0,21800:23800]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,21800:23800]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-06-02 to 2015-06-15",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("LL with Average Kernel - Lagged Features");
plt.legend();
plt.show();
    
plt.plot(testNtrueRest_unNorm[0,42000:44000]
         ,color="b"
         ,label="Predicted Value");
plt.plot(testNtrueRest_unNorm[1,42000:44000]
         ,color='r'
         ,label='True Value');
plt.axis([0,2000,50,80]);
plt.xlabel(
            "2015-10-20 to 2015-11-02",
            fontsize = 14
            );
plt.ylabel(
            "Indoor Air Temperature (F)",
            fontsize = 14
            );
plt.title("LL with Average Kernel - Lagged Features");
plt.legend(loc='lower left');
plt.show();