In [32]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import itertools
import pickle
# import openpyxl as px
# from pyexcel_xls import get_data

%matplotlib inline

First, read the csv data files, and convert the index 'Timestamp' to datetimeindex.


In [33]:
dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
CSVdata=pd.read_csv('Building Electrical.csv', parse_dates=[0], date_parser=dateparse)
Create another pandas data frame and add another 3 columns of the time which will be used in the groupby method.

In [34]:
data=pd.read_csv('Building Electrical.csv', parse_dates=[0], date_parser=dateparse)
data['Hour']=data['Timestamp'].dt.hour
data['Date']=data['Timestamp'].dt.date
data['Date1']=data['Timestamp'].dt.date
data['Porter Hall Electric Real Power']=data['Porter Hall Electric Real Power'].convert_objects(convert_numeric=True)
data


/Users/LIJING/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
Out[34]:
Timestamp Porter Hall Electric Real Power Baker Hall Electric Real Power Hunt Library Real Power Hour Date Date1
0 2015-01-01 00:00:06 54551.0 149552 152521 0 2015-01-01 2015-01-01
1 2015-01-01 00:01:05 57268.0 148793 146870 0 2015-01-01 2015-01-01
2 2015-01-01 00:02:00 58189.0 148086 149532 0 2015-01-01 2015-01-01
3 2015-01-01 00:03:08 56355.0 147411 145890 0 2015-01-01 2015-01-01
4 2015-01-01 00:04:00 56711.0 149034 152569 0 2015-01-01 2015-01-01
5 2015-01-01 00:05:08 56444.0 147423 145508 0 2015-01-01 2015-01-01
6 2015-01-01 00:06:00 56593.0 148155 150128 0 2015-01-01 2015-01-01
7 2015-01-01 00:07:06 56606.0 147818 145987 0 2015-01-01 2015-01-01
8 2015-01-01 00:08:03 58387.0 146809 151267 0 2015-01-01 2015-01-01
9 2015-01-01 00:09:06 56617.0 151880 146085 0 2015-01-01 2015-01-01
10 2015-01-01 00:10:00 56308.0 150355 151698 0 2015-01-01 2015-01-01
11 2015-01-01 00:11:05 57161.0 150266 146373 0 2015-01-01 2015-01-01
12 2015-01-01 00:12:00 57292.0 148040 151500 0 2015-01-01 2015-01-01
13 2015-01-01 00:13:05 55843.0 148777 148352 0 2015-01-01 2015-01-01
14 2015-01-01 00:14:00 56656.0 146893 150116 0 2015-01-01 2015-01-01
15 2015-01-01 00:15:08 55300.0 146391 150649 0 2015-01-01 2015-01-01
16 2015-01-01 00:16:00 55156.0 147141 153569 0 2015-01-01 2015-01-01
17 2015-01-01 00:17:06 54930.0 149311 147945 0 2015-01-01 2015-01-01
18 2015-01-01 00:18:00 56880.0 147869 150942 0 2015-01-01 2015-01-01
19 2015-01-01 00:19:06 56439.0 147047 146198 0 2015-01-01 2015-01-01
20 2015-01-01 00:20:00 57391.0 149503 156537 0 2015-01-01 2015-01-01
21 2015-01-01 00:21:05 57741.0 147432 149721 0 2015-01-01 2015-01-01
22 2015-01-01 00:22:00 57285.0 152046 153104 0 2015-01-01 2015-01-01
23 2015-01-01 00:23:05 56461.0 153298 149867 0 2015-01-01 2015-01-01
24 2015-01-01 00:24:01 56156.0 151125 153481 0 2015-01-01 2015-01-01
25 2015-01-01 00:25:08 56627.0 147600 148540 0 2015-01-01 2015-01-01
26 2015-01-01 00:26:00 57092.0 147154 151880 0 2015-01-01 2015-01-01
27 2015-01-01 00:27:08 57165.0 148690 146421 0 2015-01-01 2015-01-01
28 2015-01-01 00:28:02 56333.0 148437 151952 0 2015-01-01 2015-01-01
29 2015-01-01 00:29:06 55603.0 145835 150322 0 2015-01-01 2015-01-01
... ... ... ... ... ... ... ...
520483 2015-12-31 23:29:00 50880.0 137471 125720 23 2015-12-31 2015-12-31
520484 2015-12-31 23:30:02 50253.0 139148 125013 23 2015-12-31 2015-12-31
520485 2015-12-31 23:31:02 50943.0 138526 113433 23 2015-12-31 2015-12-31
520486 2015-12-31 23:32:00 51239.0 138556 119603 23 2015-12-31 2015-12-31
520487 2015-12-31 23:33:00 51446.0 139841 125968 23 2015-12-31 2015-12-31
520488 2015-12-31 23:34:06 51607.0 139110 124983 23 2015-12-31 2015-12-31
520489 2015-12-31 23:35:00 52400.0 141357 123636 23 2015-12-31 2015-12-31
520490 2015-12-31 23:36:00 52218.0 141877 124951 23 2015-12-31 2015-12-31
520491 2015-12-31 23:37:00 53505.0 142736 126263 23 2015-12-31 2015-12-31
520492 2015-12-31 23:38:00 53748.0 142613 127388 23 2015-12-31 2015-12-31
520493 2015-12-31 23:39:02 53009.0 138078 128552 23 2015-12-31 2015-12-31
520494 2015-12-31 23:40:00 53125.0 138635 117762 23 2015-12-31 2015-12-31
520495 2015-12-31 23:41:00 53162.0 140024 121749 23 2015-12-31 2015-12-31
520496 2015-12-31 23:42:06 53116.0 142648 124869 23 2015-12-31 2015-12-31
520497 2015-12-31 23:43:00 52644.0 140794 120144 23 2015-12-31 2015-12-31
520498 2015-12-31 23:44:00 52293.0 140679 126956 23 2015-12-31 2015-12-31
520499 2015-12-31 23:45:00 51766.0 140794 131881 23 2015-12-31 2015-12-31
520500 2015-12-31 23:46:00 51766.0 141824 126038 23 2015-12-31 2015-12-31
520501 2015-12-31 23:47:02 52158.0 141496 127745 23 2015-12-31 2015-12-31
520502 2015-12-31 23:48:00 52120.0 142062 128102 23 2015-12-31 2015-12-31
520503 2015-12-31 23:49:00 52045.0 139630 126430 23 2015-12-31 2015-12-31
520504 2015-12-31 23:50:00 52665.0 137801 124661 23 2015-12-31 2015-12-31
520505 2015-12-31 23:51:00 50704.0 139021 123706 23 2015-12-31 2015-12-31
520506 2015-12-31 23:52:00 51880.0 138240 123363 23 2015-12-31 2015-12-31
520507 2015-12-31 23:53:00 50774.0 138168 121931 23 2015-12-31 2015-12-31
520508 2015-12-31 23:54:00 50798.0 136795 120350 23 2015-12-31 2015-12-31
520509 2015-12-31 23:55:04 51292.0 138206 125898 23 2015-12-31 2015-12-31
520510 2015-12-31 23:56:00 50723.0 138236 128314 23 2015-12-31 2015-12-31
520511 2015-12-31 23:57:00 51315.0 141507 122713 23 2015-12-31 2015-12-31
520512 2015-12-31 23:58:00 51139.0 142073 122875 23 2015-12-31 2015-12-31

520513 rows × 7 columns

Now reset the index of CSVdata as Timestamp.


In [35]:
CSVdata.set_index('Timestamp', drop=True, append=False, inplace=True, verify_integrity=False)
CSVdata


Out[35]:
Porter Hall Electric Real Power Baker Hall Electric Real Power Hunt Library Real Power
Timestamp
2015-01-01 00:00:06 54551 149552 152521
2015-01-01 00:01:05 57268 148793 146870
2015-01-01 00:02:00 58189 148086 149532
2015-01-01 00:03:08 56355 147411 145890
2015-01-01 00:04:00 56711 149034 152569
2015-01-01 00:05:08 56444 147423 145508
2015-01-01 00:06:00 56593 148155 150128
2015-01-01 00:07:06 56606 147818 145987
2015-01-01 00:08:03 58387 146809 151267
2015-01-01 00:09:06 56617 151880 146085
2015-01-01 00:10:00 56308 150355 151698
2015-01-01 00:11:05 57161 150266 146373
2015-01-01 00:12:00 57292 148040 151500
2015-01-01 00:13:05 55843 148777 148352
2015-01-01 00:14:00 56656 146893 150116
2015-01-01 00:15:08 55300 146391 150649
2015-01-01 00:16:00 55156 147141 153569
2015-01-01 00:17:06 54930 149311 147945
2015-01-01 00:18:00 56880 147869 150942
2015-01-01 00:19:06 56439 147047 146198
2015-01-01 00:20:00 57391 149503 156537
2015-01-01 00:21:05 57741 147432 149721
2015-01-01 00:22:00 57285 152046 153104
2015-01-01 00:23:05 56461 153298 149867
2015-01-01 00:24:01 56156 151125 153481
2015-01-01 00:25:08 56627 147600 148540
2015-01-01 00:26:00 57092 147154 151880
2015-01-01 00:27:08 57165 148690 146421
2015-01-01 00:28:02 56333 148437 151952
2015-01-01 00:29:06 55603 145835 150322
... ... ... ...
2015-12-31 23:29:00 50880 137471 125720
2015-12-31 23:30:02 50253 139148 125013
2015-12-31 23:31:02 50943 138526 113433
2015-12-31 23:32:00 51239 138556 119603
2015-12-31 23:33:00 51446 139841 125968
2015-12-31 23:34:06 51607 139110 124983
2015-12-31 23:35:00 52400 141357 123636
2015-12-31 23:36:00 52218 141877 124951
2015-12-31 23:37:00 53505 142736 126263
2015-12-31 23:38:00 53748 142613 127388
2015-12-31 23:39:02 53009 138078 128552
2015-12-31 23:40:00 53125 138635 117762
2015-12-31 23:41:00 53162 140024 121749
2015-12-31 23:42:06 53116 142648 124869
2015-12-31 23:43:00 52644 140794 120144
2015-12-31 23:44:00 52293 140679 126956
2015-12-31 23:45:00 51766 140794 131881
2015-12-31 23:46:00 51766 141824 126038
2015-12-31 23:47:02 52158 141496 127745
2015-12-31 23:48:00 52120 142062 128102
2015-12-31 23:49:00 52045 139630 126430
2015-12-31 23:50:00 52665 137801 124661
2015-12-31 23:51:00 50704 139021 123706
2015-12-31 23:52:00 51880 138240 123363
2015-12-31 23:53:00 50774 138168 121931
2015-12-31 23:54:00 50798 136795 120350
2015-12-31 23:55:04 51292 138206 125898
2015-12-31 23:56:00 50723 138236 128314
2015-12-31 23:57:00 51315 141507 122713
2015-12-31 23:58:00 51139 142073 122875

520513 rows × 3 columns

Because we are not going to use the data of Baker Hall, therefore we dropped the column of Baker Hall consumption.


In [36]:
CSVdata.drop('Baker Hall Electric Real Power',axis=1, inplace=True)
CSVdata['Porter Hall Electric Real Power'] = CSVdata['Porter Hall Electric Real Power'].convert_objects(convert_numeric=True)


/Users/LIJING/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app

Because the data is to numerous, therefore we resampled the data with 5 minutes period.


In [37]:
resampled_data=CSVdata.resample('5T').mean()
resampled_data


Out[37]:
Porter Hall Electric Real Power Hunt Library Real Power
Timestamp
2015-01-01 00:00:00 56614.80 149476.4
2015-01-01 00:05:00 56929.40 147795.0
2015-01-01 00:10:00 56652.00 149607.8
2015-01-01 00:15:00 55741.00 149860.6
2015-01-01 00:20:00 57006.80 152542.0
2015-01-01 00:25:00 56564.00 149823.0
2015-01-01 00:30:00 54614.60 155550.6
2015-01-01 00:35:00 54623.60 151818.0
2015-01-01 00:40:00 55370.60 153877.6
2015-01-01 00:45:00 55230.00 153271.0
2015-01-01 00:50:00 54491.00 152274.6
2015-01-01 00:55:00 55341.60 151356.4
2015-01-01 01:00:00 54539.60 149731.0
2015-01-01 01:05:00 54509.40 150958.8
2015-01-01 01:10:00 57155.20 147516.6
2015-01-01 01:15:00 55685.60 154285.0
2015-01-01 01:20:00 54039.60 148446.8
2015-01-01 01:25:00 54632.00 149721.2
2015-01-01 01:30:00 54279.20 150564.4
2015-01-01 01:35:00 53868.20 148130.4
2015-01-01 01:40:00 55171.60 147175.6
2015-01-01 01:45:00 55897.80 148144.6
2015-01-01 01:50:00 54900.60 151227.2
2015-01-01 01:55:00 54567.00 145441.6
2015-01-01 02:00:00 55114.20 147959.8
2015-01-01 02:05:00 55583.40 150427.6
2015-01-01 02:10:00 54654.60 149593.6
2015-01-01 02:15:00 54990.00 149429.2
2015-01-01 02:20:00 54794.20 151820.4
2015-01-01 02:25:00 55208.40 152175.0
... ... ...
2015-12-31 21:30:00 58556.00 128169.4
2015-12-31 21:35:00 59042.40 127035.4
2015-12-31 21:40:00 58100.20 125464.6
2015-12-31 21:45:00 58423.60 128556.0
2015-12-31 21:50:00 59599.00 123430.0
2015-12-31 21:55:00 58212.00 127371.4
2015-12-31 22:00:00 56275.80 128514.2
2015-12-31 22:05:00 53522.25 128048.6
2015-12-31 22:10:00 53445.60 128685.8
2015-12-31 22:15:00 53677.80 128641.6
2015-12-31 22:20:00 53814.00 123180.2
2015-12-31 22:25:00 53268.20 124295.6
2015-12-31 22:30:00 52882.20 124192.6
2015-12-31 22:35:00 51636.20 125251.4
2015-12-31 22:40:00 52695.60 124424.6
2015-12-31 22:45:00 52810.80 130240.2
2015-12-31 22:50:00 51941.60 123757.2
2015-12-31 22:55:00 53595.60 127716.0
2015-12-31 23:00:00 51287.20 128440.2
2015-12-31 23:05:00 50917.00 128020.4
2015-12-31 23:10:00 52396.60 121797.0
2015-12-31 23:15:00 50589.60 129947.0
2015-12-31 23:20:00 51112.40 125263.4
2015-12-31 23:25:00 52045.60 126230.4
2015-12-31 23:30:00 51097.60 121800.0
2015-12-31 23:35:00 52976.00 126158.0
2015-12-31 23:40:00 52868.00 122296.0
2015-12-31 23:45:00 51971.00 128039.2
2015-12-31 23:50:00 51364.20 122802.2
2015-12-31 23:55:00 51117.25 124950.0

105120 rows × 2 columns

There are some Nulls in the dataset, we used the interpolate method to filled these null


In [38]:
filled_data=resampled_data.interpolate()
filled_data.isnull().sum().sum()


Out[38]:
0

Now we use the dataset grouped by date to plot the dailt elecricty consumption of the Porter Hall and Hunt Library.


In [39]:
fig1=plt.figure(figsize=(10,5))
plt.plot(filled_data['Porter Hall Electric Real Power'])
plt.title('Porter Hall daily electricity consumption')
plt.show()

fig2=plt.figure(figsize=(10,5))
plt.title('Hunt Library daily electricity consumption')
plt.plot(filled_data['Hunt Library Real Power'])
plt.show()


Now we use the data grouped by Hour to plot the hourly consumption of Porter Hall and Hunt Library.


In [40]:
data_groupbyHour=data.groupby(['Hour']).mean()
data_groupbyHour


Out[40]:
Porter Hall Electric Real Power Hunt Library Real Power
Hour
0 69502.823964 181685
1 67794.910638 180643
2 66911.079051 180475
3 65536.111444 173783
4 65310.150892 172685
5 64615.124793 178130
6 65101.962983 177857
7 66965.664360 181316
8 77209.325121 196087
9 87660.726070 205171
10 94414.706444 209356
11 98291.234657 213323
12 99176.429833 216957
13 100450.078837 218823
14 101052.946365 219597
15 100315.411708 218055
16 96364.562059 214733
17 88223.559841 198038
18 82053.198281 186075
19 78341.489107 182305
20 76343.025508 180746
21 74548.328054 174034
22 70800.248928 172802
23 70548.567422 176283

In [41]:
plt.plot(data_groupbyHour['Porter Hall Electric Real Power'])
plt.title('Porter Hall hourly consumption')
plt.xlabel('Hour')
plt.ylabel('Porter Hall Electric Real Power')
plt.show()

plt.plot(data_groupbyHour['Hunt Library Real Power'])
plt.title('Hunt Library hourly consumption')
plt.xlabel('Hour')
plt.ylabel('Hunt Library Real Powe')
plt.show()


We plot the hourly consumption of both dataset in one figure in order to compared the trend of the electric consumption.


In [42]:
fig6=plt.figure()
ax1=plt.subplot()
ax1.plot(data_groupbyHour['Porter Hall Electric Real Power'],color='b')
plt.ylabel('Porter Hall Electric Real Power')
plt.xlabel('Hour')

ax2=ax1.twinx()
ax2.plot(data_groupbyHour['Hunt Library Real Power'],color='r')
plt.ylabel('Hunt Library Real Power')
plt.xlabel('Hour')
plt.legend()
plt.show()


Now we use the data grouped by date to plot the daily consumption of Porter Hall and Hunt Library.


In [72]:
data_groupbyDate=data.groupby(['Date']).mean()
data_groupbyDate


Out[72]:
Porter Hall Electric Real Power Hunt Library Real Power Hour DayOfYear
Date
2015-01-01 56448.404167 147581.155556 11.500000 1.0
2015-01-02 62028.961806 148426.591667 11.500000 2.0
2015-01-03 56550.805285 147290.475330 11.500347 3.0
2015-01-04 57441.488889 146173.771528 11.500000 4.0
2015-01-05 73615.118750 197326.864583 11.500000 5.0
2015-01-06 73268.491975 194809.591068 11.482903 6.0
2015-01-07 69641.068056 196610.704861 11.500000 7.0
2015-01-08 74347.162500 175815.981250 11.500000 8.0
2015-01-09 78396.432639 165250.492361 11.500000 9.0
2015-01-10 57756.335883 131297.517721 11.492703 10.0
2015-01-11 58981.716667 128374.602083 11.500000 11.0
2015-01-12 87356.511111 188938.523611 11.500000 12.0
2015-01-13 87241.436806 211731.209028 11.500000 13.0
2015-01-14 87847.725313 211778.278164 11.499305 14.0
2015-01-15 87365.122917 215726.036111 11.500000 15.0
2015-01-16 86097.918750 206990.187500 11.500000 16.0
2015-01-17 66559.795139 173205.467361 11.500000 17.0
2015-01-18 61884.872917 174549.265278 11.500000 18.0
2015-01-19 67360.597222 131992.944444 11.500000 19.0
2015-01-20 85724.481250 194118.202778 11.500000 20.0
2015-01-21 85399.337274 206404.685893 11.492703 21.0
2015-01-22 85099.566667 210714.389583 11.500000 22.0
2015-01-23 85206.883333 204861.573611 11.500000 23.0
2015-01-24 62088.511806 171367.679861 11.500000 24.0
2015-01-25 64457.419444 172836.426389 11.500000 25.0
2015-01-26 83532.484722 197523.806944 11.500000 26.0
2015-01-27 87935.520833 209502.118056 11.500000 27.0
2015-01-28 87129.358333 208853.832639 11.500000 28.0
2015-01-29 88243.631944 208687.975694 11.500000 29.0
2015-01-30 84718.001389 203338.872222 11.500000 30.0
... ... ... ... ...
2015-12-02 93764.452514 202148.428771 11.516061 336.0
2015-12-03 93628.952778 207891.806250 11.500000 337.0
2015-12-04 89625.128895 204492.293201 11.429887 338.0
2015-12-05 72604.684284 176128.610570 11.501391 339.0
2015-12-06 71944.653926 170288.569145 11.499653 340.0
2015-12-07 91918.369006 198409.402363 11.495483 341.0
2015-12-08 94024.720251 210637.267223 11.494781 342.0
2015-12-09 92274.079443 211961.002091 11.508014 343.0
2015-12-10 93718.437500 209785.235417 11.500000 344.0
2015-12-11 91097.193885 206052.881167 11.502432 345.0
2015-12-12 74620.874218 179123.250174 11.494093 346.0
2015-12-13 79296.964485 184162.367688 11.510446 347.0
2015-12-14 93182.748227 206893.586525 11.494326 348.0
2015-12-15 87791.860821 210655.852470 11.504523 349.0
2015-12-16 87926.253138 211491.835425 11.519526 350.0
2015-12-17 89369.993026 212032.808926 11.486750 351.0
2015-12-18 81966.878746 208322.338676 11.483624 352.0
2015-12-19 61800.077832 178030.560111 11.507992 353.0
2015-12-20 60448.331250 177173.485417 11.500000 354.0
2015-12-21 68027.048780 183794.833449 11.494077 355.0
2015-12-22 68576.891911 170363.214784 11.506276 356.0
2015-12-23 66128.321528 153850.788889 11.500000 357.0
2015-12-24 61267.625174 117784.982615 11.515994 358.0
2015-12-25 57965.602091 116972.173519 11.506620 359.0
2015-12-26 54310.827658 117294.430855 11.499653 360.0
2015-12-27 55528.938761 119506.092425 11.505212 361.0
2015-12-28 59526.125283 161693.577677 11.758673 362.0
2015-12-29 60415.849306 160985.225000 11.500000 363.0
2015-12-30 59218.091794 157181.687283 11.501042 364.0
2015-12-31 56123.254368 123032.353023 11.492008 365.0

363 rows × 4 columns


In [44]:
fig3=plt.figure(figsize=(12,5))
plt.plot(data_groupbyDate['Porter Hall Electric Real Power'])
plt.title('Porter Hall daily consumption')
plt.ylabel('Porter Hall Electric Real Power')
plt.show()

fig4=plt.figure(figsize=(12,5))
plt.title('Hunt Library daily consumption')
plt.plot(data_groupbyDate['Hunt Library Real Power'])
plt.ylabel('Hunt Library Electric Real Power')
plt.show()


We plot the daily consumption of both dataset in one figure in order to compared the trend of the electric consumption.


In [45]:
fig5=plt.figure(figsize=(12,5))
ax1=plt.subplot()
ax1.plot(data_groupbyDate['Porter Hall Electric Real Power'],color='b')
plt.ylabel('Porter Hall daily consumption')
plt.xlabel('Date')

ax2=ax1.twinx()
ax2.plot(data_groupbyDate['Hunt Library Real Power'],color='r')
plt.ylabel('Hunt Library daily consumption')
plt.xlabel('Date')
plt.legend()
plt.show()


Now we are going to plot the heat map of the electric consumption of both dataset.


In [46]:
data['DayOfYear'] = data['Timestamp'].dt.dayofyear
loadCurves1 = data.groupby(['DayOfYear', 'Hour'])['Porter Hall Electric Real Power'].mean().unstack()
loadCurves2 = data.groupby(['DayOfYear', 'Hour'])['Hunt Library Real Power'].mean().unstack()

In [47]:
import matplotlib.colors as clrs
plt.imshow(loadCurves1, aspect='auto',cmap='summer')
plt.title('Heatmap of Porter Hall Electric Consumption')
plt.ylabel('Day of Year')
plt.xlabel('Hour of the Day')
plt.colorbar()


Out[47]:
<matplotlib.colorbar.Colorbar at 0x11a355630>

In [48]:
plt.imshow(loadCurves2, aspect='auto',cmap='summer')
plt.title('Heatmap of Hunt Library Electric Consumption')
plt.ylabel('Day of Year')
plt.xlabel('Hour of the Day')
plt.colorbar()


Out[48]:
<matplotlib.colorbar.Colorbar at 0x12212a630>

In [71]:
data_groupbyDate


Out[71]:
Porter Hall Electric Real Power Hunt Library Real Power Hour DayOfYear
Date
2015-01-01 56448.404167 147581.155556 11.500000 1.0
2015-01-02 62028.961806 148426.591667 11.500000 2.0
2015-01-03 56550.805285 147290.475330 11.500347 3.0
2015-01-04 57441.488889 146173.771528 11.500000 4.0
2015-01-05 73615.118750 197326.864583 11.500000 5.0
2015-01-06 73268.491975 194809.591068 11.482903 6.0
2015-01-07 69641.068056 196610.704861 11.500000 7.0
2015-01-08 74347.162500 175815.981250 11.500000 8.0
2015-01-09 78396.432639 165250.492361 11.500000 9.0
2015-01-10 57756.335883 131297.517721 11.492703 10.0
2015-01-11 58981.716667 128374.602083 11.500000 11.0
2015-01-12 87356.511111 188938.523611 11.500000 12.0
2015-01-13 87241.436806 211731.209028 11.500000 13.0
2015-01-14 87847.725313 211778.278164 11.499305 14.0
2015-01-15 87365.122917 215726.036111 11.500000 15.0
2015-01-16 86097.918750 206990.187500 11.500000 16.0
2015-01-17 66559.795139 173205.467361 11.500000 17.0
2015-01-18 61884.872917 174549.265278 11.500000 18.0
2015-01-19 67360.597222 131992.944444 11.500000 19.0
2015-01-20 85724.481250 194118.202778 11.500000 20.0
2015-01-21 85399.337274 206404.685893 11.492703 21.0
2015-01-22 85099.566667 210714.389583 11.500000 22.0
2015-01-23 85206.883333 204861.573611 11.500000 23.0
2015-01-24 62088.511806 171367.679861 11.500000 24.0
2015-01-25 64457.419444 172836.426389 11.500000 25.0
2015-01-26 83532.484722 197523.806944 11.500000 26.0
2015-01-27 87935.520833 209502.118056 11.500000 27.0
2015-01-28 87129.358333 208853.832639 11.500000 28.0
2015-01-29 88243.631944 208687.975694 11.500000 29.0
2015-01-30 84718.001389 203338.872222 11.500000 30.0
... ... ... ... ...
2015-12-02 93764.452514 202148.428771 11.516061 336.0
2015-12-03 93628.952778 207891.806250 11.500000 337.0
2015-12-04 89625.128895 204492.293201 11.429887 338.0
2015-12-05 72604.684284 176128.610570 11.501391 339.0
2015-12-06 71944.653926 170288.569145 11.499653 340.0
2015-12-07 91918.369006 198409.402363 11.495483 341.0
2015-12-08 94024.720251 210637.267223 11.494781 342.0
2015-12-09 92274.079443 211961.002091 11.508014 343.0
2015-12-10 93718.437500 209785.235417 11.500000 344.0
2015-12-11 91097.193885 206052.881167 11.502432 345.0
2015-12-12 74620.874218 179123.250174 11.494093 346.0
2015-12-13 79296.964485 184162.367688 11.510446 347.0
2015-12-14 93182.748227 206893.586525 11.494326 348.0
2015-12-15 87791.860821 210655.852470 11.504523 349.0
2015-12-16 87926.253138 211491.835425 11.519526 350.0
2015-12-17 89369.993026 212032.808926 11.486750 351.0
2015-12-18 81966.878746 208322.338676 11.483624 352.0
2015-12-19 61800.077832 178030.560111 11.507992 353.0
2015-12-20 60448.331250 177173.485417 11.500000 354.0
2015-12-21 68027.048780 183794.833449 11.494077 355.0
2015-12-22 68576.891911 170363.214784 11.506276 356.0
2015-12-23 66128.321528 153850.788889 11.500000 357.0
2015-12-24 61267.625174 117784.982615 11.515994 358.0
2015-12-25 57965.602091 116972.173519 11.506620 359.0
2015-12-26 54310.827658 117294.430855 11.499653 360.0
2015-12-27 55528.938761 119506.092425 11.505212 361.0
2015-12-28 59526.125283 161693.577677 11.758673 362.0
2015-12-29 60415.849306 160985.225000 11.500000 363.0
2015-12-30 59218.091794 157181.687283 11.501042 364.0
2015-12-31 56123.254368 123032.353023 11.492008 365.0

363 rows × 4 columns

Now we are using the regression tree to analyze the data.


In [81]:
def plot_regdataOfPorter():
    plt.plot(data_groupbyDate['DayOfYear'],data_groupbyDate['Porter Hall Electric Real Power'],'rd')
    plt.xlabel('DayOfYear')
    plt.ylabel('Porter Hall Electric Real Power')
def plot_regdataOfHunt():
    plt.plot(data_groupbyDate['DayOfYear'],data_groupbyDate['Hunt Library Real Power'],'rd')
    plt.xlabel('DayOfYear')
    plt.ylabel('Hunt Library Real Power')

In [85]:
from sklearn import tree
x = data_groupbyDate['DayOfYear']
y = data_groupbyDate['Porter Hall Electric Real Power']
xrange = np.arange(x.min(),x.max(),(x.max()-x.min())/100).reshape(100,1)
x = x[:, None]
reg = tree.DecisionTreeRegressor() # Default parameters, though you can tweak these!
reg.fit(x,y)

plot_regdataOfPorter()
plt.title('Regression of Porter Hall Electric Consumption')
plt.plot(xrange,reg.predict(xrange),'b--',linewidth=3)
plt.show()



In [86]:
print(reg.score(x,y))


1.0

In [87]:
x = data_groupbyDate['DayOfYear']
y = data_groupbyDate['Hunt Library Real Power']
xrange = np.arange(x.min(),x.max(),(x.max()-x.min())/100).reshape(100,1)
x = x[:, None]
reg = tree.DecisionTreeRegressor() # Default parameters, though you can tweak these!
reg.fit(x,y)

plot_regdataOfHunt()
plt.title('Regression of Hunt Library Consumption')
plt.plot(xrange,reg.predict(xrange),'b--',linewidth=3)
plt.show()



In [88]:
print(reg.score(x,y))


1.0

In [ ]:


In [ ]: