In [1]:
%matplotlib inline

import datetime
import json 
import math
import random
import operator
import os
from calendar import monthrange

import pandas as pd
import numpy as np

import sqlalchemy as sa
from sqlalchemy import *

import matplotlib 
import matplotlib.pyplot as pyplt
from pylab import *

data_directory = '../data/common_appliances/'

Most common appliances


In [2]:
# to find days in a certain month for a certain year:
monthrange(2011, 2)


Out[2]:
(1, 28)

In [3]:
# [SENSITIVE] Create database engine
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db

eng = create_engine(url)

In [4]:
# runs in < 1s
def get_unique_dataids(eng,group,month):
    '''gets all dataids for a specifc group(int) for a specific month (int)'''
    query = 'select distinct dataid from "PecanStreet_CuratedSets".group{0}_disaggregated_2013_{1:02d}'.format(group,month)
    eng_object=eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    return df[0]

In [5]:
dataids = get_unique_dataids(eng,1,1)
dataids[0]


Out[5]:
5949

In [6]:
# takes about 5s to run
def get_home_month(eng,group,year,month,dataid):
    '''Returns a dictionary where the keys are the homes and the values are 
    dataframe represenations oft the tables associated with that home
    '''
    
    query = 'select * from "PecanStreet_CuratedSets".group{0}_disaggregated_{1}_{2:02d} where dataid={3}'.format(group,year,month,dataid)
    eng_object=eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    df.columns = eng_object.keys()
    return df

In [8]:
# get all houses data for january. takes about 2 min
houses_data = [get_home_month(eng,1,2013,1,dataid) for dataid in dataids]

In [10]:
# get data for a single home for a whole year, by month (no december data). takes about 1 min
house_1_2013 = [get_home_month(eng,1,2013,month,dataids[0]) for month in range(1,12)]

In [11]:
houses_data[0]


Out[11]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2976 entries, 0 to 2975
Data columns (total 74 columns):
id                      2976  non-null values
dataid                  2976  non-null values
utc_15min               2976  non-null values
air1                    2976  non-null values
air2                    2976  non-null values
air3                    2976  non-null values
airwindowunit1          2976  non-null values
aquarium1               2976  non-null values
bathroom1               2976  non-null values
bathroom2               2976  non-null values
bedroom1                2976  non-null values
bedroom2                2976  non-null values
bedroom3                2976  non-null values
bedroom4                2976  non-null values
bedroom5                2976  non-null values
car1                    0  non-null values
clotheswasher1          2976  non-null values
clotheswasher_dryg1     2976  non-null values
diningroom1             2976  non-null values
diningroom2             2976  non-null values
dishwasher1             2976  non-null values
disposal1               2976  non-null values
drye1                   2976  non-null values
dryg1                   2976  non-null values
freezer1                2976  non-null values
furnace1                2976  non-null values
furnace2                2976  non-null values
garage1                 2976  non-null values
garage2                 2976  non-null values
heater1                 2976  non-null values
housefan1               2976  non-null values
icemaker1               2976  non-null values
jacuzzi1                2976  non-null values
kitchen1                2976  non-null values
kitchen2                2976  non-null values
kitchenapp1             2976  non-null values
kitchenapp2             2976  non-null values
lights_plugs1           2976  non-null values
lights_plugs2           2976  non-null values
lights_plugs3           2976  non-null values
lights_plugs4           2976  non-null values
lights_plugs5           2976  non-null values
lights_plugs6           2976  non-null values
livingroom1             2976  non-null values
livingroom2             2976  non-null values
microwave1              2976  non-null values
office1                 2976  non-null values
outsidelights_plugs1    2976  non-null values
outsidelights_plugs2    2976  non-null values
oven1                   2976  non-null values
oven2                   2976  non-null values
pool1                   2976  non-null values
pool2                   2976  non-null values
poollight1              2976  non-null values
poolpump1               2976  non-null values
pump1                   2976  non-null values
range1                  2976  non-null values
refrigerator1           2976  non-null values
refrigerator2           2976  non-null values
security1               2976  non-null values
shed1                   2976  non-null values
sprinkler1              2976  non-null values
subpanel1               2976  non-null values
subpanel2               2976  non-null values
unknown1                2976  non-null values
unknown2                2976  non-null values
unknown3                2976  non-null values
unknown4                2976  non-null values
use                     2976  non-null values
utilityroom1            2976  non-null values
venthood1               2976  non-null values
waterheater1            2976  non-null values
waterheater2            2976  non-null values
winecooler1             2976  non-null values
dtypes: int64(2), object(72)

In [12]:
def get_total_disaggregated_usage(house_data):
    '''Given a dataframe of 15min data for a house, sums all columns corresponding to appliances.
    '''
    # drop everything that's not an appliance
    house_data = house_data.drop(['dataid','id','utc_15min','subpanel1','subpanel2','use'],axis=1)
    
    # sum each column
    totals = house_data.sum().fillna(0.)
    
    return totals

In [14]:
# for each house, compute sum for a single month. Takes about 2 min
house_sums = [get_total_disaggregated_usage(house_data) for house_data in houses_data]

In [15]:
# for one house, compute monthly sums
house_1_2013_monthly_sums = [get_total_disaggregated_usage(house_month) for house_month in house_1_2013]

In [16]:
print len(house_sums)
print len(house_1_2013_monthly_sums)


25
11

In [17]:
# sort all house_sums so that highest power appliances are at the top.
for house_sum in house_sums:
    house_sum.sort(ascending=False)

In [18]:
# sum all months
def sum_monthly_sums(month_sums):
    '''Given a list of appliance usage sum series, return a single series of the total sums
    '''
    sums = month_sums[0]
    for month_sum in month_sums[1:]:
        sums += month_sum
    return sums

In [19]:
house_appliance_totals = sum_monthly_sums(house_1_2013_monthly_sums)
house_appliance_totals.sort(ascending=False)
house_appliance_totals


Out[19]:
air1             13096.111
kitchenapp1       1681.102
lights_plugs1      520.677
dishwasher1        464.390
oven1              344.291
bedroom1            88.404
disposal1            0.366
jacuzzi1             0.000
icemaker1            0.000
housefan1            0.000
heater1              0.000
garage1              0.000
furnace2             0.000
furnace1             0.000
freezer1             0.000
...
poollight1              0
pool2                   0
pool1                   0
oven2                   0
outsidelights_plugs2    0
outsidelights_plugs1    0
office1                 0
microwave1              0
livingroom2             0
livingroom1             0
lights_plugs6           0
lights_plugs5           0
lights_plugs4           0
lights_plugs3           0
kitchenapp2             0
Length: 68, dtype: float64

In [20]:
def get_top_k_appliances(house_appliance_totals,k):
    '''Get the top k appliances for a given summed series.'''
    return house_appliance_totals[:k].keys()

In [21]:
top_5_appliances = get_top_k_appliances(house_appliance_totals,5)

In [22]:
def get_usage_for_appliances(house_sums,appliances):
    return [house_sum[appliances] for house_sum in house_sums]

In [23]:
top_5_appliance_usages = get_usage_for_appliances(house_1_2013_monthly_sums,top_5_appliances)

In [164]:
def get_usage_csv(appliance_usages,default_day=1,month_range=(1,13),year=2013):
    # organize data
    keys = appliance_usages[0].keys()
    appliances = {key:[] for key in keys}
    for usages in appliance_usages:
        for appliance in keys:
            appliances[appliance].append(usages[appliance])
    
    # create csv string
    csv_string = "key,value,date\n"
    for key in keys:
        for month in range(*month_range):
            csv_string += ','.join([key,str(appliances[key][month-1/1]),'{0:02d}/{1:02d}/{2:02d}'.format(month,default_day,year%100)]) + '\n'
    return csv_string

In [26]:
csv_string = get_usage_csv(top_5_appliance_usages,month_range=(1,12))

In [27]:
print csv_string


key,value,date
air1,0.003,01/01/13
air1,0.0,02/01/13
air1,0.003,03/01/13
air1,135.839,04/01/13
air1,1110.492,05/01/13
air1,2297.781,06/01/13
air1,3035.443,07/01/13
air1,3813.852,08/01/13
air1,2207.333,09/01/13
air1,489.391,10/01/13
air1,5.974,11/01/13
kitchenapp1,166.286,01/01/13
kitchenapp1,143.105,02/01/13
kitchenapp1,137.548,03/01/13
kitchenapp1,153.873,04/01/13
kitchenapp1,156.753,05/01/13
kitchenapp1,148.582,06/01/13
kitchenapp1,163.429,07/01/13
kitchenapp1,165.57,08/01/13
kitchenapp1,162.922,09/01/13
kitchenapp1,123.476,10/01/13
kitchenapp1,159.558,11/01/13
lights_plugs1,47.973,01/01/13
lights_plugs1,44.92,02/01/13
lights_plugs1,51.521,03/01/13
lights_plugs1,40.575,04/01/13
lights_plugs1,71.041,05/01/13
lights_plugs1,34.916,06/01/13
lights_plugs1,35.488,07/01/13
lights_plugs1,58.462,08/01/13
lights_plugs1,40.384,09/01/13
lights_plugs1,39.319,10/01/13
lights_plugs1,56.078,11/01/13
dishwasher1,40.754,01/01/13
dishwasher1,42.99,02/01/13
dishwasher1,52.102,03/01/13
dishwasher1,39.346,04/01/13
dishwasher1,44.994,05/01/13
dishwasher1,34.017,06/01/13
dishwasher1,43.723,07/01/13
dishwasher1,51.436,08/01/13
dishwasher1,38.059,09/01/13
dishwasher1,30.583,10/01/13
dishwasher1,46.386,11/01/13
oven1,40.524,01/01/13
oven1,38.607,02/01/13
oven1,35.514,03/01/13
oven1,17.228,04/01/13
oven1,20.491,05/01/13
oven1,14.462,06/01/13
oven1,25.896,07/01/13
oven1,22.424,08/01/13
oven1,38.707,09/01/13
oven1,27.203,10/01/13
oven1,63.235,11/01/13


In [36]:
def get_home_summary_csv(eng,group,dataid,year,months,k=5):
    data = [get_home_month(eng,group,year,month,dataid) for month in months]
    monthly_sums = [get_total_disaggregated_usage(house_month) for house_month in data]
    appliance_totals = sum_monthly_sums(monthly_sums)
    appliance_totals.sort(ascending=False)
    appliance_totals
    top_k_appliances = get_top_k_appliances(appliance_totals,k)
    top_k_appliance_usages = get_usage_for_appliances(monthly_sums,top_k_appliances)
    csv_string = get_usage_csv(top_k_appliance_usages)
    return csv_string

In [37]:
# takes about 4 minutes per home
for dataid in dataids[:10]:
    if dataid == 9937:
        print "skipping 9937"
        print
        continue
    csv_string = get_home_summary_csv(eng,1,dataid,2013,range(1,12),6)
    print dataid
    print csv_string
    print


5949
key,value,date
air1,0.003,01/01/13
air1,0.0,02/01/13
air1,0.003,03/01/13
air1,135.839,04/01/13
air1,1110.492,05/01/13
air1,2297.781,06/01/13
air1,3035.443,07/01/13
air1,3813.852,08/01/13
air1,2207.333,09/01/13
air1,489.391,10/01/13
air1,5.974,11/01/13
kitchenapp1,166.286,01/01/13
kitchenapp1,143.105,02/01/13
kitchenapp1,137.548,03/01/13
kitchenapp1,153.873,04/01/13
kitchenapp1,156.753,05/01/13
kitchenapp1,148.582,06/01/13
kitchenapp1,163.429,07/01/13
kitchenapp1,165.57,08/01/13
kitchenapp1,162.922,09/01/13
kitchenapp1,123.476,10/01/13
kitchenapp1,159.558,11/01/13
lights_plugs1,47.973,01/01/13
lights_plugs1,44.92,02/01/13
lights_plugs1,51.521,03/01/13
lights_plugs1,40.575,04/01/13
lights_plugs1,71.041,05/01/13
lights_plugs1,34.916,06/01/13
lights_plugs1,35.488,07/01/13
lights_plugs1,58.462,08/01/13
lights_plugs1,40.384,09/01/13
lights_plugs1,39.319,10/01/13
lights_plugs1,56.078,11/01/13
dishwasher1,40.754,01/01/13
dishwasher1,42.99,02/01/13
dishwasher1,52.102,03/01/13
dishwasher1,39.346,04/01/13
dishwasher1,44.994,05/01/13
dishwasher1,34.017,06/01/13
dishwasher1,43.723,07/01/13
dishwasher1,51.436,08/01/13
dishwasher1,38.059,09/01/13
dishwasher1,30.583,10/01/13
dishwasher1,46.386,11/01/13
oven1,40.524,01/01/13
oven1,38.607,02/01/13
oven1,35.514,03/01/13
oven1,17.228,04/01/13
oven1,20.491,05/01/13
oven1,14.462,06/01/13
oven1,25.896,07/01/13
oven1,22.424,08/01/13
oven1,38.707,09/01/13
oven1,27.203,10/01/13
oven1,63.235,11/01/13
bedroom1,2.619,01/01/13
bedroom1,2.624,02/01/13
bedroom1,5.259,03/01/13
bedroom1,8.926,04/01/13
bedroom1,11.463,05/01/13
bedroom1,10.036,06/01/13
bedroom1,11.52,07/01/13
bedroom1,10.832,08/01/13
bedroom1,9.595,09/01/13
bedroom1,6.711,10/01/13
bedroom1,8.819,11/01/13


2158
key,value,date
air1,150.315,01/01/13
air1,145.826,02/01/13
air1,505.566,03/01/13
air1,906.695,04/01/13
air1,1898.723,05/01/13
air1,3039.759,06/01/13
air1,3056.591,07/01/13
air1,3597.661,08/01/13
air1,2632.359,09/01/13
air1,1174.752,10/01/13
air1,323.458,11/01/13
furnace1,601.694,01/01/13
furnace1,610.694,02/01/13
furnace1,725.65,03/01/13
furnace1,865.163,04/01/13
furnace1,1047.776,05/01/13
furnace1,1431.758,06/01/13
furnace1,1441.244,07/01/13
furnace1,1612.961,08/01/13
furnace1,1334.7,09/01/13
furnace1,723.652,10/01/13
furnace1,541.004,11/01/13
refrigerator1,574.315,01/01/13
refrigerator1,541.815,02/01/13
refrigerator1,615.876,03/01/13
refrigerator1,593.741,04/01/13
refrigerator1,613.825,05/01/13
refrigerator1,553.182,06/01/13
refrigerator1,462.975,07/01/13
refrigerator1,455.172,08/01/13
refrigerator1,420.578,09/01/13
refrigerator1,316.31,10/01/13
refrigerator1,363.734,11/01/13
drye1,501.597,01/01/13
drye1,397.008,02/01/13
drye1,359.05,03/01/13
drye1,313.707,04/01/13
drye1,331.753,05/01/13
drye1,178.47,06/01/13
drye1,182.267,07/01/13
drye1,236.828,08/01/13
drye1,259.949,09/01/13
drye1,214.743,10/01/13
drye1,405.263,11/01/13
lights_plugs1,202.97,01/01/13
lights_plugs1,123.14,02/01/13
lights_plugs1,92.234,03/01/13
lights_plugs1,124.605,04/01/13
lights_plugs1,180.443,05/01/13
lights_plugs1,218.36,06/01/13
lights_plugs1,185.868,07/01/13
lights_plugs1,190.688,08/01/13
lights_plugs1,215.94,09/01/13
lights_plugs1,103.406,10/01/13
lights_plugs1,0.0,11/01/13
outsidelights_plugs1,154.938,01/01/13
outsidelights_plugs1,114.019,02/01/13
outsidelights_plugs1,146.495,03/01/13
outsidelights_plugs1,130.285,04/01/13
outsidelights_plugs1,117.087,05/01/13
outsidelights_plugs1,118.909,06/01/13
outsidelights_plugs1,153.356,07/01/13
outsidelights_plugs1,98.274,08/01/13
outsidelights_plugs1,158.146,09/01/13
outsidelights_plugs1,76.077,10/01/13
outsidelights_plugs1,209.279,11/01/13


5785
key,value,date
air1,0.255,01/01/13
air1,0.384,02/01/13
air1,18.972,03/01/13
air1,243.643,04/01/13
air1,916.355,05/01/13
air1,2540.386,06/01/13
air1,2662.649,07/01/13
air1,3114.385,08/01/13
air1,2277.589,09/01/13
air1,593.993,10/01/13
air1,26.059,11/01/13
drye1,93.909,01/01/13
drye1,144.942,02/01/13
drye1,133.271,03/01/13
drye1,173.596,04/01/13
drye1,111.328,05/01/13
drye1,137.928,06/01/13
drye1,90.398,07/01/13
drye1,113.487,08/01/13
drye1,102.236,09/01/13
drye1,68.328,10/01/13
drye1,151.708,11/01/13
garage1,0.0,01/01/13
garage1,0.0,02/01/13
garage1,0.0,03/01/13
garage1,0.0,04/01/13
garage1,0.0,05/01/13
garage1,0.0,06/01/13
garage1,0.0,07/01/13
garage1,0.0,08/01/13
garage1,0.0,09/01/13
garage1,0.0,10/01/13
garage1,0.0,11/01/13
disposal1,0.0,01/01/13
disposal1,0.0,02/01/13
disposal1,0.0,03/01/13
disposal1,0.0,04/01/13
disposal1,0.0,05/01/13
disposal1,0.0,06/01/13
disposal1,0.0,07/01/13
disposal1,0.0,08/01/13
disposal1,0.0,09/01/13
disposal1,0.0,10/01/13
disposal1,0.0,11/01/13
dryg1,0.0,01/01/13
dryg1,0.0,02/01/13
dryg1,0.0,03/01/13
dryg1,0.0,04/01/13
dryg1,0.0,05/01/13
dryg1,0.0,06/01/13
dryg1,0.0,07/01/13
dryg1,0.0,08/01/13
dryg1,0.0,09/01/13
dryg1,0.0,10/01/13
dryg1,0.0,11/01/13
freezer1,0.0,01/01/13
freezer1,0.0,02/01/13
freezer1,0.0,03/01/13
freezer1,0.0,04/01/13
freezer1,0.0,05/01/13
freezer1,0.0,06/01/13
freezer1,0.0,07/01/13
freezer1,0.0,08/01/13
freezer1,0.0,09/01/13
freezer1,0.0,10/01/13
freezer1,0.0,11/01/13


9737
key,value,date
air1,12.283,01/01/13
air1,10.142,02/01/13
air1,190.603,03/01/13
air1,609.283,04/01/13
air1,2270.126,05/01/13
air1,5016.947,06/01/13
air1,4372.131,07/01/13
air1,5049.981,08/01/13
air1,3951.401,09/01/13
air1,822.934,10/01/13
air1,89.718,11/01/13
furnace1,369.447,01/01/13
furnace1,188.007,02/01/13
furnace1,277.195,03/01/13
furnace1,321.911,04/01/13
furnace1,683.176,05/01/13
furnace1,1256.942,06/01/13
furnace1,1139.612,07/01/13
furnace1,1313.535,08/01/13
furnace1,1077.478,09/01/13
furnace1,425.702,10/01/13
furnace1,381.713,11/01/13
air2,0.0,01/01/13
air2,0.0,02/01/13
air2,0.0,03/01/13
air2,25.01,04/01/13
air2,121.084,05/01/13
air2,1013.829,06/01/13
air2,1574.891,07/01/13
air2,1821.963,08/01/13
air2,1112.928,09/01/13
air2,156.655,10/01/13
air2,67.399,11/01/13
air3,10.338,01/01/13
air3,0.238,02/01/13
air3,88.009,03/01/13
air3,327.344,04/01/13
air3,992.773,05/01/13
air3,1495.091,06/01/13
air3,728.045,07/01/13
air3,920.522,08/01/13
air3,577.092,09/01/13
air3,277.29,10/01/13
air3,32.403,11/01/13
garage1,577.683,01/01/13
garage1,501.291,02/01/13
garage1,590.296,03/01/13
garage1,486.854,04/01/13
garage1,589.126,05/01/13
garage1,590.206,06/01/13
garage1,538.406,07/01/13
garage1,558.573,08/01/13
garage1,560.811,09/01/13
garage1,237.922,10/01/13
garage1,0.0,11/01/13
furnace2,101.905,01/01/13
furnace2,67.337,02/01/13
furnace2,96.001,03/01/13
furnace2,111.46,04/01/13
furnace2,276.492,05/01/13
furnace2,1620.95,06/01/13
furnace2,684.697,07/01/13
furnace2,563.782,08/01/13
furnace2,369.369,09/01/13
furnace2,227.255,10/01/13
furnace2,134.155,11/01/13


7989
key,value,date
air1,131.633,01/01/13
air1,106.857,02/01/13
air1,130.491,03/01/13
air1,245.969,04/01/13
air1,799.555,05/01/13
air1,2025.824,06/01/13
air1,2242.886,07/01/13
air1,2744.197,08/01/13
air1,1912.714,09/01/13
air1,469.448,10/01/13
air1,139.629,11/01/13
office1,691.832,01/01/13
office1,557.915,02/01/13
office1,591.283,03/01/13
office1,653.045,04/01/13
office1,708.596,05/01/13
office1,718.783,06/01/13
office1,763.759,07/01/13
office1,690.769,08/01/13
office1,759.012,09/01/13
office1,526.681,10/01/13
office1,689.201,11/01/13
pool2,857.239,01/01/13
pool2,766.933,02/01/13
pool2,860.421,03/01/13
pool2,914.475,04/01/13
pool2,976.446,05/01/13
pool2,947.683,06/01/13
pool2,923.424,07/01/13
pool2,311.731,08/01/13
pool2,0.0,09/01/13
pool2,0.0,10/01/13
pool2,0.0,11/01/13
poolpump1,0.0,01/01/13
poolpump1,0.0,02/01/13
poolpump1,0.0,03/01/13
poolpump1,0.0,04/01/13
poolpump1,0.0,05/01/13
poolpump1,0.0,06/01/13
poolpump1,0.0,07/01/13
poolpump1,547.291,08/01/13
poolpump1,857.989,09/01/13
poolpump1,674.372,10/01/13
poolpump1,713.682,11/01/13
furnace1,154.49,01/01/13
furnace1,118.076,02/01/13
furnace1,129.866,03/01/13
furnace1,140.801,04/01/13
furnace1,122.741,05/01/13
furnace1,206.845,06/01/13
furnace1,225.072,07/01/13
furnace1,265.638,08/01/13
furnace1,209.959,09/01/13
furnace1,72.982,10/01/13
furnace1,68.395,11/01/13
drye1,199.372,01/01/13
drye1,140.348,02/01/13
drye1,120.394,03/01/13
drye1,104.047,04/01/13
drye1,77.409,05/01/13
drye1,105.896,06/01/13
drye1,72.455,07/01/13
drye1,149.184,08/01/13
drye1,106.948,09/01/13
drye1,128.649,10/01/13
drye1,156.115,11/01/13


2156
key,value,date
air1,454.33,01/01/13
air1,195.676,02/01/13
air1,490.2,03/01/13
air1,446.451,04/01/13
air1,1210.408,05/01/13
air1,2412.837,06/01/13
air1,2014.55,07/01/13
air1,2586.194,08/01/13
air1,1909.403,09/01/13
air1,635.659,10/01/13
air1,493.539,11/01/13
waterheater1,841.817,01/01/13
waterheater1,764.269,02/01/13
waterheater1,1103.744,03/01/13
waterheater1,731.472,04/01/13
waterheater1,789.639,05/01/13
waterheater1,619.861,06/01/13
waterheater1,651.214,07/01/13
waterheater1,620.038,08/01/13
waterheater1,537.807,09/01/13
waterheater1,377.7,10/01/13
waterheater1,756.114,11/01/13
furnace1,85.687,01/01/13
furnace1,50.272,02/01/13
furnace1,101.608,03/01/13
furnace1,84.728,04/01/13
furnace1,216.147,05/01/13
furnace1,442.77,06/01/13
furnace1,365.133,07/01/13
furnace1,442.071,08/01/13
furnace1,344.491,09/01/13
furnace1,129.063,10/01/13
furnace1,102.107,11/01/13
drye1,242.476,01/01/13
drye1,199.784,02/01/13
drye1,277.669,03/01/13
drye1,219.554,04/01/13
drye1,264.899,05/01/13
drye1,215.87,06/01/13
drye1,227.439,07/01/13
drye1,233.59,08/01/13
drye1,109.526,09/01/13
drye1,71.387,10/01/13
drye1,182.282,11/01/13
refrigerator1,201.647,01/01/13
refrigerator1,182.292,02/01/13
refrigerator1,221.586,03/01/13
refrigerator1,199.721,04/01/13
refrigerator1,229.552,05/01/13
refrigerator1,208.817,06/01/13
refrigerator1,223.756,07/01/13
refrigerator1,184.401,08/01/13
refrigerator1,220.212,09/01/13
refrigerator1,144.634,10/01/13
refrigerator1,177.516,11/01/13
oven1,71.241,01/01/13
oven1,85.131,02/01/13
oven1,140.786,03/01/13
oven1,67.964,04/01/13
oven1,75.93,05/01/13
oven1,87.348,06/01/13
oven1,78.966,07/01/13
oven1,119.485,08/01/13
oven1,96.956,09/01/13
oven1,39.254,10/01/13
oven1,109.297,11/01/13


9654
key,value,date
air1,0.0,01/01/13
air1,0.0,02/01/13
air1,0.0,03/01/13
air1,92.636,04/01/13
air1,514.444,05/01/13
air1,1167.968,06/01/13
air1,1768.834,07/01/13
air1,1906.73,08/01/13
air1,1673.621,09/01/13
air1,478.365,10/01/13
air1,27.181,11/01/13
kitchen1,206.9,01/01/13
kitchen1,196.617,02/01/13
kitchen1,211.501,03/01/13
kitchen1,226.994,04/01/13
kitchen1,254.867,05/01/13
kitchen1,240.596,06/01/13
kitchen1,261.977,07/01/13
kitchen1,276.707,08/01/13
kitchen1,267.378,09/01/13
kitchen1,185.229,10/01/13
kitchen1,193.364,11/01/13
furnace1,186.555,01/01/13
furnace1,50.156,02/01/13
furnace1,33.317,03/01/13
furnace1,51.73,04/01/13
furnace1,160.488,05/01/13
furnace1,332.274,06/01/13
furnace1,490.522,07/01/13
furnace1,509.194,08/01/13
furnace1,450.502,09/01/13
furnace1,140.523,10/01/13
furnace1,114.347,11/01/13
unknown2,262.501,01/01/13
unknown2,207.193,02/01/13
unknown2,205.743,03/01/13
unknown2,189.118,04/01/13
unknown2,192.866,05/01/13
unknown2,177.531,06/01/13
unknown2,182.055,07/01/13
unknown2,169.199,08/01/13
unknown2,310.625,09/01/13
unknown2,239.721,10/01/13
unknown2,209.438,11/01/13
drye1,112.982,01/01/13
drye1,64.168,02/01/13
drye1,84.071,03/01/13
drye1,83.514,04/01/13
drye1,60.499,05/01/13
drye1,69.906,06/01/13
drye1,83.466,07/01/13
drye1,119.332,08/01/13
drye1,101.056,09/01/13
drye1,93.898,10/01/13
drye1,80.887,11/01/13
bedroom1,52.618,01/01/13
bedroom1,45.722,02/01/13
bedroom1,49.586,03/01/13
bedroom1,73.792,04/01/13
bedroom1,72.843,05/01/13
bedroom1,105.797,06/01/13
bedroom1,117.609,07/01/13
bedroom1,112.425,08/01/13
bedroom1,93.14,09/01/13
bedroom1,39.739,10/01/13
bedroom1,60.993,11/01/13


2829
key,value,date
air1,1.364,01/01/13
air1,15.676,02/01/13
air1,63.472,03/01/13
air1,141.358,04/01/13
air1,825.231,05/01/13
air1,1827.589,06/01/13
air1,2195.62,07/01/13
air1,2504.961,08/01/13
air1,1313.481,09/01/13
air1,247.105,10/01/13
air1,18.503,11/01/13
bedroom2,412.919,01/01/13
bedroom2,349.517,02/01/13
bedroom2,399.588,03/01/13
bedroom2,425.402,04/01/13
bedroom2,439.225,05/01/13
bedroom2,387.671,06/01/13
bedroom2,466.109,07/01/13
bedroom2,435.426,08/01/13
bedroom2,307.578,09/01/13
bedroom2,242.237,10/01/13
bedroom2,292.478,11/01/13
refrigerator1,279.556,01/01/13
refrigerator1,274.275,02/01/13
refrigerator1,306.93,03/01/13
refrigerator1,307.371,04/01/13
refrigerator1,330.796,05/01/13
refrigerator1,339.921,06/01/13
refrigerator1,358.176,07/01/13
refrigerator1,342.797,08/01/13
refrigerator1,322.472,09/01/13
refrigerator1,234.608,10/01/13
refrigerator1,248.773,11/01/13
furnace1,301.545,01/01/13
furnace1,205.538,02/01/13
furnace1,210.705,03/01/13
furnace1,194.412,04/01/13
furnace1,205.824,05/01/13
furnace1,207.063,06/01/13
furnace1,287.611,07/01/13
furnace1,308.413,08/01/13
furnace1,212.004,09/01/13
furnace1,107.115,10/01/13
furnace1,201.56,11/01/13
clotheswasher_dryg1,95.034,01/01/13
clotheswasher_dryg1,83.427,02/01/13
clotheswasher_dryg1,90.287,03/01/13
clotheswasher_dryg1,93.546,04/01/13
clotheswasher_dryg1,103.95,05/01/13
clotheswasher_dryg1,92.732,06/01/13
clotheswasher_dryg1,97.352,07/01/13
clotheswasher_dryg1,85.531,08/01/13
clotheswasher_dryg1,74.309,09/01/13
clotheswasher_dryg1,16.724,10/01/13
clotheswasher_dryg1,59.67,11/01/13
bedroom1,63.759,01/01/13
bedroom1,52.772,02/01/13
bedroom1,62.226,03/01/13
bedroom1,44.398,04/01/13
bedroom1,47.861,05/01/13
bedroom1,34.358,06/01/13
bedroom1,49.337,07/01/13
bedroom1,41.197,08/01/13
bedroom1,35.332,09/01/13
bedroom1,27.928,10/01/13
bedroom1,31.767,11/01/13


6941
key,value,date
air2,122.872,01/01/13
air2,103.452,02/01/13
air2,102.08,03/01/13
air2,238.582,04/01/13
air2,1050.939,05/01/13
air2,2485.724,06/01/13
air2,2245.608,07/01/13
air2,3145.625,08/01/13
air2,1927.77,09/01/13
air2,570.846,10/01/13
air2,177.068,11/01/13
air1,686.972,01/01/13
air1,295.799,02/01/13
air1,216.219,03/01/13
air1,82.994,04/01/13
air1,225.46,05/01/13
air1,567.794,06/01/13
air1,383.308,07/01/13
air1,772.895,08/01/13
air1,641.886,09/01/13
air1,223.593,10/01/13
air1,357.86,11/01/13
refrigerator1,312.076,01/01/13
refrigerator1,285.851,02/01/13
refrigerator1,312.86,03/01/13
refrigerator1,331.12,04/01/13
refrigerator1,375.027,05/01/13
refrigerator1,366.606,06/01/13
refrigerator1,378.658,07/01/13
refrigerator1,360.062,08/01/13
refrigerator1,353.056,09/01/13
refrigerator1,248.719,10/01/13
refrigerator1,299.688,11/01/13
heater1,478.561,01/01/13
heater1,186.664,02/01/13
heater1,211.169,03/01/13
heater1,63.179,04/01/13
heater1,57.323,05/01/13
heater1,73.252,06/01/13
heater1,183.893,07/01/13
heater1,282.238,08/01/13
heater1,235.301,09/01/13
heater1,198.788,10/01/13
heater1,408.19,11/01/13
drye1,177.148,01/01/13
drye1,162.109,02/01/13
drye1,220.441,03/01/13
drye1,145.125,04/01/13
drye1,162.207,05/01/13
drye1,175.213,06/01/13
drye1,56.559,07/01/13
drye1,104.187,08/01/13
drye1,129.686,09/01/13
drye1,99.487,10/01/13
drye1,120.243,11/01/13
dishwasher1,69.074,01/01/13
dishwasher1,53.332,02/01/13
dishwasher1,50.546,03/01/13
dishwasher1,54.577,04/01/13
dishwasher1,57.587,05/01/13
dishwasher1,45.21,06/01/13
dishwasher1,25.538,07/01/13
dishwasher1,47.37,08/01/13
dishwasher1,52.466,09/01/13
dishwasher1,28.728,10/01/13
dishwasher1,51.71,11/01/13


5746
key,value,date
air1,42.999,01/01/13
air1,8.81,02/01/13
air1,113.538,03/01/13
air1,476.474,04/01/13
air1,1364.882,05/01/13
air1,2642.753,06/01/13
air1,2766.182,07/01/13
air1,3218.832,08/01/13
air1,2055.029,09/01/13
air1,593.088,10/01/13
air1,92.834,11/01/13
furnace1,257.704,01/01/13
furnace1,159.97,02/01/13
furnace1,182.206,03/01/13
furnace1,225.95,04/01/13
furnace1,358.499,05/01/13
furnace1,552.243,06/01/13
furnace1,609.897,07/01/13
furnace1,745.628,08/01/13
furnace1,530.477,09/01/13
furnace1,230.371,10/01/13
furnace1,211.749,11/01/13
bedroom2,219.13,01/01/13
bedroom2,244.841,02/01/13
bedroom2,241.937,03/01/13
bedroom2,257.548,04/01/13
bedroom2,270.981,05/01/13
bedroom2,264.902,06/01/13
bedroom2,322.004,07/01/13
bedroom2,292.949,08/01/13
bedroom2,283.343,09/01/13
bedroom2,212.158,10/01/13
bedroom2,246.517,11/01/13
bedroom1,27.35,01/01/13
bedroom1,22.262,02/01/13
bedroom1,27.509,03/01/13
bedroom1,28.311,04/01/13
bedroom1,89.412,05/01/13
bedroom1,103.823,06/01/13
bedroom1,18.873,07/01/13
bedroom1,3.293,08/01/13
bedroom1,0.872,09/01/13
bedroom1,0.317,10/01/13
bedroom1,2.05,11/01/13
livingroom1,3.296,01/01/13
livingroom1,4.15,02/01/13
livingroom1,5.077,03/01/13
livingroom1,4.342,04/01/13
livingroom1,4.012,05/01/13
livingroom1,2.072,06/01/13
livingroom1,3.991,07/01/13
livingroom1,6.338,08/01/13
livingroom1,4.282,09/01/13
livingroom1,2.948,10/01/13
livingroom1,5.508,11/01/13
range1,2.079,01/01/13
range1,1.051,02/01/13
range1,1.08,03/01/13
range1,1.54,04/01/13
range1,1.465,05/01/13
range1,1.9,06/01/13
range1,2.708,07/01/13
range1,8.467,08/01/13
range1,8.066,09/01/13
range1,4.888,10/01/13
range1,4.251,11/01/13



In [178]:
shared_dataids = [6836,2470,1782,661,7875,
                  7850,4767,6941,7940,2638,
                  5357,8197,1714,3795,9830,
                  3367,624,9932,3723,9934,
                  2814,2769,8142,8046,9609,
                  3482,9729,4505,5109,6910,
                  1953,9484,6139,7863,4526,
                  4641,4135,3192,8669,8645,
                  4957,4998]

def get_dataframe(eng,query):
    '''Returns a pandas dataframe with the query results
    '''
    eng_object=eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    df.columns = eng_object.keys()
    return df

def shared_dataset_pull(eng,dataid,month,year):
    '''Grab one month for one dataid from the pecan street shared dataset given the query engine and the dataid
    '''
    query = 'select * from "PecanStreet_SharedData".validated_{0:02d}_{1} where dataid={2}'.format(month,year,dataid)
    df = get_dataframe(eng,query)
    index = df['localminute'].apply(pd.to_datetime)
    df = df.drop(['dataid','localminute','use'],axis=1)
    df.index = index
    return df

In [179]:
data_6836_01_2014 = shared_dataset_pull(eng,shared_dataids[0],1,2014)

In [180]:
type(data_6836_01_2014.index)


Out[180]:
pandas.tseries.index.DatetimeIndex

In [181]:
data_6836_01_2014


Out[181]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 44640 entries, 2014-01-01 00:00:00-06:00 to 2014-01-31 23:59:00-06:00
Data columns (total 70 columns):
air1                    44640  non-null values
air2                    0  non-null values
air3                    0  non-null values
airwindowunit1          0  non-null values
aquarium1               0  non-null values
bathroom1               44640  non-null values
bathroom2               0  non-null values
bedroom1                44640  non-null values
bedroom2                0  non-null values
bedroom3                0  non-null values
bedroom4                0  non-null values
bedroom5                0  non-null values
car1                    44640  non-null values
clotheswasher1          44640  non-null values
clotheswasher_dryg1     0  non-null values
diningroom1             0  non-null values
diningroom2             0  non-null values
dishwasher1             44640  non-null values
disposal1               44640  non-null values
drye1                   44640  non-null values
dryg1                   0  non-null values
freezer1                0  non-null values
furnace1                44640  non-null values
furnace2                0  non-null values
garage1                 0  non-null values
garage2                 0  non-null values
gen                     44640  non-null values
grid                    0  non-null values
heater1                 0  non-null values
housefan1               0  non-null values
icemaker1               0  non-null values
jacuzzi1                0  non-null values
kitchen1                0  non-null values
kitchen2                0  non-null values
kitchenapp1             44640  non-null values
kitchenapp2             44640  non-null values
lights_plugs1           0  non-null values
lights_plugs2           0  non-null values
lights_plugs3           0  non-null values
lights_plugs4           0  non-null values
lights_plugs5           0  non-null values
lights_plugs6           0  non-null values
livingroom1             44640  non-null values
livingroom2             0  non-null values
microwave1              44640  non-null values
office1                 0  non-null values
outsidelights_plugs1    0  non-null values
outsidelights_plugs2    0  non-null values
oven1                   0  non-null values
oven2                   0  non-null values
pool1                   0  non-null values
pool2                   0  non-null values
poollight1              0  non-null values
poolpump1               0  non-null values
pump1                   0  non-null values
range1                  0  non-null values
refrigerator1           44640  non-null values
refrigerator2           0  non-null values
security1               0  non-null values
shed1                   0  non-null values
sprinkler1              0  non-null values
unknown1                44640  non-null values
unknown2                44640  non-null values
unknown3                0  non-null values
unknown4                0  non-null values
utilityroom1            0  non-null values
venthood1               0  non-null values
waterheater1            0  non-null values
waterheater2            0  non-null values
winecooler1             0  non-null values
dtypes: object(70)

In [182]:
def convert_1min_to_15min(dataframe):
    how = {col:'sum' for col in dataframe.columns}
    res = dataframe.resample('15Min', how=how)
    return res
data_6836_01_2014_15min = convert_1min_to_15min(data_6836_01_2014)

In [183]:
data_6836_01_2014_15min.index


Out[183]:
<class 'pandas.tseries.index.DatetimeIndex'>
[2014-01-01 00:00:00, ..., 2014-01-31 23:45:00]
Length: 2976, Freq: 15T, Timezone: psycopg2.tz.FixedOffsetTimezone(offset=-360, name=None)

In [184]:
data_6836_01_2014_15min


Out[184]:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2976 entries, 2014-01-01 00:00:00-06:00 to 2014-01-31 23:45:00-06:00
Freq: 15T
Data columns (total 70 columns):
lights_plugs2           0  non-null values
lights_plugs3           0  non-null values
dishwasher1             2976  non-null values
lights_plugs1           0  non-null values
lights_plugs6           0  non-null values
jacuzzi1                0  non-null values
lights_plugs4           0  non-null values
lights_plugs5           0  non-null values
bedroom1                2976  non-null values
bedroom2                0  non-null values
car1                    2976  non-null values
bedroom4                0  non-null values
clotheswasher_dryg1     0  non-null values
furnace2                0  non-null values
winecooler1             0  non-null values
drye1                   2976  non-null values
range1                  0  non-null values
garage1                 0  non-null values
pump1                   0  non-null values
unknown4                0  non-null values
oven2                   0  non-null values
airwindowunit1          0  non-null values
disposal1               2976  non-null values
dryg1                   0  non-null values
diningroom1             0  non-null values
unknown2                2976  non-null values
diningroom2             0  non-null values
utilityroom1            0  non-null values
shed1                   0  non-null values
unknown3                0  non-null values
furnace1                2976  non-null values
waterheater2            0  non-null values
livingroom1             2976  non-null values
kitchen1                0  non-null values
kitchen2                0  non-null values
clotheswasher1          2976  non-null values
office1                 0  non-null values
refrigerator2           0  non-null values
pool2                   0  non-null values
pool1                   0  non-null values
refrigerator1           2976  non-null values
sprinkler1              0  non-null values
microwave1              2976  non-null values
oven1                   0  non-null values
bedroom3                0  non-null values
bathroom2               0  non-null values
bathroom1               2976  non-null values
kitchenapp1             2976  non-null values
kitchenapp2             2976  non-null values
outsidelights_plugs1    0  non-null values
outsidelights_plugs2    0  non-null values
bedroom5                0  non-null values
icemaker1               0  non-null values
freezer1                0  non-null values
grid                    0  non-null values
livingroom2             0  non-null values
poollight1              0  non-null values
gen                     2976  non-null values
garage2                 0  non-null values
heater1                 0  non-null values
poolpump1               0  non-null values
housefan1               0  non-null values
waterheater1            0  non-null values
venthood1               0  non-null values
security1               0  non-null values
aquarium1               0  non-null values
air1                    2976  non-null values
air2                    0  non-null values
air3                    0  non-null values
unknown1                2976  non-null values
dtypes: float64(53), object(17)

In [185]:
sum_data_6836_01_2014_15min = data_6836_01_2014_15min.sum().fillna(0.)
sum_data_6836_01_2014_15min.sort(ascending=False)

top_6_appliances_6836_01_2014 = get_top_k_appliances(sum_data_6836_01_2014_15min,6)
top_6_appliances_6836_01_2014_usages = sum_data_6836_01_2014_15min[top_6_appliances_6836_01_2014]
csv_string = get_usage_csv([top_6_appliances_6836_01_2014_usages],1,(1,2),2014)

In [186]:
print csv_string


key,value,date
gen,19849.026,01/01/14
car1,5992.75,01/01/14
furnace1,3783.219,01/01/14
drye1,2831.3,01/01/14
refrigerator1,2156.019,01/01/14
livingroom1,1465.6,01/01/14


In [ ]: