In [2]:
import psycopg2
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *
import xlwt


/Users/sabina/anaconda/lib/python2.7/site-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [3]:
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"

In [4]:
#trying sqlalchemy engine
#url format:
#dialect+driver://username:password@host:port/database
url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db

In [5]:
engine = create_engine(url)
engine.echo=True

In [6]:
#df = pd.DataFrame.from_records(engine.execute(query).fetchall(),index_col=1)

In [7]:
l = engine.execute("""select distinct("PecanStreet_CuratedSets".group1_disaggregated_2013_01.dataid) from "PecanStreet_CuratedSets".group1_disaggregated_2013_01""").fetchall()
l=[e[0] for e in l]


2014-06-10 08:45:22,446 INFO sqlalchemy.engine.base.Engine select version()
INFO:sqlalchemy.engine.base.Engine:select version()
2014-06-10 08:45:22,447 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 08:45:22,537 INFO sqlalchemy.engine.base.Engine select current_schema()
INFO:sqlalchemy.engine.base.Engine:select current_schema()
2014-06-10 08:45:22,537 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 08:45:22,626 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2014-06-10 08:45:22,628 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 08:45:22,671 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2014-06-10 08:45:22,672 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 08:45:22,801 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
INFO:sqlalchemy.engine.base.Engine:show standard_conforming_strings
2014-06-10 08:45:22,802 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 08:45:22,892 INFO sqlalchemy.engine.base.Engine select distinct("PecanStreet_CuratedSets".group1_disaggregated_2013_01.dataid) from "PecanStreet_CuratedSets".group1_disaggregated_2013_01
INFO:sqlalchemy.engine.base.Engine:select distinct("PecanStreet_CuratedSets".group1_disaggregated_2013_01.dataid) from "PecanStreet_CuratedSets".group1_disaggregated_2013_01
2014-06-10 08:45:22,893 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}

In [8]:
query = """select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01"""
eng = engine.execute(query)


2014-06-10 08:45:23,085 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01
2014-06-10 08:45:23,087 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}

In [9]:
fall = eng.fetchall()
keys = eng.keys()

In [10]:
df = pd.DataFrame.from_records(fall)
df.columns = keys

In [11]:
df2=df.groupby('dataid')

In [12]:
df.head()


Out[12]:
id dataid utc_15min air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 ... unknown1 unknown2 unknown3 unknown4 use utilityroom1 venthood1 waterheater1 waterheater2 winecooler1
0 1 9654 2013-01-01 06:00:00-06:00 0.000 0 0 0 0 0 0 ... 0.000 0.104 0 0 0.561 0 0 0 0 0
1 2 9654 2013-01-01 06:15:00-06:00 0.000 0 0 0 0 0 0 ... 0.000 0.104 0 0 0.201 0 0 0 0 0
2 3 9654 2013-01-01 06:30:00-06:00 0.000 0 0 0 0 0 0 ... 0.000 0.375 0 0 0.496 0 0 0 0 0
3 4 9654 2013-01-01 06:45:00-06:00 0.000 0 0 0 0 0 0 ... 0.000 0.374 0 0 0.499 0 0 0 0 0
4 5 9654 2013-01-01 07:00:00-06:00 0.000 0 0 0 0 0 0 ... 0.000 0.029 0 0 0.105 0 0 0 0 0

5 rows × 74 columns


In [13]:
##should actually hook this up so it is only taking the houses with audit data or some other subset
dict_df={}
for i in df2:
   if i[0]!=9923: 
       dict_df[i[0]]=i

In [14]:
df_temp = (dict_df[9654][1]).drop('dataid',1)
df_temp = df_temp.drop('id',1)

In [20]:
##could write to excel may be easier than hooking this up to Pecan as is
cv=[str(name) for name in list(df_temp.columns.values)]

column_names

column_names


In [25]:
(cv)


Out[25]:
['utc_15min',
 'air1',
 'air2',
 'air3',
 'airwindowunit1',
 'aquarium1',
 'bathroom1',
 'bathroom2',
 'bedroom1',
 'bedroom2',
 'bedroom3',
 'bedroom4',
 'bedroom5',
 'car1',
 'clotheswasher1',
 'clotheswasher_dryg1',
 'diningroom1',
 'diningroom2',
 'dishwasher1',
 'disposal1',
 'drye1',
 'dryg1',
 'freezer1',
 'furnace1',
 'furnace2',
 'garage1',
 'garage2',
 'heater1',
 'housefan1',
 'icemaker1',
 'jacuzzi1',
 'kitchen1',
 'kitchen2',
 'kitchenapp1',
 'kitchenapp2',
 'lights_plugs1',
 'lights_plugs2',
 'lights_plugs3',
 'lights_plugs4',
 'lights_plugs5',
 'lights_plugs6',
 'livingroom1',
 'livingroom2',
 'microwave1',
 'office1',
 'outsidelights_plugs1',
 'outsidelights_plugs2',
 'oven1',
 'oven2',
 'pool1',
 'pool2',
 'poollight1',
 'poolpump1',
 'pump1',
 'range1',
 'refrigerator1',
 'refrigerator2',
 'security1',
 'shed1',
 'sprinkler1',
 'subpanel1',
 'subpanel2',
 'unknown1',
 'unknown2',
 'unknown3',
 'unknown4',
 'use',
 'utilityroom1',
 'venthood1',
 'waterheater1',
 'waterheater2',
 'winecooler1']

In [23]:
appliance_name_mapping = {
    'ksac': 'kitchen outlets',
    'tvroom': 'misc',
    'garage/refrigerator': 'fridge',
    'office': 'desktop computer',
    'dryg': 'washer dryer',
    'bath': 'bathroom misc',
    'genlight': 'lighting',
    'oven': 'oven',
    'bedroom': 'bedroom misc',
    'subl': 'subpanel',
    'masterbed': 'bedroom misc',
    'bathroom': 'bathroom misc',
    'livingroom': 'misc',
    'sprinkler': 'sprinkler',
    'disposal': 'disposal',
    'masterbath': 'bathroom misc',
    'microwave': 'microwave',
    'drye': 'washer dryer',
    'smallappliance': 'misc',
    'washer': 'washing machine',
    'furnace': 'central heater boiler',
    'gri': 'grid',
    'lighting&plugs': 'plugs',
    'famroom': 'misc',
    'dryer': 'washer dryer',
    'diningroom': 'misc',
    'ove': 'oven',
    'backyard': 'misc',
    'cooktop': 'cooker',
    'refrigerator': 'fridge',
    'kitchen': 'kitchen misc',
    'dishwasher': 'dishwasher',
    'theater': 'entertainment',
    'washingmachine': 'washing machine',
    'car': 'electric vehicle',
    'air': 'air conditioner',
    'garage': 'misc',
    'range': 'cooker',
    'waterheater': 'water heater',
    'security': 'security',
    'ai': 'air conditioner'
}

In [28]:
##are any of the names the same?
##no
for a in appliance_name_mapping.keys():
    if a in cv:
        print a

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [31]:
(sorted(appliance_name_mapping))


Out[31]:
['ai',
 'air',
 'backyard',
 'bath',
 'bathroom',
 'bedroom',
 'car',
 'cooktop',
 'diningroom',
 'dishwasher',
 'disposal',
 'drye',
 'dryer',
 'dryg',
 'famroom',
 'furnace',
 'garage',
 'garage/refrigerator',
 'genlight',
 'gri',
 'kitchen',
 'ksac',
 'lighting&plugs',
 'livingroom',
 'masterbath',
 'masterbed',
 'microwave',
 'office',
 'ove',
 'oven',
 'range',
 'refrigerator',
 'security',
 'smallappliance',
 'sprinkler',
 'subl',
 'theater',
 'tvroom',
 'washer',
 'washingmachine',
 'waterheater']

In [32]:
cv


Out[32]:
['utc_15min',
 'air1',
 'air2',
 'air3',
 'airwindowunit1',
 'aquarium1',
 'bathroom1',
 'bathroom2',
 'bedroom1',
 'bedroom2',
 'bedroom3',
 'bedroom4',
 'bedroom5',
 'car1',
 'clotheswasher1',
 'clotheswasher_dryg1',
 'diningroom1',
 'diningroom2',
 'dishwasher1',
 'disposal1',
 'drye1',
 'dryg1',
 'freezer1',
 'furnace1',
 'furnace2',
 'garage1',
 'garage2',
 'heater1',
 'housefan1',
 'icemaker1',
 'jacuzzi1',
 'kitchen1',
 'kitchen2',
 'kitchenapp1',
 'kitchenapp2',
 'lights_plugs1',
 'lights_plugs2',
 'lights_plugs3',
 'lights_plugs4',
 'lights_plugs5',
 'lights_plugs6',
 'livingroom1',
 'livingroom2',
 'microwave1',
 'office1',
 'outsidelights_plugs1',
 'outsidelights_plugs2',
 'oven1',
 'oven2',
 'pool1',
 'pool2',
 'poollight1',
 'poolpump1',
 'pump1',
 'range1',
 'refrigerator1',
 'refrigerator2',
 'security1',
 'shed1',
 'sprinkler1',
 'subpanel1',
 'subpanel2',
 'unknown1',
 'unknown2',
 'unknown3',
 'unknown4',
 'use',
 'utilityroom1',
 'venthood1',
 'waterheater1',
 'waterheater2',
 'winecooler1']

In [ ]: