In [2]:
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *
import numpy as np
import json 
import os


/usr/local/lib/python2.7/dist-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [3]:
##engine information
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
schema = '\"PecanStreet_SharedData\"'

In [4]:
#Sabina's functions:
def get_eng():
    url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db
    eng = create_engine(url)
    eng.echo=True
    return eng

def get_dataids(eng,table):
    query = "select distinct("+ schema + "."+table + ".dataid" +") from " + schema + "." +table
    eng_object=eng.execute(query)
    l=list(eng_object.fetchall())
    return l

def get_table_by_id(eng,table,i):
    query = "select * from "+schema+"."+table+ " where " +schema+"."+table+".dataid="+i
    eng_object=eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    df.columns = eng_object.keys()
    return df

def get_query(kind,col,table):
    if kind=='sum':
        return "select sum("+col +") from " + schema +"."+ table
    if kind =='count':
        return "select count("+col+ ") from " + schema +"."+ table
    if kind =='distinct':
        return "select distinct(" + schema  +"."+table+"."+col+ ") from " + schema +"."+ table
    #+"."+table+"."+col+ ") from " + schema +"."+ table
    
def execute_query(eng,query,form):
    ds =[]
    eng_object=eng.execute(query)
    if form == 'num':
        ds=(eng_object.fetchall())
    elif form=='ls':
        ds = list(eng_object.fetchall())
    elif form =='df':
        ds = pd.DataFrame.from_records(eng_object.fetchall())
        ds.columns = eng_object.keys()
    return ds

In [5]:
def make_JSON_files(device_type,df,device_name): 
    time_15_array=[]
    value_15_array=[]
    store_json={}
    json_15={}
    path='../PecanStreet/'+device_type+'/'+device_name
    yesterday=0
    print str(device_name) + ' ' + str(device_type)
    if(schema=='\"PecanStreet_SharedData\"'):
        timeColumn='localminute'
    elif(schema=='\"PecanStreet_CuratedSets\"'):
        timeColumn='utc_15min'
    for i,value in enumerate(df[device_type]):
        if(df[device_type][i] is not None):
            if(yesterday!=df[timeColumn][i].date() and yesterday!=0):
                time_15_array.append(str(df[timeColumn][i].replace(tzinfo=None)))
                value_15_array.append(int(df[device_type][i]*1000))
                json_15['times']=time_15_array
                json_15['values']=value_15_array
                store_json['time_15']=json_15
                if(np.count_nonzero(value_15_array)>0):
                    filename=device_type+'_'+device_name+'_'+str(df[timeColumn][i].date())+'.json'
                    if not os.path.exists(path):
                        os.makedirs(path)    
                    with open(path+'/'+filename, 'w') as outfile:
                        json.dump(store_json, outfile)
                    time_15_array=[]
                    value_15_array=[]
            else:
                time_15_array.append(str(df[timeColumn][i].replace(tzinfo=None)))
                value_15_array.append(int(df[device_type][i]*1000))
            yesterday= df[timeColumn][i].date()

In [6]:
eng = get_eng()

In [7]:
#How to query a single house id for a single month
#query data by id
#house_id=1782
#year_month='01_2014'
#query = get_table_by_id(eng,'validated_'+year_month,str(house_id))

In [8]:
#create dataframe
df = pd.DataFrame(query)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-984a0280f306> in <module>()
      1 #create dataframe
----> 2 df = pd.DataFrame(query)

NameError: name 'query' is not defined

In [9]:
#view head of df
df.head()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-7a4cf30d96a0> in <module>()
      1 #view head of df
----> 2 df.head()

NameError: name 'df' is not defined

In [10]:
houses=['6836', '2470', '1782', '661', '7875', '7850', '4767', '6941', '7940', '2638', '5357', '8197', '1714', '3795', '9830', '3367', '624', '9932', '3723', '9934', '2814', '2769', '8142', '8046', '9609', '3482', '9729', '4505', '5109', '6910', '1953', '9484', '6139' '7863', '4526', '4641', '4135', '3192', '8669', '8645', '4957' '4998']

In [10]:


In [11]:
#2012-01-16 00:45:05
#This ignores traces that contain only zeros
year_month='05_2014'

devices_types=['air1', 'car1', 'clotheswasher1', 'dishwasher1', 'disposal1', 'furnace1', 'gen', 'kitchenapp1', 'lights_plugs1', 'lights_plugs2', 'microwave1', 'oven1', 'oven2', 'refrigerator1', 'refrigerator2', 'venthood1']
for house_id in houses:
    query = get_table_by_id(eng,'validated_'+year_month,str(house_id))
    df = pd.DataFrame(query)
    device_name=str(house_id)
    for device_type in devices_types:
        if(df[device_type][0] is not None):
            make_JSON_files(device_type,df,device_name)


2014-06-30 09:10:29,677 INFO sqlalchemy.engine.base.Engine select version()
INFO:sqlalchemy.engine.base.Engine:select version()
2014-06-30 09:10:29,679 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-30 09:10:29,805 INFO sqlalchemy.engine.base.Engine select current_schema()
INFO:sqlalchemy.engine.base.Engine:select current_schema()
2014-06-30 09:10:29,807 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-30 09:10:29,927 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2014-06-30 09:10:29,928 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-30 09:10:29,982 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2014-06-30 09:10:29,983 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-30 09:10:30,151 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
INFO:sqlalchemy.engine.base.Engine:show standard_conforming_strings
2014-06-30 09:10:30,152 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-30 09:10:30,273 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=6836
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=6836
2014-06-30 09:10:30,274 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
6836 air1
6836 car1
6836 clotheswasher1
6836 dishwasher1
6836 disposal1
6836 furnace1
6836 gen
6836 kitchenapp1
6836 microwave1
6836 refrigerator1
2014-06-30 09:12:26,827 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=2470
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=2470
2014-06-30 09:12:26,828 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2470 air1
2470 car1
2470 clotheswasher1
2470 dishwasher1
2470 furnace1
2470 gen
2470 kitchenapp1
2470 lights_plugs1
2470 refrigerator1
2014-06-30 09:14:16,699 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=1782
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=1782
2014-06-30 09:14:16,700 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
1782 air1
1782 car1
1782 clotheswasher1
1782 dishwasher1
1782 gen
1782 kitchenapp1
1782 microwave1
1782 oven1
1782 refrigerator1
1782 refrigerator2
2014-06-30 09:15:55,936 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=661
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=661
2014-06-30 09:15:55,937 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
661 air1
661 car1
661 clotheswasher1
661 dishwasher1
661 disposal1
661 furnace1
661 gen
661 kitchenapp1
661 microwave1
661 refrigerator1
2014-06-30 09:18:11,267 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=7875
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=7875
2014-06-30 09:18:11,268 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
7875 air1
7875 car1
7875 dishwasher1
7875 furnace1
7875 gen
7875 microwave1
7875 refrigerator1
2014-06-30 09:19:32,660 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=7850
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=7850
2014-06-30 09:19:32,660 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
7850 air1
7850 car1
7850 clotheswasher1
7850 dishwasher1
7850 disposal1
7850 furnace1
7850 gen
7850 kitchenapp1
7850 microwave1
7850 refrigerator1
2014-06-30 09:22:57,682 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=4767
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=4767
2014-06-30 09:22:57,683 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
4767 air1
4767 car1
4767 clotheswasher1
4767 dishwasher1
4767 furnace1
4767 gen
4767 kitchenapp1
4767 microwave1
4767 oven1
4767 refrigerator1
2014-06-30 09:26:23,587 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=6941
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_05_2014 where "PecanStreet_SharedData".validated_05_2014.dataid=6941
2014-06-30 09:26:23,588 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-11-901c56d49e20> in <module>()
      5 devices_types=['air1', 'car1', 'clotheswasher1', 'dishwasher1', 'disposal1', 'furnace1', 'gen', 'kitchenapp1', 'lights_plugs1', 'lights_plugs2', 'microwave1', 'oven1', 'oven2', 'refrigerator1', 'refrigerator2', 'venthood1']
      6 for house_id in houses:
----> 7     query = get_table_by_id(eng,'validated_'+year_month,str(house_id))
      8     df = pd.DataFrame(query)
      9     device_name=str(house_id)

<ipython-input-4-d9edeebf5b74> in get_table_by_id(eng, table, i)
     14 def get_table_by_id(eng,table,i):
     15     query = "select * from "+schema+"."+table+ " where " +schema+"."+table+".dataid="+i
---> 16     eng_object=eng.execute(query)
     17     df = pd.DataFrame.from_records(eng_object.fetchall())
     18     df.columns = eng_object.keys()

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in execute(self, statement, *multiparams, **params)
   1680 
   1681         connection = self.contextual_connect(close_with_result=True)
-> 1682         return connection.execute(statement, *multiparams, **params)
   1683 
   1684     def scalar(self, statement, *multiparams, **params):

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in execute(self, object, *multiparams, **params)
    710         """
    711         if isinstance(object, util.string_types[0]):
--> 712             return self._execute_text(object, multiparams, params)
    713         try:
    714             meth = object._execute_on_connection

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_text(self, statement, multiparams, params)
    859             statement,
    860             parameters,
--> 861             statement, parameters
    862         )
    863         if self._has_events or self.engine._has_events:

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
    938                                      statement,
    939                                      parameters,
--> 940                                      context)
    941         except Exception as e:
    942             self._handle_dbapi_exception(

/usr/local/lib/python2.7/dist-packages/sqlalchemy/engine/default.pyc in do_execute(self, cursor, statement, parameters, context)
    433 
    434     def do_execute(self, cursor, statement, parameters, context=None):
--> 435         cursor.execute(statement, parameters)
    436 
    437     def do_execute_no_params(self, cursor, statement, context=None):

<string> in __new__(_cls, name, type_code, display_size, internal_size, precision, scale, null_ok)

KeyboardInterrupt: 

In [ ]:
device_type='clotheswasher1'
if(df[device_type][0] is not None and np.count_nonzero(np.toarray(df[device_type])):
    make_JSON_files(device_type,df,device_name)

In [16]:
#create a json object from our dataframe
df_to_json = df.to_json()

In [85]:
#read the df_to_json back to df..... this will create 'None's into NaN
#and rearrange the df in abc order
read_json = pd.read_json(df_to_json)

In [ ]:
read_json.head()

In [17]:
#save the json
with open('../PecanStreet/'+str(house_id)+'_'+str(year_month)+'.json', 'w') as outfile:
  json.dump(df_to_json, outfile)

In [ ]: