In [84]:
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import create_engine
import json
import os

In [8]:
# engine information (the schema name is mixed-case, so it stays double-quoted for Postgres)
user_name = 'USERNAME'
pw = 'PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
schema = '"PecanStreet_CuratedSets"'
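
Hard-coding credentials in a notebook is easy to leak. Below is a minimal sketch of pulling them from environment variables instead; the WIKIENERGY_* variable names are assumptions, not part of the original notebook.

# hypothetical: read credentials from the environment, falling back to the values above
user_name = os.environ.get('WIKIENERGY_USER', user_name)
pw = os.environ.get('WIKIENERGY_PASSWORD', pw)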

In [9]:
#Sabina's functions:
def get_eng():
    # build a Postgres connection URL and return an engine; echo=True logs every SQL statement
    url = "postgresql://" + user_name + ":" + pw + "@" + host + ":" + port + "/" + db
    eng = create_engine(url)
    eng.echo = True
    return eng

def get_dataids(eng, table):
    # list the distinct dataid values (house ids) present in a table
    query = "select distinct(" + schema + "." + table + ".dataid) from " + schema + "." + table
    eng_object = eng.execute(query)
    l = list(eng_object.fetchall())
    return l

def get_table_by_id(eng, table, i):
    # fetch every row for one dataid (passed in as a string) and return it as a DataFrame
    query = "select * from " + schema + "." + table + " where " + schema + "." + table + ".dataid=" + i
    eng_object = eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    df.columns = eng_object.keys()
    return df

def get_query(kind, col, table):
    # build a simple aggregate query string: 'sum', 'count', or 'distinct' over one column
    if kind == 'sum':
        return "select sum(" + col + ") from " + schema + "." + table
    if kind == 'count':
        return "select count(" + col + ") from " + schema + "." + table
    if kind == 'distinct':
        return "select distinct(" + schema + "." + table + "." + col + ") from " + schema + "." + table
    
def execute_query(eng, query, form):
    # run a query and return the result as raw rows ('num'), a list ('ls'), or a DataFrame ('df')
    ds = []
    eng_object = eng.execute(query)
    if form == 'num':
        ds = eng_object.fetchall()
    elif form == 'ls':
        ds = list(eng_object.fetchall())
    elif form == 'df':
        ds = pd.DataFrame.from_records(eng_object.fetchall())
        ds.columns = eng_object.keys()
    return ds

In [10]:
eng = get_eng()
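
As a quick sanity check of the helpers above, the sketch below lists the houses in one table and counts its rows. This is a hypothetical usage example, not from the original notebook; the results depend on the live database, and echo=True will log each statement.

# hypothetical usage of get_dataids, get_query and execute_query
table = 'group1_disaggregated_2013_03'
ids = get_dataids(eng, table)          # list of (dataid,) row tuples
n_rows = execute_query(eng, get_query('count', 'dataid', table), 'num')
print(len(ids))
print(n_rows)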

In [37]:
# query all rows for one house and month (get_table_by_id already returns a DataFrame)
house_id = 2829
year_month = '2013_03'
query = get_table_by_id(eng, 'group1_disaggregated_' + year_month, str(house_id))


2014-06-23 17:32:09,604 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_03 where "PecanStreet_CuratedSets".group1_disaggregated_2013_03.dataid=2829
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_03 where "PecanStreet_CuratedSets".group1_disaggregated_2013_03.dataid=2829
2014-06-23 17:32:09,605 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
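
The house id is concatenated straight into the SQL above; binding it as a parameter with SQLAlchemy's text() construct is equivalent and avoids quoting issues. A small sketch, not part of the original notebook.

# hypothetical: the same per-house query with a bound parameter
stmt = sa.text('select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_03'
               ' where dataid = :house_id')
result = eng.execute(stmt, house_id=house_id)
df_param = pd.DataFrame.from_records(result.fetchall(), columns=result.keys())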

In [39]:
# wrap the result in a DataFrame (a no-op here, since get_table_by_id already returns one)
df = pd.DataFrame(query)

In [40]:
#view head of df
df.head()


Out[40]:
id dataid utc_15min air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 ... unknown1 unknown2 unknown3 unknown4 use utilityroom1 venthood1 waterheater1 waterheater2 winecooler1
0 56555 2829 2013-03-01 08:30:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.615 0 0 0 0 0
1 56556 2829 2013-03-01 08:45:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.442 0 0 0 0 0
2 56545 2829 2013-03-01 06:00:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.438 0 0 0 0 0
3 56546 2829 2013-03-01 06:15:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.421 0 0 0 0 0
4 56547 2829 2013-03-01 06:30:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.602 0 0 0 0 0

5 rows × 74 columns
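
The rows come back in no particular order (the head above shows 08:30 before 06:00), so it can help to sort by the timestamp column before the per-day export below. A one-line sketch, not in the original notebook; older pandas versions spell sort_values as df.sort.

# hypothetical: order the readings chronologically and renumber the index
df = df.sort_values('utc_15min').reset_index(drop=True)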


In [91]:
# split one appliance channel into per-day JSON files of 15-minute readings
time_15_array = []
value_15_array = []
store_json = {}
json_15 = {}
device_name = str(house_id)
device_type = 'bedroom2'
yesterday = 0
for i, value in enumerate(df[device_type]):
    # when the date changes, flush the previous day's readings to a JSON file
    if yesterday != df['utc_15min'][i].date() and yesterday != 0:
        json_15['times'] = time_15_array
        json_15['values'] = value_15_array
        store_json['time_15'] = json_15
        filename = device_type + '_' + device_name + '_' + str(yesterday) + '.json'
        if not os.path.exists('PecanStreet/' + device_type + '/' + device_name):
            os.makedirs('PecanStreet/' + device_type + '/' + device_name)
        with open('PecanStreet/' + device_type + '/' + device_name + '/' + filename, 'w') as outfile:
            json.dump(store_json, outfile)
        time_15_array = []
        value_15_array = []
    # record the current reading: timestamp without timezone info, value scaled by 1000 to an int
    time_15_array.append(str(df['utc_15min'][i].replace(tzinfo=None)))
    value_15_array.append(int(value * 1000))
    yesterday = df['utc_15min'][i].date()
# write out the final day, which the loop above never reaches
if time_15_array:
    json_15['times'] = time_15_array
    json_15['values'] = value_15_array
    store_json['time_15'] = json_15
    filename = device_type + '_' + device_name + '_' + str(yesterday) + '.json'
    if not os.path.exists('PecanStreet/' + device_type + '/' + device_name):
        os.makedirs('PecanStreet/' + device_type + '/' + device_name)
    with open('PecanStreet/' + device_type + '/' + device_name + '/' + filename, 'w') as outfile:
        json.dump(store_json, outfile)
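
The same per-day split can also be written with a pandas groupby, which avoids tracking the previous date by hand. A sketch under the same assumptions as the loop above (timezone-aware timestamps in utc_15min, readings scaled by 1000 and cast to int); not part of the original notebook.

# hypothetical groupby-based version of the per-day export
out_dir = os.path.join('PecanStreet', device_type, device_name)
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for day, day_df in df.groupby(df['utc_15min'].apply(lambda t: t.date())):
    payload = {'time_15': {
        'times': [str(t.replace(tzinfo=None)) for t in day_df['utc_15min']],
        'values': [int(v * 1000) for v in day_df[device_type]],
    }}
    with open(os.path.join(out_dir, device_type + '_' + device_name + '_' + str(day) + '.json'), 'w') as f:
        json.dump(payload, f)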


In [24]:
# serialize the DataFrame to a JSON string
df_to_json = df.to_json()

In [85]:
# read the JSON string back into a DataFrame: None values become NaN, the columns are
# reordered alphabetically, and the index comes back sorted as strings (0, 1, 10, 100, ...)
read_json = pd.read_json(df_to_json)
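
If row order and the timestamp column matter on the way back, the orient and date options can be set explicitly instead of relying on the defaults. A sketch, not from the original notebook; defaults differ slightly across pandas versions.

# hypothetical: round-trip with explicit options so dates survive and row order is preserved
df_records = df.to_json(orient='records', date_format='iso')
df_back = pd.read_json(df_records, orient='records', convert_dates=['utc_15min'])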

In [86]:
read_json.head()


Out[86]:
air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 bedroom1 bedroom2 bedroom3 bedroom4 bedroom5 car1 clotheswasher1 clotheswasher_dryg1 dataid diningroom1 diningroom2 dishwasher1 disposal1
0 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
1 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
10 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
100 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
1000 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...

5 rows × 73 columns


In [46]:
# save the JSON string for the whole house/month; write it directly, since json.dump on a
# string would wrap it in an extra layer of quoting
with open('PecanStreet/' + str(house_id) + '_' + str(year_month) + '.json', 'w') as outfile:
    outfile.write(df_to_json)
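
To confirm the file round-trips, it can be read back and parsed into a DataFrame. A minimal check assuming the write above succeeded; not part of the original notebook.

# hypothetical: load the saved file and rebuild the DataFrame
with open('PecanStreet/' + str(house_id) + '_' + str(year_month) + '.json') as infile:
    df_roundtrip = pd.read_json(infile.read())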