In [84]:
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import create_engine
import json
import os

In [8]:
# engine information (the schema name is mixed-case, so it stays double-quoted for Postgres)
user_name = 'USERNAME'
pw = 'PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
schema = '"PecanStreet_CuratedSets"'
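
Hard-coding credentials in a notebook is easy to leak. Below is a minimal sketch of pulling them from environment variables instead; the WIKIENERGY_* variable names are assumptions, not part of the original notebook.

# hypothetical: read credentials from the environment, falling back to the values above
user_name = os.environ.get('WIKIENERGY_USER', user_name)
pw = os.environ.get('WIKIENERGY_PASSWORD', pw)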

In [9]:
#Sabina's functions:
def get_eng():
    # build a Postgres connection URL and return an engine; echo=True logs every SQL statement
    url = "postgresql://" + user_name + ":" + pw + "@" + host + ":" + port + "/" + db
    eng = create_engine(url)
    eng.echo = True
    return eng

def get_dataids(eng, table):
    # list the distinct dataid values (house ids) present in a table
    query = "select distinct(" + schema + "." + table + ".dataid) from " + schema + "." + table
    eng_object = eng.execute(query)
    l = list(eng_object.fetchall())
    return l

def get_table_by_id(eng, table, i):
    # fetch every row for one dataid (passed in as a string) and return it as a DataFrame
    query = "select * from " + schema + "." + table + " where " + schema + "." + table + ".dataid=" + i
    eng_object = eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    df.columns = eng_object.keys()
    return df

def get_query(kind, col, table):
    # build a simple aggregate query string: 'sum', 'count', or 'distinct' over one column
    if kind == 'sum':
        return "select sum(" + col + ") from " + schema + "." + table
    if kind == 'count':
        return "select count(" + col + ") from " + schema + "." + table
    if kind == 'distinct':
        return "select distinct(" + schema + "." + table + "." + col + ") from " + schema + "." + table
    
def execute_query(eng, query, form):
    # run a query and return the result as raw rows ('num'), a list ('ls'), or a DataFrame ('df')
    ds = []
    eng_object = eng.execute(query)
    if form == 'num':
        ds = eng_object.fetchall()
    elif form == 'ls':
        ds = list(eng_object.fetchall())
    elif form == 'df':
        ds = pd.DataFrame.from_records(eng_object.fetchall())
        ds.columns = eng_object.keys()
    return ds

In [10]:
eng = get_eng()
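
As a quick sanity check of the helpers above, the sketch below lists the houses in one table and counts its rows. This is a hypothetical usage example, not from the original notebook; the results depend on the live database, and echo=True will log each statement.

# hypothetical usage of get_dataids, get_query and execute_query
table = 'group1_disaggregated_2013_03'
ids = get_dataids(eng, table)          # list of (dataid,) row tuples
n_rows = execute_query(eng, get_query('count', 'dataid', table), 'num')
print(len(ids))
print(n_rows)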

In [37]:
# query all rows for one house and month (get_table_by_id already returns a DataFrame)
house_id = 2829
year_month = '2013_03'
query = get_table_by_id(eng, 'group1_disaggregated_' + year_month, str(house_id))


2014-06-23 17:32:09,604 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_03 where "PecanStreet_CuratedSets".group1_disaggregated_2013_03.dataid=2829
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_03 where "PecanStreet_CuratedSets".group1_disaggregated_2013_03.dataid=2829
2014-06-23 17:32:09,605 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
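
The house id is concatenated straight into the SQL above; binding it as a parameter with SQLAlchemy's text() construct is equivalent and avoids quoting issues. A small sketch, not part of the original notebook.

# hypothetical: the same per-house query with a bound parameter
stmt = sa.text('select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_03'
               ' where dataid = :house_id')
result = eng.execute(stmt, house_id=house_id)
df_param = pd.DataFrame.from_records(result.fetchall(), columns=result.keys())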

In [39]:
# wrap the result in a DataFrame (a no-op here, since get_table_by_id already returns one)
df = pd.DataFrame(query)

In [40]:
#view head of df
df.head()


Out[40]:
id dataid utc_15min air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 ... unknown1 unknown2 unknown3 unknown4 use utilityroom1 venthood1 waterheater1 waterheater2 winecooler1
0 56555 2829 2013-03-01 08:30:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.615 0 0 0 0 0
1 56556 2829 2013-03-01 08:45:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.442 0 0 0 0 0
2 56545 2829 2013-03-01 06:00:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.438 0 0 0 0 0
3 56546 2829 2013-03-01 06:15:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.421 0 0 0 0 0
4 56547 2829 2013-03-01 06:30:00-06:00 0.000 0 0 0 0 0 0 ... 0 0 0 0 0.602 0 0 0 0 0

5 rows × 74 columns
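
The rows come back in no particular order (the head above shows 08:30 before 06:00), so it can help to sort by the timestamp column before the per-day export below. A one-line sketch, not in the original notebook; older pandas versions spell sort_values as df.sort.

# hypothetical: order the readings chronologically and renumber the index
df = df.sort_values('utc_15min').reset_index(drop=True)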


In [91]:
# split one appliance channel into per-day JSON files of 15-minute readings
time_15_array = []
value_15_array = []
store_json = {}
json_15 = {}
device_name = str(house_id)
device_type = 'bedroom2'
yesterday = 0
for i, value in enumerate(df[device_type]):
    # when the date changes, flush the previous day's readings to a JSON file
    if yesterday != df['utc_15min'][i].date() and yesterday != 0:
        json_15['times'] = time_15_array
        json_15['values'] = value_15_array
        store_json['time_15'] = json_15
        filename = device_type + '_' + device_name + '_' + str(yesterday) + '.json'
        if not os.path.exists('PecanStreet/' + device_type + '/' + device_name):
            os.makedirs('PecanStreet/' + device_type + '/' + device_name)
        with open('PecanStreet/' + device_type + '/' + device_name + '/' + filename, 'w') as outfile:
            json.dump(store_json, outfile)
        time_15_array = []
        value_15_array = []
    # record the current reading: timestamp without timezone info, value scaled by 1000 to an int
    time_15_array.append(str(df['utc_15min'][i].replace(tzinfo=None)))
    value_15_array.append(int(value * 1000))
    yesterday = df['utc_15min'][i].date()
# write out the final day, which the loop above never reaches
if time_15_array:
    json_15['times'] = time_15_array
    json_15['values'] = value_15_array
    store_json['time_15'] = json_15
    filename = device_type + '_' + device_name + '_' + str(yesterday) + '.json'
    if not os.path.exists('PecanStreet/' + device_type + '/' + device_name):
        os.makedirs('PecanStreet/' + device_type + '/' + device_name)
    with open('PecanStreet/' + device_type + '/' + device_name + '/' + filename, 'w') as outfile:
        json.dump(store_json, outfile)
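
The same per-day split can also be written with a pandas groupby, which avoids tracking the previous date by hand. A sketch under the same assumptions as the loop above (timezone-aware timestamps in utc_15min, readings scaled by 1000 and cast to int); not part of the original notebook.

# hypothetical groupby-based version of the per-day export
out_dir = os.path.join('PecanStreet', device_type, device_name)
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for day, day_df in df.groupby(df['utc_15min'].apply(lambda t: t.date())):
    payload = {'time_15': {
        'times': [str(t.replace(tzinfo=None)) for t in day_df['utc_15min']],
        'values': [int(v * 1000) for v in day_df[device_type]],
    }}
    with open(os.path.join(out_dir, device_type + '_' + device_name + '_' + str(day) + '.json'), 'w') as f:
        json.dump(payload, f)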


In [24]:
# serialize the DataFrame to a JSON string
df_to_json = df.to_json()

In [85]:
# read the JSON string back into a DataFrame: None values become NaN, the columns are
# reordered alphabetically, and the index comes back sorted as strings (0, 1, 10, 100, ...)
read_json = pd.read_json(df_to_json)
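
If row order and the timestamp column matter on the way back, the orient and date options can be set explicitly instead of relying on the defaults. A sketch, not from the original notebook; defaults differ slightly across pandas versions.

# hypothetical: round-trip with explicit options so dates survive and row order is preserved
df_records = df.to_json(orient='records', date_format='iso')
df_back = pd.read_json(df_records, orient='records', convert_dates=['utc_15min'])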

In [86]:
read_json.head()


Out[86]:
air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 bedroom1 bedroom2 bedroom3 bedroom4 bedroom5 car1 clotheswasher1 clotheswasher_dryg1 dataid diningroom1 diningroom2 dishwasher1 disposal1
0 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
1 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
10 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
100 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
1000 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...

5 rows × 73 columns


In [46]:
# save the JSON string for the whole house/month; write it directly, since json.dump on a
# string would wrap it in an extra layer of quoting
with open('PecanStreet/' + str(house_id) + '_' + str(year_month) + '.json', 'w') as outfile:
    outfile.write(df_to_json)
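
To confirm the file round-trips, it can be read back and parsed into a DataFrame. A minimal check assuming the write above succeeded; not part of the original notebook.

# hypothetical: load the saved file and rebuild the DataFrame
with open('PecanStreet/' + str(house_id) + '_' + str(year_month) + '.json') as infile:
    df_roundtrip = pd.read_json(infile.read())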