In [84]:
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *
import json
import os
In [8]:
##engine information
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
schema = '\"PecanStreet_CuratedSets\"'
In [9]:
#Sabina's functions:
def get_eng():
url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db
eng = create_engine(url)
eng.echo=True
return eng
def get_dataids(eng,table):
query = "select distinct("+ schema + "."+table + ".dataid" +") from " + schema + "." +table
eng_object=eng.execute(query)
l=list(eng_object.fetchall())
return l
def get_table_by_id(eng,table,i):
query = "select * from "+schema+"."+table+ " where " +schema+"."+table+".dataid="+i
eng_object=eng.execute(query)
df = pd.DataFrame.from_records(eng_object.fetchall())
df.columns = eng_object.keys()
return df
def get_query(kind,col,table):
if kind=='sum':
return "select sum("+col +") from " + schema +"."+ table
if kind =='count':
return "select count("+col+ ") from " + schema +"."+ table
if kind =='distinct':
return "select distinct(" + schema +"."+table+"."+col+ ") from " + schema +"."+ table
#+"."+table+"."+col+ ") from " + schema +"."+ table
def execute_query(eng,query,form):
ds =[]
eng_object=eng.execute(query)
if form == 'num':
ds=(eng_object.fetchall())
elif form=='ls':
ds = list(eng_object.fetchall())
elif form =='df':
ds = pd.DataFrame.from_records(eng_object.fetchall())
ds.columns = eng_object.keys()
return ds
In [10]:
eng = get_eng()
In [37]:
#query data by id
house_id=2829
year_month='2013_03'
query = get_table_by_id(eng,'group1_disaggregated_'+year_month,str(house_id))
In [39]:
#create dataframe
df = pd.DataFrame(query)
In [40]:
#view head of df
df.head()
Out[40]:
In [91]:
time_15_array=[]
value_15_array=[]
store_json={}
json_15={}
device_name=str(house_id)
device_type='bedroom2'
yesterday=0
for i,value in enumerate(df[device_type]):
times.append(df['utc_15min'][i].replace(tzinfo=None))
#values.append(df[device_type])
if(yesterday!=df['utc_15min'][i].date() and yesterday!=0):
json_15['times']=time_15_array
json_15['values']=value_15_array
store_json['time_15']=json_15
filename=device_type+'_'+device_name+'_'+str(df['utc_15min'][i].date())+'.json'
if not os.path.exists('PecanStreet/'+device_type+'/'+device_name):
os.makedirs('PecanStreet/'+device_type+'/'+device_name)
with open('PecanStreet/'+device_type+'/'+device_name+'/'+filename, 'w') as outfile:
json.dump(store_json, outfile)
time_15_array=[]
value_15_array=[]
else:
time_15_array.append(str(df['utc_15min'][i].replace(tzinfo=None)))
value_15_array.append(int(df[device_type][i]*1000))
yesterday= df['utc_15min'][i].date()
In [ ]:
In [24]:
#create a json object from our dataframe
df_to_json = df.to_json()
In [85]:
#read the df_to_json back to df..... this will create 'None's into NaN
#and rearrange the df in abc order
read_json = pd.read_json(df_to_json)
In [86]:
read_json.head()
Out[86]:
In [46]:
#save the json
with open('PecanStreet/'+str(house_id)+'_'+str(year_month)+'.json', 'w') as outfile:
json.dump(df_to_json, outfile)