In [90]:
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *
import json

In [91]:
##engine information
user_name = 'ssuffian'
pw='W&;R4P9@j'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
schema = '\"PecanStreet_SharedData\"'

In [92]:
#Sabina's functions:
def get_eng():
    url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db
    eng = create_engine(url)
    eng.echo=True
    return eng

def get_dataids(eng,table):
    query = "select distinct("+ schema + "."+table + ".dataid" +") from " + schema + "." +table
    eng_object=eng.execute(query)
    l=list(eng_object.fetchall())
    return l

def get_table_by_id(eng,table,i):
    query = "select * from "+schema+"."+table+ " where " +schema+"."+table+".dataid="+i
    eng_object=eng.execute(query)
    df = pd.DataFrame.from_records(eng_object.fetchall())
    df.columns = eng_object.keys()
    return df

def get_query(kind,col,table):
    if kind=='sum':
        return "select sum("+col +") from " + schema +"."+ table
    if kind =='count':
        return "select count("+col+ ") from " + schema +"."+ table
    if kind =='distinct':
        return "select distinct(" + schema  +"."+table+"."+col+ ") from " + schema +"."+ table
    #+"."+table+"."+col+ ") from " + schema +"."+ table
    
def execute_query(eng,query,form):
    ds =[]
    eng_object=eng.execute(query)
    if form == 'num':
        ds=(eng_object.fetchall())
    elif form=='ls':
        ds = list(eng_object.fetchall())
    elif form =='df':
        ds = pd.DataFrame.from_records(eng_object.fetchall())
        ds.columns = eng_object.keys()
    return ds

In [93]:
eng = get_eng()

In [94]:
#query data by id
query = get_table_by_id(eng,'validated_01_2014','2638')


2014-06-23 16:32:28,055 INFO sqlalchemy.engine.base.Engine select version()
INFO:sqlalchemy.engine.base.Engine:select version()
2014-06-23 16:32:28,056 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-23 16:32:28,155 INFO sqlalchemy.engine.base.Engine select current_schema()
INFO:sqlalchemy.engine.base.Engine:select current_schema()
2014-06-23 16:32:28,159 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-23 16:32:28,262 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2014-06-23 16:32:28,263 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-23 16:32:28,319 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2014-06-23 16:32:28,320 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-23 16:32:28,474 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
INFO:sqlalchemy.engine.base.Engine:show standard_conforming_strings
2014-06-23 16:32:28,475 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-23 16:32:28,581 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_SharedData".validated_01_2014 where "PecanStreet_SharedData".validated_01_2014.dataid=2638
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_SharedData".validated_01_2014 where "PecanStreet_SharedData".validated_01_2014.dataid=2638
2014-06-23 16:32:28,583 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}

In [95]:
#create dataframe
df = pd.DataFrame(query)

In [96]:
#view head of df
df.head()


Out[96]:
dataid localminute use air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 bedroom1 bedroom2 bedroom3 bedroom4 bedroom5 car1 clotheswasher1 clotheswasher_dryg1 diningroom1 diningroom2
0 2638 2014-01-01 01:30:00-06:00 1.066 0 None None None None None None None None None None None 0 None None None None ...
1 2638 2014-01-01 01:31:00-06:00 1.205 0 None None None None None None None None None None None 0 None None None None ...
2 2638 2014-01-01 01:32:00-06:00 1.149 0 None None None None None None None None None None None 0 None None None None ...
3 2638 2014-01-01 01:33:00-06:00 1.247 0 None None None None None None None None None None None 0 None None None None ...
4 2638 2014-01-01 01:34:00-06:00 1.344 0 None None None None None None None None None None None 0 None None None None ...

5 rows × 73 columns


In [84]:
#create a json object from our dataframe
df_to_json = df.to_json()

In [85]:
#read the df_to_json back to df..... this will create 'None's into NaN
#and rearrange the df in abc order
read_json = pd.read_json(df_to_json)

In [86]:
read_json.head()


Out[86]:
air1 air2 air3 airwindowunit1 aquarium1 bathroom1 bathroom2 bedroom1 bedroom2 bedroom3 bedroom4 bedroom5 car1 clotheswasher1 clotheswasher_dryg1 dataid diningroom1 diningroom2 dishwasher1 disposal1
0 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
1 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
10 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
100 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...
1000 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 NaN NaN 2638 NaN NaN NaN NaN ...

5 rows × 73 columns


In [75]:
#save the json
with open('energy.json', 'w') as outfile:
  json.dump(df_to_json, outfile)

In [ ]: