In [1]:
import datetime
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *


/Users/sabina/anaconda/lib/python2.7/site-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [2]:
##engine information
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"

In [3]:
print datetime.datetime.now()


2014-06-10 12:35:23.460816

In [3]:


In [4]:
##query
query = """select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01"""

In [7]:
url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db

In [8]:
dataids={}
with open('dataid_dict.txt') as fp:
    for line in fp:
        did=line.split(':')[0]
        if line[-3]=='1':
            dataids[did]=''

In [10]:
time1 = datetime.datetime.now()
eng = create_engine(url)
eng.echo=True
eng_object=eng.execute(query)
time2=datetime.datetime.now()
print time2-time1


2014-06-10 12:39:23,868 INFO sqlalchemy.engine.base.Engine select version()
INFO:sqlalchemy.engine.base.Engine:select version()
2014-06-10 12:39:23,869 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 12:39:23,962 INFO sqlalchemy.engine.base.Engine select current_schema()
INFO:sqlalchemy.engine.base.Engine:select current_schema()
2014-06-10 12:39:23,963 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 12:39:24,058 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2014-06-10 12:39:24,060 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 12:39:24,108 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
INFO:sqlalchemy.engine.base.Engine:SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2014-06-10 12:39:24,110 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 12:39:24,277 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
INFO:sqlalchemy.engine.base.Engine:show standard_conforming_strings
2014-06-10 12:39:24,278 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-10 12:39:24,369 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01
2014-06-10 12:39:24,370 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
0:01:13.823426

In [11]:
time1 = datetime.datetime.now()
df = pd.DataFrame.from_records(eng_object.fetchall())
df.columns = eng_object.keys()
time2=datetime.datetime.now()
print time2-time1
    #url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db


0:00:30.664888

In [12]:
time1=datetime.datetime.now()
dfs_dict = {}
dfs=df.groupby('dataid')
for df in dfs:
    df[1].drop('dataid',1)
            #df[1].drop('id',1)
    dfs_dict[df[0]]=df[1]

time2=datetime.datetime.now()
print time2-time1


0:00:00.461206

In [ ]: