In [1]:
import datetime
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *
In [2]:
##engine information
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
In [3]:
print datetime.datetime.now()
In [3]:
In [4]:
##query
query = """select * from "PecanStreet_CuratedSets".group1_disaggregated_2013_01"""
In [7]:
url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db
In [8]:
dataids={}
with open('dataid_dict.txt') as fp:
for line in fp:
did=line.split(':')[0]
if line[-3]=='1':
dataids[did]=''
In [10]:
time1 = datetime.datetime.now()
eng = create_engine(url)
eng.echo=True
eng_object=eng.execute(query)
time2=datetime.datetime.now()
print time2-time1
In [11]:
time1 = datetime.datetime.now()
df = pd.DataFrame.from_records(eng_object.fetchall())
df.columns = eng_object.keys()
time2=datetime.datetime.now()
print time2-time1
#url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db
In [12]:
time1=datetime.datetime.now()
dfs_dict = {}
dfs=df.groupby('dataid')
for df in dfs:
df[1].drop('dataid',1)
#df[1].drop('id',1)
dfs_dict[df[0]]=df[1]
time2=datetime.datetime.now()
print time2-time1
In [ ]: