In [64]:
###It would be nicer to have a method for doing this that could be changed by the group, however there are so many 
###variables for each group so it makes just as much sense to do it like this though it looks clunky

In [1]:
import psycopg2
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import *


/Users/sabina/anaconda/lib/python2.7/site-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [2]:
user_name = 'USERNAME'
pw='PASSWORD'
host = "db.wiki-energy.org"
port = "5432"
db = "postgres"
connect_string = "dbname = 'postgres' host='db.wiki-energy.org'"+ " user="+user_name+" password="+pw

In [3]:
#trying sqlalchemy engine
#url format:
#dialect+driver://username:password@host:port/database
url = "postgresql"+"://"+user_name+":"+pw+"@"+host+":"+port+"/"+db

In [4]:
engine = create_engine(url)
engine.echo=True

In [72]:
def get_list_by_group(group):
    query_g = """"PecanStreet_CuratedSets"."""+group
    query = """select distinct(""" +query_g+"""_disaggregated_2013_05.dataid) from """+query_g+"""_disaggregated_2013_05"""
    return ((engine.execute(query).fetchall()))

In [80]:
def get_dataid_dict():    
    dataid_dict={}
    for i in range(1,4):
        ls = get_list_by_group("group"+str(i))
        for item in ls:
            if not (item[0] in dataid_dict) and item[0]!=9923:
                dataid_dict[item[0]]=[]
    return dataid_dict

In [90]:
id_dict = get_dataid_dict()


2014-06-09 12:01:34,173 INFO sqlalchemy.engine.base.Engine select distinct("PecanStreet_CuratedSets".group1_disaggregated_2013_05.dataid) from "PecanStreet_CuratedSets".group1_disaggregated_2013_05
INFO:sqlalchemy.engine.base.Engine:select distinct("PecanStreet_CuratedSets".group1_disaggregated_2013_05.dataid) from "PecanStreet_CuratedSets".group1_disaggregated_2013_05
2014-06-09 12:01:34,174 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-09 12:01:34,345 INFO sqlalchemy.engine.base.Engine select distinct("PecanStreet_CuratedSets".group2_disaggregated_2013_05.dataid) from "PecanStreet_CuratedSets".group2_disaggregated_2013_05
INFO:sqlalchemy.engine.base.Engine:select distinct("PecanStreet_CuratedSets".group2_disaggregated_2013_05.dataid) from "PecanStreet_CuratedSets".group2_disaggregated_2013_05
2014-06-09 12:01:34,347 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}
2014-06-09 12:01:34,530 INFO sqlalchemy.engine.base.Engine select distinct("PecanStreet_CuratedSets".group3_disaggregated_2013_05.dataid) from "PecanStreet_CuratedSets".group3_disaggregated_2013_05
INFO:sqlalchemy.engine.base.Engine:select distinct("PecanStreet_CuratedSets".group3_disaggregated_2013_05.dataid) from "PecanStreet_CuratedSets".group3_disaggregated_2013_05
2014-06-09 12:01:34,531 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}

In [92]:
#get city data
query = """select * from "PecanStreet_RawData".cities"""

In [93]:
cities = pd.DataFrame.from_records(engine.execute(query).fetchall())


2014-06-09 12:01:52,957 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_RawData".cities
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_RawData".cities
2014-06-09 12:01:52,958 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}

In [95]:
for row in cities.iterrows():
    if row[1][0] in id_dict:
        id_dict[row[1][0]].append(row[1][1])

In [100]:
count = 0
for row in cities.iterrows():
    if row[1][1]=='Austin':
        if row[1][0] in id_dict:
            count=count+1
print count


73

In [102]:
#get audit as dataframe
audit_df = pd.DataFrame.from_records(engine.execute("""select * from "PecanStreet_RawData".audits_2011""").fetchall())


2014-06-09 12:10:14,465 INFO sqlalchemy.engine.base.Engine select * from "PecanStreet_RawData".audits_2011
INFO:sqlalchemy.engine.base.Engine:select * from "PecanStreet_RawData".audits_2011
2014-06-09 12:10:14,467 INFO sqlalchemy.engine.base.Engine {}
INFO:sqlalchemy.engine.base.Engine:{}

In [109]:
audit_df.iloc[[0]]


Out[109]:
0 1 2 3 4 5 6 7 8 9 ... 301 302 303 304 305 306 307 308 309 310
0 585 3/30/2011 1 3 2008 not assessed 1720 15480 North None ... 997 1540 None 6 N/A None None None None 49

1 rows × 311 columns


In [117]:
count = 0 
for row in audit_df.iterrows():
    if row[1][0] in id_dict:
        id_dict[row[1][0]].append(1)
        count=count+1

In [118]:
print count


36

In [119]:
#http://stackoverflow.com/questions/19295886/writing-a-python-dictionary-to-a-text-file-python
with open ('dataid_dict.txt', 'w') as fp:
    for p in id_dict.items():
        fp.write("%s:%s\n" % p)

In [ ]: