Setup

Extract and transform routine (pull and process data from SCUBA)


In [1]:
import psycopg2 as pg
import pandas.io.sql as psql
import config

anonymityDict = config.anonymityDict

def extract_and_transform(agencyID,
                          county_shapefile,
                          where_clause,
                          pop_col,
                          irr_area_col,
                          eto_col):
    
    # connect to database
    database = config.database
    user = config.user
    password = config.password
    host = config.host
    port = config.port
    
    connection = pg.connect(database=database,
                        user=user,
                        password=password,
                        host=host,
                        port=port
                        )
    
    query = """
    WITH cte_0 AS (
        SELECT {3} pop , {4} irr_area, utility_name, cust_loc_id, cust_loc_is_current, geom
        FROM cust_loc
        WHERE
            (cust_loc_class = 'RESIDENTIAL_SINGLE'
            OR cust_loc_class = 'RESIDENTIAL_MULTI') 
            AND utility_name = '{0}'
        
        ),
    
    cte_1 AS (
        SELECT geoid10, pop, irr_area, utility_name, cust_loc_id, cust_loc_is_current
        FROM cte_0, {1} 
        WHERE
            ST_Within(cte_0.geom,{1}.geom)
        ),
    
    cte_2 AS (
        SELECT
        usage_ccf, {5} eto, make_date(usage.usage_year, usage.usage_month, '01') usage_date,
        cte_1.geoid10, cte_1.pop, cte_1.irr_area, cte_1.utility_name, cte_1.cust_loc_id
        FROM cte_1
        JOIN usage
        ON cte_1.cust_loc_id = usage.cust_loc_id
        {2}
        )
    
    SELECT
    geoid10,
    usage_date,
    SUM(pop) hhsize,
    SUM(irr_area) irr_area_sf,
    SUM(usage_ccf) usage_ccf,
    AVG(eto) usage_et_amount
    
    FROM
    cte_2
    GROUP BY
    geoid10, usage_date""".format(agencyID,
                                  county_shapefile,
                                  where_clause,
                                  pop_col,
                                  irr_area_col,
                                  eto_col)
    
    tempDF = psql.read_sql(query, connection)
    
    path = "./tempOut/"
    fname = "%s_census_block_usage.csv" % (anonymityDict[agencyID])
    tempDF.to_csv(path+fname)
    
    # kill database connection
    connection.close()

Load routine (push processed data to CARTO)


In [2]:
import requests
import config

username = config.carto_user
apikey = config.apikey

def drop_old_table(agencyID):
    sql = "DROP TABLE %s_census_block_usage" % (anonymityDict[agencyID])
    url = "https://%s.carto.com/api/v2/sql/?q=%s&api_key=%s" % (username, sql, apikey)
    response = requests.get(url)
    
def load_new_table(agencyID):
    url = "https://%s.carto.com/api/v1/imports/?privacy=link&api_key=%s" % (username, apikey)
    
    path = "./tempOUT/"
    fname ="%s_census_block_usage.csv" % (anonymityDict[agencyID])
    f = {'file': open(path+fname)}
    
    response = requests.post(url, files=f)

Execution


In [15]:
import config

current_users = config.current_users
for user in current_users:    
    extract_and_transform(user['agencyID'],
                          user['county_shapefile'],
                          user['where_clause'],
                          user['pop_col'],
                          user['irr_area_col'],
                          user['eto_col']
                         )
    
    drop_old_table(user['agencyID'])


    WITH cte_0 AS (
        SELECT cust_loc_hhsize pop , cust_loc_irr_area_sf irr_area, utility_name, cust_loc_id, cust_loc_is_current, geom
        FROM cust_loc
        WHERE
            (cust_loc_class = 'RESIDENTIAL_SINGLE'
            OR cust_loc_class = 'RESIDENTIAL_MULTI') 
            AND utility_name = 'EMWD'
        
        ),
    
    cte_1 AS (
        SELECT geoid10, pop, irr_area, utility_name, cust_loc_id, cust_loc_is_current
        FROM cte_0, census_block_polygons_2010_riverside 
        WHERE
            ST_Within(cte_0.geom,census_block_polygons_2010_riverside.geom)
        ),
    
    cte_2 AS (
        SELECT
        usage_ccf, usage_et_amount eto, make_date(usage.usage_year, usage.usage_month, '01') usage_date,
        cte_1.geoid10, cte_1.pop, cte_1.irr_area, cte_1.utility_name, cte_1.cust_loc_id
        FROM cte_1
        JOIN usage
        ON cte_1.cust_loc_id = usage.cust_loc_id
        WHERE (pop > 0) AND cust_loc_is_current = 'TRUE'
        )
    
    SELECT
    geoid10,
    usage_date,
    SUM(pop) hhsize,
    SUM(irr_area) irr_area_sf,
    SUM(usage_ccf) usage_ccf,
    AVG(eto) usage_et_amount
    
    FROM
    cte_2
    GROUP BY
    geoid10, usage_date

Manually refresh account (to update account metadata)


In [16]:
for user in current_users:
    load_new_table(user['agencyID'])

Manually refresh account (to update account metadata)