In [52]:
import json
import requests
from lxml import html


class XPathObject(object):
    """Base class whose instances are filled in by running XPaths over HTML.

    Subclasses override ``input_properties``; ``FromHTML`` then stores the
    ``.text`` of the first element matched by each XPath as an attribute.
    """

    # Maps property names (keys) to the XPaths their values are read from.
    input_properties = {}

    @classmethod
    def FromHTML(cls, html_contents):
        """Build an instance of ``cls`` from an HTML document.

        Args:
            html_contents: HTML markup, parsed with ``lxml.html.fromstring``.

        Returns:
            A new ``cls`` instance carrying one attribute per XPath that
            matched at least one element. Properties whose XPath matched
            nothing are reported on stdout and left unset.
        """
        inst = cls()
        print("Reading through {b} bytes for {c} properties...".format(
            b=len(html_contents),
            c=len(cls.input_properties)))

        document = html.fromstring(html_contents)

        for prop, expression in cls.input_properties.items():
            print("Searching for '{n}': {x}".format(n=prop, x=expression))
            matches = document.xpath(expression)

            if len(matches) > 0:
                # Only the first match is used; later matches are ignored.
                setattr(inst, prop, matches[0].text)
            else:
                print("Failed to find '{n}': {x}".format(n=prop, x=expression))

        return inst

    def __repr__(self):
        """Return the instance attributes serialized as indented JSON."""
        attributes = self.__dict__
        return json.dumps(attributes, indent=4, separators=(',', ': '))


class RefusePickup(XPathObject):
    """Pickup fields read from children of the element with id ``nConf``."""

    # Attribute name -> XPath of the element whose text supplies the value.
    # NOTE(review): 'route_recyle' looks like a misspelling of "recycle"; it is
    # kept unchanged because it becomes the attribute name on parsed instances.
    input_properties = dict(
        success_msg='//*[@id="nConf"]/h1',
        route_garbage='//*[@id="nConf"]/strong[1]',
        next_pickup_garbage='//*[@id="nConf"]/strong[2]',
        route_recyle='//*[@id="nConf"]/strong[3]',
        next_pickup_recycle_after='//*[@id="nConf"]/strong[4]',
        next_pickup_recycle_before='//*[@id="nConf"]/strong[5]',
    )


class RefuseQueryAddress(object):
    """A street address split into the parts sent with a refuse query.

    The street name and street type are exposed upper-cased through
    read-only properties; the street type must be one of ``STREET_TYPES``.
    """

    # Accepted street-type abbreviations (compared after upper-casing).
    STREET_TYPES = [
        'AV',  # Avenue
        'BL',  # presumably Boulevard -- TODO confirm
        'CR',  # Circle
        'CT',  # Court
        'DR',  # Drive
        'LA',  # Lane
        'PK',  # Parkway
        'PL',  # Place
        'RD',  # Road
        'SQ',  # Square
        'ST',  # Street
        'TR',  # Terrace
        'WY',  # Way
    ]

    def __init__(self, house_number, direction, street_name, street_type):
        """Store the address parts and validate the street type.

        Args:
            house_number: House number; stored unchanged.
            direction: Street direction (e.g. ``'S'``); stored unchanged.
            street_name: Street name; upper-cased when read back.
            street_type: Street-type abbreviation; matched case-insensitively
                against ``STREET_TYPES``.

        Raises:
            AssertionError: If ``street_type`` is not in ``STREET_TYPES``.
        """
        self.house_number = house_number
        self.direction = direction
        self._street_name = street_name
        self._street_type = street_type

        # Raised explicitly instead of via an ``assert`` statement so the
        # check is not stripped under ``python -O``. AssertionError is used
        # so callers see the same exception type an ``assert`` would raise.
        if self.street_type not in self.STREET_TYPES:
            raise AssertionError(
                "Invalid street type: {st}".format(st=self.street_type))

    @property
    def street_name(self):
        """The street name, upper-cased."""
        return self._street_name.upper()

    @property
    def street_type(self):
        """The street-type abbreviation, upper-cased."""
        return self._street_type.upper()


class RefuseQuery(object):
    """Submits an address to the garbage-day form and parses the reply."""

    # Endpoint the address form is POSTed to.
    form_url = 'http://mpw.milwaukee.gov/services/garbage_day'
    # Class whose ``FromHTML`` classmethod parses the response body.
    parse_xpath = RefusePickup

    @classmethod
    def Execute(cls, refuse_address, timeout=30):
        """POST the address to ``form_url`` and parse the returned HTML.

        Args:
            refuse_address: Object providing ``house_number``, ``direction``,
                ``street_name`` and ``street_type`` (e.g. a
                ``RefuseQueryAddress``).
            timeout: Seconds to wait for the server before giving up; passed
                straight to ``requests.post``.

        Returns:
            Whatever ``cls.parse_xpath.FromHTML`` builds from the response
            text.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                timeout, or (as ``HTTPError``) when the server answers with
                a 4xx/5xx status.
        """
        response = requests.post(
            cls.form_url,
            data={
                'laddr': refuse_address.house_number,
                'sdir': refuse_address.direction,
                'sname': refuse_address.street_name,
                'stype': refuse_address.street_type,
                'Submit': 'Submit',
            },
            # Without a timeout a stalled server blocks the call forever.
            timeout=timeout)
        # Fail loudly on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        return cls.parse_xpath.FromHTML(response.text)

Define An Address

The following address belongs to a Walgreens and is used here as an example.


In [53]:
# Example address; the lowercase street type is fine because
# RefuseQueryAddress upper-cases it before validating.
address = RefuseQueryAddress(
    house_number=2727, direction='S', street_name='27th', street_type='st')

Execute The Query

Call `RefuseQuery.Execute` to fetch, parse, and return the status of future refuse pickups.


In [54]:
# POST the address to the lookup form and parse the HTML that comes back.
pickup = RefuseQuery.Execute(refuse_address=address)


Reading through 13790 bytes for 6 properties...
Searching for 'success_msg': //*[@id="nConf"]/h1
Searching for 'next_pickup_recycle_before': //*[@id="nConf"]/strong[5]
Searching for 'next_pickup_recycle_after': //*[@id="nConf"]/strong[4]
Searching for 'route_recyle': //*[@id="nConf"]/strong[3]
Searching for 'next_pickup_garbage': //*[@id="nConf"]/strong[2]
Searching for 'route_garbage': //*[@id="nConf"]/strong[1]

Assess Results

Inspect the response object to see which routes the address is on, when the next garbage pickup is, and when the next recycling pickup will likely be.


In [55]:
# A cell's last expression is displayed via its repr, which XPathObject
# renders as indented JSON -- no print() wrapper needed.
pickup


{
    "success_msg": "2727 S 27TH ST - Address located!",
    "route_garbage": "SP1-3A",
    "next_pickup_recycle_after": "FRIDAY MARCH 11, 2016",
    "route_recyle": "SR2-2-16",
    "next_pickup_recycle_before": "SATURDAY MARCH 12, 2016",
    "next_pickup_garbage": "MONDAY MARCH 14, 2016"
}

In [ ]: