# In [ ]:
# this is the boilerplate import language needed to get Selenium
# up and running. Just make sure you include this for any Selenium script
# you want to write.

import csv

from lxml import html
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

# -*- coding: utf-8 -*-

# here, we create a function to go to the next page of our target site
# Selenium acts like a browser, so we can tell it to find the "next page" link and "click" on it
def go_to_next_page():
    """Click the pagination link that follows the current-page marker.

    Uses the module-level ``driver``. Inside the ``datagrid_results`` table,
    the first ``<a>`` that is a following sibling of a ``<span>`` in a cell is
    located and clicked (presumably the span marks the current page, so the
    link is the next page -- confirm against the live markup).

    Raises:
        selenium.common.exceptions.NoSuchElementException: if no such link
            exists; the caller uses this to detect the last page.
    """
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; this form works on older releases too.
    next_page_link = driver.find_element(
        By.XPATH,
        '//table[@id="datagrid_results"]//tr//td//span/following-sibling::a',
    )
    print("Going to page", next_page_link.text)
    next_page_link.click()

# here we loop through the rows to capture the data from the table at our target page
# to pull off each row of data, we loop through its rows based on the xpath
def get_data(source):
    """Pull the cell text out of the results table in a page of HTML.

    ``source`` is parsed with lxml, every ``<tr>`` under the table whose id is
    ``datagrid_results`` is visited, and the text content of each ``<td>`` in
    that row is collected.

    Returns a list of rows, each a list of cell strings. The first and last
    rows are dropped; they are treated as the header and pagination rows.
    """
    # Turn the raw page source into an element tree lxml can query.
    tree = html.fromstring(source)
    table_rows = tree.xpath(".//table[@id='datagrid_results']//tr")
    # One inner list of cell texts per table row.
    cell_texts = [
        [cell.text_content() for cell in table_row.xpath(".//td")]
        for table_row in table_rows
    ]
    # Slice off the first and last rows (header + pagination).
    return cell_texts[1:-1]

## Everything above defines our functions - our tools - that we'll now use below in the actual
## sequence of the script

# Load the first page
# When we load the page, a Firefox web browser window will actually open up
# and Selenium will control it based on our instructions
print("Loading the first page")
driver = webdriver.Firefox()

# Everything that uses the browser runs inside try/finally so the browser
# session is always shut down, even when a step below raises.
try:
    driver.get("http://www.licensepa.state.pa.us/")

    # Select medicine and surgery from the dropdown menu.
    # find_element(By.NAME, ...) replaces find_element_by_name, which was
    # removed in Selenium 4.3; this form works on older releases too.
    print("Selecting the dropdown")
    dropdown = Select(driver.find_element(By.NAME, "t_web_lookup__license_type_name"))
    dropdown.select_by_value("Medical Physician and Surgeon")

    # Here we'll enter a last name letter and wildcard.
    # Since this site is intolerably slow, we'll carve out just last names
    # starting with a certain letter to keep things from crashing.
    currentletter = "x"
    print("Selecting last names that start with '" + currentletter + "'")
    textinput = driver.find_element(By.NAME, "t_web_lookup__last_name")
    textinput.send_keys(currentletter + "*")

    # Click the search button
    print("Clicking the search button")
    search_button = driver.find_element(By.ID, "sch_button")
    search_button.click()

    # Copy the data we capture into a .csv file whose name includes the
    # letter of the last names we've pulled down.
    failed = False
    currentcsv = "pa-" + currentletter + ".csv"
    # newline="" is what the csv module expects for files it writes to
    # (otherwise extra blank rows appear on Windows); utf-8 keeps names with
    # non-ASCII characters from failing on platforms with a narrower default.
    with open(currentcsv, "w", newline="", encoding="utf-8") as output:
        writer = csv.writer(output, delimiter=',')

        while not failed:
            print("Trying a new page")
            # Get the data on the page
            data_from_page = get_data(driver.page_source)
            # Write them to the CSV
            writer.writerows(data_from_page)
            # Go to the next page, then loop back up
            try:
                go_to_next_page()
            except NoSuchElementException:
                # No "next page" link was found, so this was the last page.
                # Any other error is a real failure and is allowed to
                # propagate instead of being mistaken for the end of the data.
                failed = True

    # These lines are only reached when pagination ended normally; if
    # anything above raised, the traceback is shown instead.
    print("This job processed successfully.")
    print("Names with the letter '" + currentletter + "' are done!")
finally:
    # Always shut Selenium down at the end of the script. quit() ends the
    # whole WebDriver session, whereas close() only closes the current window.
    driver.quit()