# In[ ]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# In[ ]:
def _wait_for(driver, condition, locator, timeout):
    """Wait up to ``timeout`` seconds for ``condition(locator)`` and return the element."""
    return WebDriverWait(driver, timeout).until(condition(locator))


def _select_following_pages(driver, count):
    """Click the ``blackpipeleft`` control ``count`` times, ticking ``pageckbx`` after each click."""
    for _ in range(count):
        # presumably the "next page" link of the result list -- TODO confirm
        driver.find_element(By.CLASS_NAME, "blackpipeleft").click()
        _wait_for(driver, EC.presence_of_element_located, (By.ID, "pageckbx"), 5).click()


def _click_download(driver):
    """Wait for the ``downloadli`` element to be present and visible, then click it."""
    download = (By.ID, "downloadli")
    _wait_for(driver, EC.presence_of_element_located, download, 5)
    _wait_for(driver, EC.visibility_of_element_located, download, 30).click()


def compendex(username, password, keywords, no_of_pages, pg_no):
    """
    Function to download results from Compendex

    Opens Chrome on the Engineering Village quick-search page (through the
    UW off-campus proxy), logs in if the UW NetID login page is shown,
    OR-combines the keywords, sets the start-year dropdown to 2000, switches
    to 100 results per page and then downloads the results in batches.

    Args:
        username: UW NetID, typed into the login form when it appears.
        password: UW NetID password.
        keywords: Non-empty sequence of search terms; each one after the
            first is joined to the query with "OR".
        no_of_pages: Number of download batches. The first batch is always
            downloaded; ``no_of_pages - 1`` further batches follow.
        pg_no: Result page to jump to before the first batch is selected.

    Returns:
        None. The browser is left open on return (the driver is never quit).

    Raises:
        ValueError: If ``keywords`` is empty.
        selenium.common.exceptions.TimeoutException: If an awaited element
            does not show up within its timeout.
    """
    # Fail before a browser is launched instead of with an IndexError after.
    if not keywords:
        raise ValueError("keywords must contain at least one search term")

    # Initializing driver
    driver = webdriver.Chrome()
    driver.get("https://www-engineeringvillage-com.offcampus.lib.washington.edu/search/quick.url?CID=quickSearch&database=compendex")

    # UW Net ID Login (only when the proxy redirected to the login page)
    if driver.title == "UW NetID Weblogin":
        driver.find_element(By.ID, "weblogin_netid").send_keys(username)
        driver.find_element(By.ID, "weblogin_password").send_keys(password)
        driver.find_element(By.NAME, "submit").click()

    # Search keywords
    driver.find_element(By.ID, "srchWrd1").send_keys(keywords[0])
    driver.find_element(By.ID, "addsearchllink").click()
    for row, keyword in enumerate(keywords[1:], start=2):
        if row <= 3:
            # Rows 2 and 3 are addressed by numbered ids.
            connector = driver.find_element(By.ID, "cbnt%d" % (row - 1))
            Select(connector).select_by_visible_text("OR")
            driver.find_element(By.ID, "srchWrd%d" % row).send_keys(keyword)
        else:
            # Later rows are looked up by a generic id / name instead.
            connector = driver.find_element(By.ID, "connector")
            Select(connector).select_by_visible_text("OR")
            driver.find_element(By.NAME, "searchWords").send_keys(keyword)
        # NOTE(review): assumed to run once per keyword (loop level, not only
        # in the else branch) -- confirm against the original notebook.
        driver.find_element(By.ID, "addsearchllink").click()

    # Open the advanced options and pick 2000 in the start-year dropdown
    driver.find_element(By.XPATH, '//*[@id="advancedOptionstoggleAnchor"]').click()
    start_year = _wait_for(
        driver, EC.visibility_of_element_located, (By.XPATH, '//*[@id="startyrrange"]'), 30
    )
    Select(start_year).select_by_visible_text("2000")
    driver.find_element(By.ID, "submitsearch_tool").click()

    # Increase number of results in page to 100
    page_size = _wait_for(driver, EC.presence_of_element_located, (By.ID, "pageSizeVal_top"), 5)
    Select(page_size).select_by_visible_text("100")

    # Page number
    goto_input = (By.XPATH, '//*[@id="gotopage_top"]/span[2]/input[1]')
    _wait_for(driver, EC.presence_of_element_located, goto_input, 5)
    driver.find_element(*goto_input).clear()
    driver.find_element(*goto_input).send_keys(str(pg_no))
    driver.find_element(By.XPATH, '//*[@id="gotopage_top"]/span[2]/input[2]').click()

    # Select all results: the current page plus the four that follow it
    _wait_for(driver, EC.presence_of_element_located, (By.ID, "pageckbx"), 5).click()
    _select_following_pages(driver, 4)
    _click_download(driver)

    # Only the first download goes through these three controls
    # (presumably CSV format + detailed record, then save -- TODO confirm).
    _wait_for(driver, EC.presence_of_element_located, (By.ID, "rdCsv"), 5).click()
    driver.find_element(By.ID, "rdDet").click()
    driver.find_element(By.ID, "savePrefsButton").click()

    # Remaining batches: clear the selection, select five more pages, download
    for _ in range(no_of_pages - 1):
        driver.find_element(By.ID, "clearbasket").click()
        # NOTE(review): no wait before this click; the original had a
        # presence wait for this button commented out.
        driver.find_element(By.XPATH, "/html/body/div[12]/div[3]/div/button[1]").click()
        _select_following_pages(driver, 5)
        _click_download(driver)
# In[ ]:
# UW NetID credentials (fill in before running)
username = '<username>'
password = '<password>'

# Search terms passed to compendex()
keywords = ['nuclear', 'atomic', 'uranium', 'centrifuge']

# Each page has 500 results
no_of_pages = 8

# Page Number
pg_no = 1

compendex(
    username=username,
    password=password,
    keywords=keywords,
    no_of_pages=no_of_pages,
    pg_no=pg_no,
)