In [ ]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
The script is divided into five functions:
In [ ]:
def wos_login(driver,username,password):
    """
    Fill in and submit the Web of Science login form on the current page.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session whose current page contains the login form.
    username : str
        Text typed into the form field named 'username'.
    password : str
        Text typed into the form field named 'password'.

    Returns
    -------
    None
    """
    # find_element(By.NAME, ...) is used instead of find_element_by_name:
    # the find_element_by_* helpers were removed in Selenium 4.3, while the
    # By-based form is available in old and new releases alike.
    #Input Username
    driver.find_element(By.NAME, 'username').send_keys(username)
    #Input Password
    driver.find_element(By.NAME, 'password').send_keys(password)
    #Click on Sign In (the submit control is the element named 'image')
    driver.find_element(By.NAME, 'image').click()
In [ ]:
def keyword_search(driver,keywords):
    """
    Type every keyword into its own search row and join the rows with OR.

    For each keyword (rows are numbered from 1) the function fills the row's
    input box, clicks "add search row", opens the boolean-operator dropdown
    that links this row to the next one and clicks the entry whose id ends
    in "-OR". A new, empty row is therefore also added after the last
    keyword.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session showing the search form.
    keywords : sequence of str
        Search terms, one per search row.

    Returns
    -------
    None

    Notes
    -----
    Every element is awaited for up to 5 seconds with WebDriverWait, which
    raises selenium's TimeoutException when the element never appears.
    """
    def wait_for(xpath):
        # until() hands back the element located by the expected condition,
        # so the element does not have to be looked up a second time.
        return WebDriverWait(driver,5).until(
            EC.presence_of_element_located((By.XPATH,xpath)))

    for row, keyword in enumerate(keywords, start=1):
        #Input keyword
        wait_for('//*[@id="value(input%d)"]' % row).send_keys(keyword)
        #Add new Search Row
        wait_for('//*[@id="addSearchRow%d"]/span[1]' % row).click()
        #Open the operator dropdown between this row and the next one
        wait_for('//*[@id="select2-value(bool_%d_%d)-container"]' % (row, row + 1)).click()
        #Pick the OR entry
        # NOTE(review): the "7put" token inside this id looks generated
        # (another sample of the same id seen for this page used "mnjh"
        # instead) - confirm it is stable across page loads.
        wait_for('//*[@id="select2-value(bool_%d_%d)-result-7put-OR"]' % (row, row + 1)).click()
In [ ]:
def select_year(driver,year):
    """
    Restrict the search to a single year using the two year dropdowns.

    The same year is selected as both start and end of the period. Each
    dropdown entry is addressed by its 1-based position in the list:
    ``101 + (year - 2000)`` in the start list and ``18 - (year - 2000)``
    in the end list.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session showing the search form.
    year : int
        Year to select.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If either computed list position is below 1. XPath positions start
        at 1, so such a year could never match a dropdown entry.
    """
    # NOTE(review): the offsets 101 and 18 presumably mirror the dropdown
    # contents at the time of writing (start list ascending from 1900, end
    # list descending from 2017) - confirm against the live page.
    offset = year - 2000
    start_position = 101 + offset
    end_position = 18 - offset
    # Fail before touching the browser instead of letting an impossible
    # position surface later as an opaque element-lookup error.
    if start_position < 1 or end_position < 1:
        raise ValueError(
            'year %s cannot be selected: dropdown positions %s (start) and '
            '%s (end) must both be >= 1' % (year, start_position, end_position))

    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    #Switch the timespan control to a custom year range
    driver.find_element(By.XPATH,'//*[@id="periodRange"]').click()
    #Start year
    #Opening dropdown of starting year
    driver.find_element(By.XPATH,'//*[@id="s2id_autogen4"]/a').click()
    #Select year from dropdown
    driver.find_element(By.XPATH,'//*[@id="select2-results-5"]/li[%d]' % start_position).click()
    #End year
    #Opening dropdown of ending year
    driver.find_element(By.XPATH,'//*[@id="s2id_autogen6"]/a').click()
    #Select year from dropdown
    driver.find_element(By.XPATH,'//*[@id="select2-results-7"]/li[%d]' % end_position).click()
In [ ]:
def download_page(driver,page_no,file_format):
    """
    Jump to one page of the result list and export it through the save dialog.

    Steps: type ``page_no`` into the page-number box and press RETURN, open
    the "save to" menu, choose its fifth entry (other file formats), switch
    the record content to the full-record option, pick ``file_format`` in
    the format list, press the send button and finally close the dialog.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session showing the search results.
    page_no : int or str
        Page number typed into the navigation box.
    file_format : str
        Visible text of the entry to select in the 'saveOptions' list.

    Returns
    -------
    None

    Notes
    -----
    Every element is awaited for up to 5 seconds with WebDriverWait, which
    raises selenium's TimeoutException when the element never appears.
    """
    def wait_for(by, value):
        # until() hands back the element located by the expected condition,
        # so the element does not have to be looked up a second time (the
        # removed find_element_by_* helpers are not needed at all).
        return WebDriverWait(driver,5).until(
            EC.presence_of_element_located((by,value)))

    #Go to the requested page
    page = wait_for(By.XPATH,'//*[@id="summary_navigation"]/table/tbody/tr/td[2]/input')
    page.clear()
    page.send_keys(page_no)
    page.send_keys(Keys.RETURN)
    #Open dropdown menu to save
    wait_for(By.XPATH,'//*[@id="s2id_saveToMenu"]/a/span[2]/b').click()
    #Click to other file formats
    wait_for(By.XPATH,'//*[@id="select2-results-1"]/li[5]').click()
    #Full Record
    wait_for(By.ID,'bib_fields').click()
    wait_for(By.ID,'bib_fields:fullrec_fields_option').click()
    #Choose the requested file format by its visible text
    Select(wait_for(By.ID,'saveOptions')).select_by_visible_text(file_format)
    #Click on send button
    wait_for(By.XPATH,'//*[@id="ui-id-7"]/form/div[4]/span/input').click()
    #Click on close button after download
    wait_for(By.XPATH,'//*[@id="ui-id-7"]/form/div[2]/a').click()
In [ ]:
def webofscience(username,password,keywords,year,pages,file_format):
    """
    Download search results from Web of Science.

    Opens a Chrome session on the Web of Science login page, signs in,
    enters the keywords, restricts the search to ``year``, runs the search,
    switches the result list to the third page-size option and then exports
    every page listed in ``pages``.

    Parameters
    ----------
    username : str
        Login name passed to wos_login.
    password : str
        Password passed to wos_login.
    keywords : sequence of str
        Search terms passed to keyword_search.
    year : int
        Year passed to select_year.
    pages : iterable
        Page numbers, each passed to download_page.
    file_format : str
        Export format passed to download_page.

    Returns
    -------
    None
    """
    def click_when_present(xpath):
        # Wait up to 5 seconds for the element, then click the element that
        # until() returns (no second lookup, and no use of the
        # find_element_by_* helpers removed in Selenium 4.3).
        WebDriverWait(driver,5).until(
            EC.presence_of_element_located((By.XPATH,xpath))).click()

    #Initializing driver
    # NOTE(review): the browser is left open when this function returns;
    # quitting it here could cut off a download that is still in progress.
    driver = webdriver.Chrome()
    driver.get("http://login.webofknowledge.com")
    #WOS Login
    wos_login(driver,username,password)
    #Add Keywords
    keyword_search(driver,keywords)
    #Add Year
    select_year(driver,year)
    #Click on search
    # The button id is numbered len(keywords) + 1, which is what the former
    # expression 9 - (8 - len(keywords)) evaluated to.
    search_cell = len(keywords) + 1
    click_when_present('//*[@id="searchCell%d"]' % search_cell)
    #Increase page size to 50 results per page
    click_when_present('//*[@id="s2id_selectPageSize_.bottom"]/a')
    click_when_present('//*[@id="select2-results-5"]/li[3]')
    #Export every requested page
    for page_no in pages:
        download_page(driver,page_no,file_format)
In [ ]:
#Credentials of the registered account: e-mail address and password
#(replace both placeholders before running)
username = '<Email Address>'
password = '<Password>'
#Search terms; each list entry gets its own search row
keywords = ['keyword 1','keyword 2']
#Year the search is restricted to
year = 2016
#Result pages to download; any iterable works, e.g. range(1, 4)
pages = [1]
#Export format, selected by its visible text in the save dialog.
#Names noted for this dialog: 'BibTex', 'HTML', 'Plain Text',
#'Tab-delimited (Win)', 'Tab-delimited (Mac)', 'Tab-delimited (Win, UTF-8)'.
#NOTE(review): the earlier note listed 'Tab-delimited (Win, UTF-8)' twice;
#the second entry was presumably 'Tab-delimited (Mac, UTF-8)' - confirm.
file_format = 'Plain Text'
#Run the whole login / search / download sequence
webofscience(
    username=username,
    password=password,
    keywords=keywords,
    year=year,
    pages=pages,
    file_format=file_format,
)