In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
In [2]:
# Start the Firefox browser session that every later cell drives through `driver`.
# NOTE(review): no visible cell closes this session; consider calling driver.quit() when finished.
driver = webdriver.Firefox()
In [13]:
# Open the detailed results page in the browser.
# NOTE(review): the "(S(...))" path segment looks like a session token and may expire;
# confirm the URL still resolves before relying on it.
results_url = "http://wbsec.gov.in/(S(eoxjutirydhdvx550untivvu))/DetailedResult/Detailed_gp_2013.aspx"
driver.get(results_url)
In [14]:
# Locate the "district" drop-down by its form name.
# find_element(By.NAME, ...) is used because the find_element_by_name helper
# was removed in Selenium 4.3; this form works on older releases too.
district = driver.find_element(By.NAME, "ddldistrict")
In [15]:
# Click the district drop-down element itself (presumably to open/focus it before picking an option).
district.click()
In [16]:
# Collect every <option> element nested under the district drop-down.
# find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name,
# which was removed in Selenium 4.3.
district_options = district.find_elements(By.TAG_NAME, "option")
In [17]:
# Show the visible label of the district option at index 1.
district_options[1].text
Out[17]:
In [18]:
# Remember the label of the district option at index 1, then select it.
# NOTE(review): index 0 is skipped, presumably a placeholder entry; confirm against the page.
chosen_district = district_options[1]
district_text = chosen_district.text
chosen_district.click()
In [19]:
# Locate the "block" drop-down by its form name.
# find_element(By.NAME, ...) replaces find_element_by_name, removed in Selenium 4.3.
# NOTE(review): if the page repopulates this drop-down after the district is chosen,
# an explicit wait may be needed before this lookup when running all cells at once.
block = driver.find_element(By.NAME, "ddlblock")
In [20]:
# Collect every <option> element nested under the block drop-down.
# find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name, removed in Selenium 4.3.
block_options = block.find_elements(By.TAG_NAME, "option")
In [21]:
# Remember the label of the block option at index 1, then select it.
# NOTE(review): index 0 is skipped, presumably a placeholder entry; confirm against the page.
chosen_block = block_options[1]
block_options_text = chosen_block.text
chosen_block.click()
In [22]:
# Locate the "gp" drop-down by its form name.
# find_element(By.NAME, ...) replaces find_element_by_name, removed in Selenium 4.3.
# NOTE(review): if the page repopulates this drop-down after the block is chosen,
# an explicit wait may be needed before this lookup when running all cells at once.
gp = driver.find_element(By.NAME, "ddlgp")
In [23]:
# Collect every <option> element nested under the gp drop-down.
# find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name, removed in Selenium 4.3.
gp_options = gp.find_elements(By.TAG_NAME, "option")
In [24]:
# Remember the label of the gp option at index 1, then select it.
# NOTE(review): index 0 is skipped, presumably a placeholder entry; confirm against the page.
chosen_gp = gp_options[1]
gp_options_text = chosen_gp.text
chosen_gp.click()
In [26]:
# Fetch the markup inside the results grid (the element with id "DataGrid1") as a string.
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which was removed in Selenium 4.3.
results_grid = driver.find_element(By.CSS_SELECTOR, "#DataGrid1")
table = results_grid.get_attribute('innerHTML')
In [27]:
# Parse the grid markup into a BeautifulSoup tree.
# The parser is named explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so the resulting tree can differ between machines.
# "html.parser" ships with Python and needs no extra dependency.
# Note: this rebinds `table` from the raw HTML string to the parsed soup.
table = BeautifulSoup(table, "html.parser")
In [28]:
# Display the parsed soup to inspect the table markup.
table
Out[28]:
In [29]:
# Gather every <tr> element found in the parsed table into a plain list.
rows = list(table.select("tr"))
In [30]:
# Convert each result row into a dict keyed by column name.
# Column names are listed in the order the <td> cells are read from each row.
COLUMN_NAMES = ('seat', 'electors', 'polled', 'rejected',
                'osn', 'candidate', 'party', 'secured')

data = []
for row in rows[1:]:  # rows[0] is skipped, presumably the header row
    cells = row.select('td')  # query the cells once per row instead of once per column
    if len(cells) < len(COLUMN_NAMES):
        # A row without a full set of cells cannot be mapped onto the columns;
        # skip it rather than fail with IndexError part-way through the table.
        continue
    # zip stops at the last column name, so any extra trailing cells are ignored.
    data.append({name: cell.text for name, cell in zip(COLUMN_NAMES, cells)})
In [31]:
# Trim leading/trailing whitespace from every field of every record, in place.
for record in data:
    record.update({field: text.strip() for field, text in record.items()})
In [32]:
# Quick check: evaluates to True when the first record's 'seat' field is empty.
not data[0]['seat']
Out[32]:
In [33]:
# Forward-fill the seat-level fields: a record whose 'seat' is empty inherits
# 'seat', 'electors', 'polled' and 'rejected' from the most recent record that
# had a non-empty 'seat'. Records are updated in place.
SEAT_LEVEL_KEYS = ('seat', 'electors', 'polled', 'rejected')

carried = None  # seat-level values from the last record with a non-empty 'seat'
for record in data:
    if record['seat']:
        carried = {key: record[key] for key in SEAT_LEVEL_KEYS}
    elif carried is not None:
        record.update(carried)
    # else: a leading record with an empty 'seat' has nothing to inherit yet, so it
    # is left untouched instead of raising NameError or picking up stale kernel state.
In [34]:
# Preview the first five records.
data[:5]
Out[34]:
In [ ]: