In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [2]:
# setup driver
# Starts a Firefox browser session through Selenium; the later cells drive the
# page through this `driver` object.
# NOTE(review): no driver.quit() appears in the visible cells — confirm the
# browser is closed somewhere once scraping is finished.
driver = webdriver.Firefox()

In [13]:
# Load the detailed-results page (Detailed_gp_2013.aspx) in the browser.
# NOTE(review): the "(S(eoxjutirydhdvx550untivvu))" path segment looks like a
# per-session token baked into the URL — presumably it can expire; confirm the
# page still loads on a fresh run.
driver.get("http://wbsec.gov.in/(S(eoxjutirydhdvx550untivvu))/DetailedResult/Detailed_gp_2013.aspx")

In [14]:
# Locate the "district" drop-down by its form-field name.
# The generic find_element(By..., value) call is used because the
# find_element_by_* shortcuts were deprecated and later removed from Selenium.
district = driver.find_element(By.NAME, "ddldistrict")

In [15]:
# Click the district drop-down element itself.
# NOTE(review): presumably this only opens the list before an option is
# chosen — confirm whether the click is actually required.
district.click()

In [16]:
# Collect the <option> entries nested inside the district drop-down.
# find_elements(By.TAG_NAME, ...) replaces the removed
# find_elements_by_tag_name shortcut and returns the same list of elements.
district_options = district.find_elements(By.TAG_NAME, "option")

In [17]:
# Peek at the label of the option at index 1 (the second entry in the list).
district_options[1].text


Out[17]:
u'Bankura'

In [18]:
# Remember the label of the option at index 1, then select that option.
chosen_district = district_options[1]
district_text = chosen_district.text
chosen_district.click()

In [19]:
# Locate the "block" drop-down by its form-field name.
# Uses find_element(By.NAME, ...) instead of the removed
# find_element_by_name shortcut.
# NOTE(review): no explicit wait follows the district selection — if the page
# reloads this drop-down after that click, a wait may be needed here; confirm.
block = driver.find_element(By.NAME, "ddlblock")

In [20]:
# Collect the <option> entries nested inside the block drop-down.
# find_elements(By.TAG_NAME, ...) replaces the removed
# find_elements_by_tag_name shortcut.
block_options = block.find_elements(By.TAG_NAME, "option")

In [21]:
# Remember the label of the block option at index 1, then select it.
chosen_block = block_options[1]
block_options_text = chosen_block.text
chosen_block.click()

In [22]:
# Locate the "gp" drop-down by its form-field name.
# Uses find_element(By.NAME, ...) instead of the removed
# find_element_by_name shortcut.
# NOTE(review): no explicit wait follows the block selection — if the page
# reloads this drop-down after that click, a wait may be needed here; confirm.
gp = driver.find_element(By.NAME, "ddlgp")

In [23]:
# Collect the <option> entries nested inside the gp drop-down.
# find_elements(By.TAG_NAME, ...) replaces the removed
# find_elements_by_tag_name shortcut.
gp_options = gp.find_elements(By.TAG_NAME, "option")

In [24]:
# Remember the label of the gp option at index 1, then select it.
chosen_gp = gp_options[1]
gp_options_text = chosen_gp.text
chosen_gp.click()

In [26]:
# Grab the inner HTML of the results grid (the element with id "DataGrid1")
# as a string.
# find_element(By.CSS_SELECTOR, ...) replaces the removed
# find_element_by_css_selector shortcut; the selector itself is unchanged.
table = driver.find_element(By.CSS_SELECTOR, "#DataGrid1").get_attribute('innerHTML')

In [27]:
# Parse the HTML string into a BeautifulSoup tree.
# The parser is named explicitly: without it bs4 picks whichever parser happens
# to be installed, so the resulting tree can differ between machines. "lxml"
# matches the <html><body> wrapper visible in the recorded output of the next
# cell.
# NOTE(review): `table` changes from a string to a soup object here, so this
# cell should not be re-run without first re-running the cell that fetches
# the HTML.
table = BeautifulSoup(table, "lxml")

In [28]:
# Display the parsed table to inspect its structure.
# NOTE(review): this echoes the entire table into the notebook output; a
# smaller preview may be enough.
table


Out[28]:
<html><body><tbody><tr style="color:#FFFFCC;background-color:#990000;font-weight:bold;">
<td>Seat Name</td><td>Total Electors</td><td>Votes Polled</td><td>Votes Rejected</td><td>OSN</td><td>Candidate Name</td><td>Party Name</td><td>Votes Secured</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl02_txtseat">I/1</span>
<br/>
<span id="DataGrid1_ctl02_txtReserve" style="color:#993366;">WOMAN</span>
</td><td>
<span id="DataGrid1_ctl02_lblelectors">830</span>
</td><td>
<span id="DataGrid1_ctl02_lblVPolled">646</span>
</td><td>
<span id="DataGrid1_ctl02_lblVRejected">19</span>
</td><td>1</td><td>RITA CHATTOPADHYAY</td><td>IND</td><td>301</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>RINA GHOSH</td><td>BJP</td><td>57</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>RINA PATRA</td><td>AIFB</td><td>269</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl05_txtseat">II/2</span>
<br/>
<span id="DataGrid1_ctl05_txtReserve" style="color:#993366;">SC WOMAN</span>
</td><td>
<span id="DataGrid1_ctl05_lblelectors">659</span>
</td><td>
<span id="DataGrid1_ctl05_lblVPolled">528</span>
</td><td>
<span id="DataGrid1_ctl05_lblVRejected">39</span>
</td><td>1</td><td>ANIMA BAURI</td><td>AITC</td><td>210</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>APARNA MANDAL</td><td>CPIM</td><td>279</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl07_txtseat">III/3</span>
<br/>
<span id="DataGrid1_ctl07_txtReserve" style="color:#993366;">BC</span>
</td><td>
<span id="DataGrid1_ctl07_lblelectors">1066</span>
</td><td>
<span id="DataGrid1_ctl07_lblVPolled">747</span>
</td><td>
<span id="DataGrid1_ctl07_lblVRejected">17</span>
</td><td>1</td><td>ABDUL MOTALEB MOLLA</td><td>AITC</td><td>437</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>MOLLA ABDUL HANNAN</td><td>CPIM</td><td>293</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl09_txtseat">IV/4</span>
<br/>
<span id="DataGrid1_ctl09_txtReserve" style="color:#993366;">BCW</span>
</td><td>
<span id="DataGrid1_ctl09_lblelectors">541</span>
</td><td>
<span id="DataGrid1_ctl09_lblVPolled">415</span>
</td><td>
<span id="DataGrid1_ctl09_lblVRejected">20</span>
</td><td>1</td><td>MANISHA BIBI</td><td>CPIM</td><td>200</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>RUMANA KHATUN HOSEN</td><td>AITC</td><td>195</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl11_txtseat">V/5</span>
<br/>
<span id="DataGrid1_ctl11_txtReserve" style="color:#993366;">GENERAL</span>
</td><td>
<span id="DataGrid1_ctl11_lblelectors">1275</span>
</td><td>
<span id="DataGrid1_ctl11_lblVPolled">854</span>
</td><td>
<span id="DataGrid1_ctl11_lblVRejected">24</span>
</td><td>1</td><td>EFTAKHARUL HAQUE MIDDYA</td><td>IND</td><td>7</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>MD. AMIR HOSSAIN</td><td>IND</td><td>5</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>MD. JAYNAL ABEDIN MOLLA</td><td>AITC</td><td>522</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>4</td><td>MD. MEHERULLA</td><td>IND</td><td>6</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>5</td><td>SK. SHAHAJAHAN ALI</td><td>IND</td><td>290</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl16_txtseat">VI/6</span>
<br/>
<span id="DataGrid1_ctl16_txtReserve" style="color:#993366;">WOMAN</span>
</td><td>
<span id="DataGrid1_ctl16_lblelectors">1303</span>
</td><td>
<span id="DataGrid1_ctl16_lblVPolled">1114</span>
</td><td>
<span id="DataGrid1_ctl16_lblVRejected">53</span>
</td><td>1</td><td>PARBATI GHOSHAL</td><td>BJP</td><td>43</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>BANI CHINA</td><td>CPIM</td><td>616</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>RINKU KAPRI</td><td>AITC</td><td>402</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl19_txtseat">VII/7</span>
<br/>
<span id="DataGrid1_ctl19_txtReserve" style="color:#993366;">SC</span>
</td><td>
<span id="DataGrid1_ctl19_lblelectors">834</span>
</td><td>
<span id="DataGrid1_ctl19_lblVPolled">662</span>
</td><td>
<span id="DataGrid1_ctl19_lblVRejected">21</span>
</td><td>1</td><td>NIMAI KALINDI</td><td>BJP</td><td>134</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>BHAJAN KALINDI</td><td>AITC</td><td>308</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>SUNIL MALAKAR</td><td>CPIM</td><td>199</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl22_txtseat">VIII/8</span>
<br/>
<span id="DataGrid1_ctl22_txtReserve" style="color:#993366;">GENERAL</span>
</td><td>
<span id="DataGrid1_ctl22_lblelectors">525</span>
</td><td>
<span id="DataGrid1_ctl22_lblVPolled">443</span>
</td><td>
<span id="DataGrid1_ctl22_lblVRejected">14</span>
</td><td>1</td><td>DEBASHISH MUKHERJEE</td><td>BJP</td><td>101</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>RANJIT SINGHA</td><td>AITC</td><td>209</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>SWAPAN BANERJEE</td><td>CPIM</td><td>119</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl25_txtseat">IX/9</span>
<br/>
<span id="DataGrid1_ctl25_txtReserve" style="color:#993366;">WOMAN</span>
</td><td>
<span id="DataGrid1_ctl25_lblelectors">975</span>
</td><td>
<span id="DataGrid1_ctl25_lblVPolled">778</span>
</td><td>
<span id="DataGrid1_ctl25_lblVRejected">47</span>
</td><td>1</td><td>ATHA BULA</td><td>CPIM</td><td>345</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>BANDANA CHINA</td><td>AITC</td><td>386</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl27_txtseat">X/10</span>
<br/>
<span id="DataGrid1_ctl27_txtReserve" style="color:#993366;">GENERAL</span>
</td><td>
<span id="DataGrid1_ctl27_lblelectors">843</span>
</td><td>
<span id="DataGrid1_ctl27_lblVPolled">678</span>
</td><td>
<span id="DataGrid1_ctl27_lblVRejected">57</span>
</td><td>1</td><td>RANJIT KARAK</td><td>AITC</td><td>245</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>SAMIR BAURI</td><td>CPIM</td><td>200</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>SUJIT CHAKRABORTY</td><td>INC</td><td>176</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl30_txtseat">XI/11</span>
<br/>
<span id="DataGrid1_ctl30_txtReserve" style="color:#993366;">SC WOMAN</span>
</td><td>
<span id="DataGrid1_ctl30_lblelectors">670</span>
</td><td>
<span id="DataGrid1_ctl30_lblVPolled">535</span>
</td><td>
<span id="DataGrid1_ctl30_lblVRejected">23</span>
</td><td>1</td><td>JHARNA DAS</td><td>AITC</td><td>300</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>RITA MAJUMDER</td><td>CPIM</td><td>212</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl32_txtseat">XII/12</span>
<br/>
<span id="DataGrid1_ctl32_txtReserve" style="color:#993366;">WOMAN</span>
</td><td>
<span id="DataGrid1_ctl32_lblelectors">1181</span>
</td><td>
<span id="DataGrid1_ctl32_lblVPolled">935</span>
</td><td>
<span id="DataGrid1_ctl32_lblVRejected">106</span>
</td><td>1</td><td>MITHU SHIT</td><td>AITC</td><td>354</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>SARAMA PAL</td><td>BJP</td><td>155</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>SUSAMA  DHABAL</td><td>CPI</td><td>320</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl35_txtseat">XIII/13</span>
<br/>
<span id="DataGrid1_ctl35_txtReserve" style="color:#993366;">SC</span>
</td><td>
<span id="DataGrid1_ctl35_lblelectors">1047</span>
</td><td>
<span id="DataGrid1_ctl35_lblVPolled">875</span>
</td><td>
<span id="DataGrid1_ctl35_lblVRejected">19</span>
</td><td>1</td><td>DILIP BAGDI</td><td>BJP</td><td>78</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>RITA BAURI</td><td>AITC</td><td>448</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>SUNIL BAGDI</td><td>CPIM</td><td>330</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl38_txtseat">XIV/14</span>
<br/>
<span id="DataGrid1_ctl38_txtReserve" style="color:#993366;">GENERAL</span>
</td><td>
<span id="DataGrid1_ctl38_lblelectors">1055</span>
</td><td>
<span id="DataGrid1_ctl38_lblVPolled">818</span>
</td><td>
<span id="DataGrid1_ctl38_lblVRejected">47</span>
</td><td>1</td><td>AZAD HOSSAIN MIDYA</td><td>INC</td><td>185</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>ATARUL  HOQUE</td><td>AITC</td><td>378</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>RABIYAL MIDYA</td><td>CPIM</td><td>208</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl41_txtseat">XV/15</span>
<br/>
<span id="DataGrid1_ctl41_txtReserve" style="color:#993366;">GENERAL</span>
</td><td>
<span id="DataGrid1_ctl41_lblelectors">918</span>
</td><td>
<span id="DataGrid1_ctl41_lblVPolled">659</span>
</td><td>
<span id="DataGrid1_ctl41_lblVRejected">19</span>
</td><td>1</td><td>MANNAN KHAN</td><td>CPIM</td><td>341</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>MIDYA  NABIJAN </td><td>AITC</td><td>299</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl43_txtseat">XVI/16</span>
<br/>
<span id="DataGrid1_ctl43_txtReserve" style="color:#993366;">BCW</span>
</td><td>
<span id="DataGrid1_ctl43_lblelectors">937</span>
</td><td>
<span id="DataGrid1_ctl43_lblVPolled">671</span>
</td><td>
<span id="DataGrid1_ctl43_lblVRejected">12</span>
</td><td>1</td><td>FAYJUN  BEGAM</td><td>AITC</td><td>367</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>MUKSEDA BEGAM</td><td>CPIM</td><td>148</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>SUFIA KHATUN</td><td>INC</td><td>144</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<span id="DataGrid1_ctl46_txtseat">XVII/17</span>
<br/>
<span id="DataGrid1_ctl46_txtReserve" style="color:#993366;">BC</span>
</td><td>
<span id="DataGrid1_ctl46_lblelectors">1155</span>
</td><td>
<span id="DataGrid1_ctl46_lblVPolled">870</span>
</td><td>
<span id="DataGrid1_ctl46_lblVRejected">13</span>
</td><td>1</td><td>KHAN RUKUBUDDIN</td><td>AITC</td><td>510</td>
</tr><tr style="color:#330099;background-color:LightCyan;border-color:White;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>2</td><td>MOSIHUR RAHAMAN MALLICK</td><td>INC</td><td>53</td>
</tr><tr style="color:#330099;background-color:LightCyan;font-family:Times New Roman;font-size:Small;">
<td style="font-weight:bold;">
<br/>
</td><td>
</td><td>
</td><td>
</td><td>3</td><td>RABBUL KHAN</td><td>CPIM</td><td>294</td>
</tr>
</tbody></body></html>

In [29]:
# Gather every <tr> element of the parsed table into a plain list.
rows = list(table.select("tr"))

In [30]:
# Turn each data row into a dict keyed by column name.
# rows[0] is the header row (Seat Name, Total Electors, ...), so it is skipped.
# The cells of a row are looked up once and reused, instead of re-running
# row.select('td') for every column.
columns = ['seat', 'electors', 'polled', 'rejected',
           'osn', 'candidate', 'party', 'secured']
data = []
for row in rows[1:]:
    cells = row.select('td')
    # Indexing (rather than zip) keeps the IndexError on a row with fewer than
    # eight cells, so a malformed row is not silently truncated.
    data.append({name: cells[position].text
                 for position, name in enumerate(columns)})

In [31]:
# Trim leading/trailing whitespace from every field, updating each record in place.
for record in data:
    trimmed = {field: text.strip() for field, text in record.items()}
    record.update(trimmed)

In [32]:
# Sanity check: an empty string is falsy, so this evaluates to False when the
# first record has a non-empty seat label.
not data[0]['seat']


Out[32]:
False

In [33]:
# Fill down the seat-level fields.
# Only the first candidate row of each seat carries the seat label and the
# electors / polled / rejected counts; the following rows leave them blank.
# Copy the most recent non-blank values onto those rows so that every record
# is self-contained.
carried_fields = ('seat', 'electors', 'polled', 'rejected')
current_seat = None  # seat-level values from the latest row that had them
for record in data:
    if record['seat']:
        current_seat = {field: record[field] for field in carried_fields}
    elif current_seat is None:
        # Nothing to copy yet. Fail loudly instead of hitting a NameError or,
        # worse, reusing values left in the kernel by an earlier run.
        raise ValueError("first row has no seat information to fill down from")
    else:
        record.update(current_seat)

In [34]:
# Preview the first five records to check that the blank seat-level fields
# were filled in.
data[:5]


Out[34]:
[{'candidate': u'RITA CHATTOPADHYAY',
  'electors': u'830',
  'osn': u'1',
  'party': u'IND',
  'polled': u'646',
  'rejected': u'19',
  'seat': u'I/1\n\nWOMAN',
  'secured': u'301'},
 {'candidate': u'RINA GHOSH',
  'electors': u'830',
  'osn': u'2',
  'party': u'BJP',
  'polled': u'646',
  'rejected': u'19',
  'seat': u'I/1\n\nWOMAN',
  'secured': u'57'},
 {'candidate': u'RINA PATRA',
  'electors': u'830',
  'osn': u'3',
  'party': u'AIFB',
  'polled': u'646',
  'rejected': u'19',
  'seat': u'I/1\n\nWOMAN',
  'secured': u'269'},
 {'candidate': u'ANIMA BAURI',
  'electors': u'659',
  'osn': u'1',
  'party': u'AITC',
  'polled': u'528',
  'rejected': u'39',
  'seat': u'II/2\n\nSC WOMAN',
  'secured': u'210'},
 {'candidate': u'APARNA MANDAL',
  'electors': u'659',
  'osn': u'2',
  'party': u'CPIM',
  'polled': u'528',
  'rejected': u'39',
  'seat': u'II/2\n\nSC WOMAN',
  'secured': u'279'}]

In [ ]: