In [1]:
from selenium import webdriver
#import urllib you can use urllib to send web request to websites and get back html text as response
import pandas as pd
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
from lxml import html
import numpy
# import dependencies

In [2]:
browser = webdriver.Firefox() #I only tested in firefox
browser.get('http://costcotravel.com/Rental-Cars')
browser.implicitly_wait(5)#wait for webpage download

In [3]:
browser.find_element_by_id('pickupLocationTextWidget').send_keys("PHX");

In [4]:
browser.find_element_by_css_selector('.sayt-result').click()

In [5]:
browser.find_element_by_id("pickupDateWidget").send_keys('08/27/2016')#you can't send it directly, need to clear first

In [6]:
browser.find_element_by_id("pickupDateWidget").clear()

In [7]:
browser.find_element_by_id("pickupDateWidget").send_keys('08/27/2016')

In [8]:
browser.find_element_by_id("dropoffDateWidget").clear()

In [9]:
browser.find_element_by_id("dropoffDateWidget").send_keys('08/31/2016',Keys.RETURN)

In [10]:
browser.find_element_by_css_selector('#pickupTimeWidget option[value="03:00 PM"]').click() #select time

In [11]:
browser.find_element_by_css_selector('#dropoffTimeWidget option[value="03:00 PM"]').click()

In [12]:
browser.find_element_by_link_text('SEARCH').click() #click the red button !!

In [15]:
n = browser.page_source #grab the page source

The follow code is same as before, but you can send the commands all in one go. However, there are implicit wait for the driver so it can do AJAX request and render the page for elements also, you can you find_element_by_xpath method


In [14]:
# browser = webdriver.Firefox() #I only tested in firefox
# browser.get('http://costcotravel.com/Rental-Cars')
# browser.implicitly_wait(5)#wait for webpage download
# browser.find_element_by_id('pickupLocationTextWidget').send_keys("PHX");
# browser.implicitly_wait(5) #wait for the airport suggestion box to show
# browser.find_element_by_xpath('//li[@class="sayt-result"]').click() 
# #click the  airport suggestion box 

# browser.find_element_by_xpath('//input[@id="pickupDateWidget"]').send_keys('08/27/2016')
# browser.find_element_by_xpath('//input[@id="dropoffDateWidget"]').send_keys('08/30/2016',Keys.RETURN)

# browser.find_element_by_xpath('//select[@id="pickupTimeWidget"]/option[@value="09:00 AM"]').click()
# browser.find_element_by_xpath('//select[@id="dropoffTimeWidget"]/option[@value="05:00 PM"]').click()
# browser.implicitly_wait(5) #wait for the clicks to be completed
# browser.find_element_by_link_text('SEARCH').click()
# #click the search box

# time.sleep(8) #wait for firefox to download and render the page
# n = browser.page_source #grab the html source code

In [16]:
type(n) #the site use unicode


Out[16]:
unicode

In [17]:
soup = BeautifulSoup(n,'lxml') #use BeautifulSoup to parse the source

In [18]:
print "--------------first 1000 characters:--------------\n"
print soup.prettify()[:1000]
print "\n--------------last 1000 characters:--------------"
print soup.prettify()[-1000:]


--------------first 1000 characters:--------------

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html style="height:100%;" xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <title>
   Rental Car Low Price Finder at Costco Travel
  </title>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <meta content="Price all brands in one search with our Low Price Finder. Enter your criteria and we'll shop all coupons, codes and discounts for the lowest prices!" name="description"/>
  <meta content="www.costcotravel.com: Rental Cars,Car Rentals,Low Price Finder,Alamo,Avis,Budget,Enterprise" name="keywords"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="GENERAL" name="rating"/>
  <meta content="" name="robots"/>
  <link href="https://www.costcotravel.com" id="desktop" media="only screen and (min-device-width: 641px)" rel="alternate"/>
  <link href="https://m.costcotravel.com" id="phone" media="only screen and (max-

--------------last 1000 characters:--------------
ltsrcattempted="0" id="_searchingImage" initialized="1" src="https://www.costcotravel.com/shared/images/searching.gif" style="width: 168px; height: 16px;"/>
    </div>
   </div>
  </div>
  <script type="text/javascript">
   loadBackground();
  </script>
  <!--	
	|***** Server Information ******
	|*                              
	|*  Server = 04
	|*  Time = Thursday, April 28, 2016 7:07:09 PM PDT    
	|*            
	|*******************************	
	-->
  <script type="text/javascript">
   DomUtil.createCookie("Csrf-token", "1834581341518e741efdb22652c356e348d3ef52cb012b7bd2c9ba4ac3fd807928305bb8c5a530f6664277773b29801d2516cb01490820cc4ae8f29d0838338a");
			
				UnsupportedBrowserPopup.showUnsupportedBrowserPopup();
				if(!DomUtil.areCookiesEnabled()){
					MessageUtil.showErrorMessage(Navigation.TranslationMessages.COOKIES_DISABLED_MESSAGE, "dataContent");
				}
				
				Navigation.manageDynamicElements();
				Navigation.removeLocaleCodeQueryParameter();
  </script>
 </body>
</html>

In [19]:
table = soup.find('div',{'class':'rentalCarTableDetails'}) #find the table

In [20]:
print "--------------first 1000 characters:--------------\n"
print table.prettify()[:1000]
print "\n--------------last 1000 characters:--------------"
print table.prettify()[-1000:]


--------------first 1000 characters:--------------

<div class="rentalCarTableDetails">
 <table border="0" cellpadding="5" cellspacing="0" class="carMatrixTable" width="100%">
  <tbody>
   <tr>
    <th class="w123 nob tar fs10" rowspan="2">
     Taxes and fees are included in the price
    </th>
    <th class="w141 tac">
     <img alt="Alamo Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Alamo_m.gif"/>
    </th>
    <th class="w141 tac">
     <img alt="Avis Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Avis_m.gif"/>
    </th>
    <th class="w141 tac">
     <img alt="Budget Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Budget_m.gif"/>
    </th>
    <th class="w141 tac">
     <img alt="Enterprise Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Enterprise_

--------------last 1000 characters:--------------
850bb:1545fc32ada:28a3', false );">
       $595
      </a>
     </div>
    </td>
    <td class="">
     <div class="carCell">
      <a class="u " href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:28ee', '-604850bb:1545fc32ada:28a3', false );">
       $739
      </a>
     </div>
    </td>
    <td class="">
     <div class="carCell">
      <a class="u " href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:28ca', '-604850bb:1545fc32ada:28a3', false );">
       $730
      </a>
     </div>
    </td>
    <td class="">
     <div class="carCell">
      <a class="u " href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:2d1f', '-604850bb:1545fc32ada:28a3', false );">
       $595
      </a>
     </div>
    </td>
   </tr>
  </tbody>
 </table>
</div>


In [21]:
tr = table.select('tr') #let's look at one of the row

In [22]:
type(tr)


Out[22]:
list

In [23]:
#lets look at first three row
for i in tr[0:3]:
    print i.prettify()
    print "-----------------------------------"


<tr>
 <th class="w123 nob tar fs10" rowspan="2">
  Taxes and fees are included in the price
 </th>
 <th class="w141 tac">
  <img alt="Alamo Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Alamo_m.gif"/>
 </th>
 <th class="w141 tac">
  <img alt="Avis Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Avis_m.gif"/>
 </th>
 <th class="w141 tac">
  <img alt="Budget Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Budget_m.gif"/>
 </th>
 <th class="w141 tac">
  <img alt="Enterprise Rent A Car Logo" altsrcattempted="0" initialized="1" src="https://www.costcotravel.com/content/shared/images/logos/car/Enterprise_m.gif"/>
 </th>
</tr>

-----------------------------------
<tr>
 <th class="nob tac">
  Phx Sky Harbor Intl Arpt
  <br/>
  Shuttle
 </th>
 <th class="nob tac">
  Phoenix Sky Harbor Airport
  <br/>
  Shuttle
 </th>
 <th class="nob tac">
  Sky Harbor Intl Airport
  <br/>
  Shuttle
 </th>
 <th class="nob tac">
  Phx Sky Harbor Intl Arpt
  <br/>
  Shuttle
 </th>
</tr>

-----------------------------------
<tr>
 <th class="w123 nob tar fs10">
  Location Hours
 </th>
 <th class="nob tac">
  24 Hours
 </th>
 <th class="nob tac">
  24 Hours
 </th>
 <th class="nob tac">
  24 Hours
 </th>
 <th class="nob tac">
  24 Hours
 </th>
</tr>

-----------------------------------

let play with one of the row


In [24]:
row = tr[3]

In [25]:
row.find('th',{'class':'tar'}).text.encode('utf-8')


Out[25]:
'Economy Car'

In [26]:
row


Out[26]:
<tr><th class="tar">Economy Car</th><td class=""><div class="carCell"><a class="u linkredU" href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:2d00', '-604850bb:1545fc32ada:28a3', false );">$159</a></div></td><td class=""><div class="carCell"><a class="u " href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:28d1', '-604850bb:1545fc32ada:28a3', false );">$515</a></div></td><td class=""><div class="carCell"><a class="u " href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:28b7', '-604850bb:1545fc32ada:28a3', false );">$364</a></div></td><td class=""><div class="carCell"><a class="u " href="javascript:;" onclick="javascript:RentalCarMatrix.selectCarCategoryFromGrid( '-604850bb:1545fc32ada:28a3', '-604850bb:1545fc32ada:2d25', '-604850bb:1545fc32ada:28a3', false );">$173</a></div></td></tr>

In [27]:
row.contents[4].text #1. this is unicode, 2. the dollar sign is in the way


Out[27]:
u'$173'

In [28]:
'Car' in 'Econ Car' #use this string logic to filter out unwanted data


Out[28]:
True

In [29]:
rows = [i for i in tr if (('Price' not in i.contents[0].text and  'Fees' not in i.contents[0].text and 'Location' not in i.contents[0].text and i.contents[0].text !='') and len(i.contents[0].text)<30)]
# use this crazy list comprehension to get the data we want 
#1. don't want the text 'Price' in the first column
#2. don't want the text 'Fee' in the first column
#3. don't want the text 'Location' in the first column
#4. the text length of first column must be less than 30 characters long

In [30]:
rows[0].contents[0].text #just exploring here...


Out[30]:
u'Economy Car'

In [31]:
rows[0].contents[4].text #need to get rid of the $....


Out[31]:
u'$173'

In [32]:
rows[3].contents[0].text #need to make it utf-8


Out[32]:
u'Standard Car'

In [33]:
#process the data
prices = {} 
for i in rows:
    #print the 1st column text
    print i.contents[0].text.encode('utf-8')
    prices[i.contents[0].text.encode('utf-8')] = [i.contents[1].text.encode('utf-8'),i.contents[2].text.encode('utf-8'), i.contents[3].text.encode('utf-8'),i.contents[4].text.encode('utf-8')]


Economy Car
Compact Car
Intermediate Car
Standard Car
Fullsize Car
Premium Car
Intermediate SUV
Standard SUV
Premium Crossover
Mini Van
Standard Van
Fullsize SUV
Standard Convertible
Standard Specialty
Fullsize Specialty
Premium Specialty
Luxury Specialty
Luxury Car
Premium SUV
Luxury SUV
Fullsize Van

In [34]:
prices


Out[34]:
{'Compact Car': ['$164', '$459', '$364', '$178'],
 'Economy Car': ['$159', '$515', '$364', '$173'],
 'Fullsize Car': ['$175', '$528', '$378', '$175'],
 'Fullsize SUV': ['$383', '$735', '$610', '$383'],
 'Fullsize Specialty': ['Not Available',
  'Not Available',
  'Not Available',
  '$1,025'],
 'Fullsize Van': ['$595', '$739', '$730', '$595'],
 'Intermediate Car': ['$166.77', '$473', '$373', '$180'],
 'Intermediate SUV': ['$416', '$570', '$486', '$416'],
 'Luxury Car': ['$361', '$652', '$566', '$361'],
 'Luxury SUV': ['$590', 'Not Available', 'Not Available', '$590'],
 'Luxury Specialty': ['Not Available',
  'Not Available',
  'Not Available',
  '$1,220'],
 'Mini Van': ['$285', '$556', '$548', '$291'],
 'Premium Car': ['$335', '$625', '$495', '$335'],
 'Premium Crossover': ['Not Available',
  'Not Available',
  'Not Available',
  '$383'],
 'Premium SUV': ['$472', '$905', '$730', '$472'],
 'Premium Specialty': ['Not Available',
  'Not Available',
  'Not Available',
  '$1,025'],
 'Standard Car': ['$164', '$528', '$376', '$164'],
 'Standard Convertible': ['$489', '$712', '$612', '$489'],
 'Standard SUV': ['$306', '$611', '$475', '$306'],
 'Standard Specialty': ['$188', 'Not Available', 'Not Available', '$188'],
 'Standard Van': ['$452', 'Not Available', 'Not Available', 'Not Available']}

In [35]:
iteritems = prices.iteritems() 
#call .iteritems() on a dictionary will give you a generator which you can iter over

In [36]:
iteritems.next() #run me five times


Out[36]:
('Standard Car', ['$164', '$528', '$376', '$164'])

In [37]:
for name, priceList in prices.iteritems():
    newPriceList = []
    for i in priceList:
        newPriceList.append(i.replace('$',''))
    prices[name] = newPriceList

In [38]:
prices


Out[38]:
{'Compact Car': ['164', '459', '364', '178'],
 'Economy Car': ['159', '515', '364', '173'],
 'Fullsize Car': ['175', '528', '378', '175'],
 'Fullsize SUV': ['383', '735', '610', '383'],
 'Fullsize Specialty': ['Not Available',
  'Not Available',
  'Not Available',
  '1,025'],
 'Fullsize Van': ['595', '739', '730', '595'],
 'Intermediate Car': ['166.77', '473', '373', '180'],
 'Intermediate SUV': ['416', '570', '486', '416'],
 'Luxury Car': ['361', '652', '566', '361'],
 'Luxury SUV': ['590', 'Not Available', 'Not Available', '590'],
 'Luxury Specialty': ['Not Available',
  'Not Available',
  'Not Available',
  '1,220'],
 'Mini Van': ['285', '556', '548', '291'],
 'Premium Car': ['335', '625', '495', '335'],
 'Premium Crossover': ['Not Available',
  'Not Available',
  'Not Available',
  '383'],
 'Premium SUV': ['472', '905', '730', '472'],
 'Premium Specialty': ['Not Available',
  'Not Available',
  'Not Available',
  '1,025'],
 'Standard Car': ['164', '528', '376', '164'],
 'Standard Convertible': ['489', '712', '612', '489'],
 'Standard SUV': ['306', '611', '475', '306'],
 'Standard Specialty': ['188', 'Not Available', 'Not Available', '188'],
 'Standard Van': ['452', 'Not Available', 'Not Available', 'Not Available']}

In [39]:
data = pd.DataFrame.from_dict(prices, orient='index') #get a pandas DataFrame from the prices dictionary

In [40]:
data


Out[40]:
0 1 2 3
Standard Car 164 528 376 164
Premium Car 335 625 495 335
Standard Convertible 489 712 612 489
Compact Car 164 459 364 178
Fullsize Van 595 739 730 595
Economy Car 159 515 364 173
Fullsize SUV 383 735 610 383
Luxury SUV 590 Not Available Not Available 590
Standard Van 452 Not Available Not Available Not Available
Fullsize Car 175 528 378 175
Intermediate SUV 416 570 486 416
Fullsize Specialty Not Available Not Available Not Available 1,025
Luxury Car 361 652 566 361
Intermediate Car 166.77 473 373 180
Mini Van 285 556 548 291
Standard SUV 306 611 475 306
Premium Crossover Not Available Not Available Not Available 383
Premium SUV 472 905 730 472
Luxury Specialty Not Available Not Available Not Available 1,220
Premium Specialty Not Available Not Available Not Available 1,025
Standard Specialty 188 Not Available Not Available 188

In [41]:
data = data.replace('Not Available', numpy.nan) #replace the 'Not Available' data point to numpy.nan

In [42]:
data = pd.to_numeric(data, errors='coerce') #cast to numeric data

In [43]:
data


Out[43]:
0 1 2 3
Standard Car 164 528 376 164
Premium Car 335 625 495 335
Standard Convertible 489 712 612 489
Compact Car 164 459 364 178
Fullsize Van 595 739 730 595
Economy Car 159 515 364 173
Fullsize SUV 383 735 610 383
Luxury SUV 590 NaN NaN 590
Standard Van 452 NaN NaN NaN
Fullsize Car 175 528 378 175
Intermediate SUV 416 570 486 416
Fullsize Specialty NaN NaN NaN 1,025
Luxury Car 361 652 566 361
Intermediate Car 166.77 473 373 180
Mini Van 285 556 548 291
Standard SUV 306 611 475 306
Premium Crossover NaN NaN NaN 383
Premium SUV 472 905 730 472
Luxury Specialty NaN NaN NaN 1,220
Premium Specialty NaN NaN NaN 1,025
Standard Specialty 188 NaN NaN 188

In [44]:
data.columns= ['Alamo','Avis','Budget','Enterprise'] #set column names

In [45]:
data


Out[45]:
Alamo Avis Budget Enterprise
Standard Car 164 528 376 164
Premium Car 335 625 495 335
Standard Convertible 489 712 612 489
Compact Car 164 459 364 178
Fullsize Van 595 739 730 595
Economy Car 159 515 364 173
Fullsize SUV 383 735 610 383
Luxury SUV 590 NaN NaN 590
Standard Van 452 NaN NaN NaN
Fullsize Car 175 528 378 175
Intermediate SUV 416 570 486 416
Fullsize Specialty NaN NaN NaN 1,025
Luxury Car 361 652 566 361
Intermediate Car 166.77 473 373 180
Mini Van 285 556 548 291
Standard SUV 306 611 475 306
Premium Crossover NaN NaN NaN 383
Premium SUV 472 905 730 472
Luxury Specialty NaN NaN NaN 1,220
Premium Specialty NaN NaN NaN 1,025
Standard Specialty 188 NaN NaN 188

In [46]:
data.notnull() #check for missing data


Out[46]:
Alamo Avis Budget Enterprise
Standard Car True True True True
Premium Car True True True True
Standard Convertible True True True True
Compact Car True True True True
Fullsize Van True True True True
Economy Car True True True True
Fullsize SUV True True True True
Luxury SUV True False False True
Standard Van True False False False
Fullsize Car True True True True
Intermediate SUV True True True True
Fullsize Specialty False False False True
Luxury Car True True True True
Intermediate Car True True True True
Mini Van True True True True
Standard SUV True True True True
Premium Crossover False False False True
Premium SUV True True True True
Luxury Specialty False False False True
Premium Specialty False False False True
Standard Specialty True False False True

In [47]:
data.min(axis=1, skipna=True) #look at the cheapest car in each class


Out[47]:
Standard Car            164.000000
Premium Car             335.000000
Standard Convertible    489.000000
Compact Car             164.000000
Fullsize Van            595.000000
Economy Car             159.000000
Fullsize SUV            383.000000
Luxury SUV                     inf
Standard Van                   inf
Fullsize Car            175.000000
Intermediate SUV        416.000000
Fullsize Specialty             inf
Luxury Car              361.000000
Intermediate Car        166.770000
Mini Van                285.000000
Standard SUV            306.000000
Premium Crossover              inf
Premium SUV             472.000000
Luxury Specialty               inf
Premium Specialty              inf
Standard Specialty             inf
dtype: float64

From this point on, you can set up to run every night and email yourself results etc.


In [ ]: