ta_qt_scraper

Code for scraping hotel listings and their reviews from the TripAdvisor site.


In [1]:
import sys  
from PyQt4.QtGui import *  
from PyQt4.QtCore import *  
from PyQt4.QtWebKit import *  
from lxml import html 
import re
import time
import timeout_decorator

In [2]:
class Render(QWebPage):
    """WebKit page that loads a URL and blocks until loading has finished.

    After construction (and after every ``update_url`` call) the loaded main
    frame is available as ``self.frame``; call ``self.frame.toHtml()`` to get
    the rendered HTML.
    """

    def __init__(self, url):
        """Create the page and synchronously load ``url``.

        Args:
            url: Address of the page to load.
        """
        # Qt allows only one QApplication per process, so reuse the existing
        # one when a Render has already been created in this kernel.
        existing_app = QApplication.instance()
        if existing_app is None:
            existing_app = QApplication(sys.argv)
        self.app = existing_app
        QWebPage.__init__(self)
        # Defined up front so the attribute exists even before the first
        # loadFinished signal has been handled.
        self.frame = None
        self.loadFinished.connect(self._loadFinished)
        self.update_url(url)

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: keep the frame and leave the event loop."""
        self.frame = self.mainFrame()
        # Ends the exec_() call started in update_url so the caller resumes.
        self.app.quit()

    def update_url(self, url):
        """Load ``url`` in the main frame and block until loading has finished.

        Args:
            url: Address of the page to load.
        """
        self.mainFrame().load(QUrl(url))
        # Run the Qt event loop; _loadFinished quits it once the page is done.
        self.app.exec_()

In [3]:
# Listing page for New Haven, Connecticut hotels: site root followed by the listing path.
url = (
    "http://www.tripadvisor.com"
    "/Hotels-g33851-New_Haven_Connecticut-Hotels.html"
)

In [4]:
# Create the render object; Render.__init__ runs the Qt event loop and only
# returns once the page's loadFinished signal has fired.
r = Render(url)

# result is a QString holding the HTML of the loaded main frame:
result = r.frame.toHtml()

# QString should be converted to a Python string before being processed by lxml.
# NOTE(review): toAscii() presumably cannot represent non-ASCII characters
# (accented names etc.) -- confirm whether toUtf8() should be used instead.
formatted_result = str(result.toAscii())

# Next build the lxml tree from formatted_result
tree = html.fromstring(formatted_result)

In [5]:
len(tree)


Out[5]:
2

In [6]:
# Select the hotel container divs under the ACCOM_OVERVIEW element and,
# separately, every element whose class is exactly "listing_title".
# The two queries are independent and need not return the same number of nodes.
hotels_div = tree.xpath('//*[@id="ACCOM_OVERVIEW"]/div/div/div/div')
titles_div = tree.xpath('//*[@class="listing_title"]')

In [7]:
hotels_div


Out[7]:
[<Element div at 0x10adfaba8>,
 <Element div at 0x10adfac00>,
 <Element div at 0x10adfac58>,
 <Element div at 0x10adfacb0>,
 <Element div at 0x10adfad08>,
 <Element div at 0x10adfad60>,
 <Element div at 0x10adfadb8>,
 <Element div at 0x10adfae10>,
 <Element div at 0x10adfae68>,
 <Element div at 0x10adfaec0>,
 <Element div at 0x10adfaf18>,
 <Element div at 0x10adfaf70>,
 <Element div at 0x10adfafc8>,
 <Element div at 0x10ae24050>,
 <Element div at 0x10ae240a8>,
 <Element div at 0x10ae24100>,
 <Element div at 0x10ae24158>]

In [8]:
titles_div


Out[8]:
[<Element div at 0x10ae241b0>,
 <Element div at 0x10ae24208>,
 <Element div at 0x10ae24260>,
 <Element div at 0x10ae242b8>,
 <Element div at 0x10ae24310>,
 <Element div at 0x10ae24368>,
 <Element div at 0x10ae243c0>,
 <Element div at 0x10ae24418>,
 <Element div at 0x10ae24470>,
 <Element div at 0x10ae244c8>,
 <Element div at 0x10ae24520>,
 <Element div at 0x10ae24578>,
 <Element div at 0x10ae245d0>]

In [9]:
r.app.quit()

In [9]:
r.app.exit()

In [10]:
# Rebind r to 0, dropping the reference to the Render object.
# NOTE(review): later cells call r.update_url(...), so r must be a Render
# instance again before they run -- this cell breaks a top-to-bottom re-run.
r = 0

In [11]:
r


Out[11]:
0

In [12]:
url


Out[12]:
'http://www.tripadvisor.com/Hotels-g33851-New_Haven_Connecticut-Hotels.html'

In [ ]:
# Build a new Render for the listing URL, bound to the name s.
# NOTE(review): the following cells use r, not s -- confirm which name is intended.
s = Render(url)

In [9]:
# Walk the listing-title divs, echoing and recording each hotel's display
# name and the relative link to its review page.
hotel_names = []
links = []

for title in titles_div:
    # Full text of the title div (includes any special-offer text inside it).
    shown_name = title.text_content()
    print(shown_name)
    hotel_names.append(shown_name)
    # href of the anchor directly under the title div.
    review_href = title.xpath('a/@href')[0]
    print(review_href)
    links.append(review_href)
    print('*'*50)


Omni New Haven Hotel at YaleSpecial Offer12% Off or Free Parking! 
/Hotel_Review-g33851-d119851-Reviews-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html
**************************************************
The Study at Yale
/Hotel_Review-g33851-d1217689-Reviews-The_Study_at_Yale-New_Haven_Connecticut.html
**************************************************
Omni New Haven Hotel at YaleSpecial Offer12% Off or Free Parking! 
/Hotel_Review-g33851-d119851-Reviews-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html
**************************************************
New Haven Hotel
/Hotel_Review-g33851-d85912-Reviews-New_Haven_Hotel-New_Haven_Connecticut.html
**************************************************
Courtyard New Haven at YaleSpecial OfferDouble Rewards Points! 
/Hotel_Review-g33851-d83697-Reviews-Courtyard_New_Haven_at_Yale-New_Haven_Connecticut.html
**************************************************
New Haven Premiere Hotel and Suites
/Hotel_Review-g33851-d85908-Reviews-New_Haven_Premiere_Hotel_and_Suites-New_Haven_Connecticut.html
**************************************************
Duncan Hotel
/Hotel_Review-g33851-d119885-Reviews-Duncan_Hotel-New_Haven_Connecticut.html
**************************************************
La Quinta Inn & Suites New HavenSpecial OfferSave up to 25% 
/Hotel_Review-g33851-d85971-Reviews-La_Quinta_Inn_Suites_New_Haven-New_Haven_Connecticut.html
**************************************************
Days Inn New HavenSpecial OfferPlan Ahead & Save 15% 
/Hotel_Review-g33851-d225916-Reviews-Days_Inn_New_Haven-New_Haven_Connecticut.html
**************************************************
Atlantic Motel
/Hotel_Review-g33851-d119904-Reviews-Atlantic_Motel-New_Haven_Connecticut.html
**************************************************
Regal Inn
/Hotel_Review-g33851-d83702-Reviews-Regal_Inn-New_Haven_Connecticut.html
**************************************************
New Haven Inn
/Hotel_Review-g33851-d83701-Reviews-New_Haven_Inn-New_Haven_Connecticut.html
**************************************************
Park Plaza Hotel
/Hotel_Review-g33851-d269024-Reviews-Park_Plaza_Hotel-New_Haven_Connecticut.html
**************************************************

In [10]:
links


Out[10]:
['/Hotel_Review-g33851-d119851-Reviews-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d1217689-Reviews-The_Study_at_Yale-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d119851-Reviews-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d85912-Reviews-New_Haven_Hotel-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d83697-Reviews-Courtyard_New_Haven_at_Yale-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d85908-Reviews-New_Haven_Premiere_Hotel_and_Suites-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d119885-Reviews-Duncan_Hotel-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d85971-Reviews-La_Quinta_Inn_Suites_New_Haven-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d225916-Reviews-Days_Inn_New_Haven-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d119904-Reviews-Atlantic_Motel-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d83702-Reviews-Regal_Inn-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d83701-Reviews-New_Haven_Inn-New_Haven_Connecticut.html',
 '/Hotel_Review-g33851-d269024-Reviews-Park_Plaza_Hotel-New_Haven_Connecticut.html']

Now to scrape the reviews from a specific hotel


In [11]:
# The scraped links are site-relative, so prefix the site root to get the
# absolute URL of the first hotel's review page.
base_url = 'http://www.tripadvisor.com'
url = base_url+links[0]
# Bare expression: displays the resulting URL as the cell output.
url


Out[11]:
'http://www.tripadvisor.com/Hotel_Review-g33851-d119851-Reviews-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html'

In [12]:
# Reuse the existing render object (r must be a live Render instance) to load
# the review page; update_url blocks until the page has finished loading.
r.update_url(url)

# result is a QString holding the HTML of the loaded main frame:
result = r.frame.toHtml()

# QString should be converted to a Python string before being processed by lxml.
# NOTE(review): toAscii() presumably cannot represent non-ASCII characters -- confirm.
formatted_result = str(result.toAscii())

# Next build the lxml tree from formatted_result (replaces the listing-page tree)
tree = html.fromstring(formatted_result)

In [13]:
# example of how to access specific divs within divs
# sel.xpath('//div[@id="REVIEWS"]/div/div[contains(@class, "review")]/div[@class="col2of2"]/div[@class="innerBubble"]')

In [14]:
# One element per review: every div whose class attribute contains "reviewSelector".
full_reviews = tree.xpath('//div[contains(@class, "reviewSelector")]')
# users = tree.xpath('//div[@class="col1of2"]')

In [15]:
full_reviews


Out[15]:
[<Element div at 0x109ed9158>,
 <Element div at 0x109ed91b0>,
 <Element div at 0x109ed9208>,
 <Element div at 0x109ed9260>,
 <Element div at 0x109ed92b8>,
 <Element div at 0x109ed9310>,
 <Element div at 0x109ed9368>,
 <Element div at 0x109ed93c0>,
 <Element div at 0x109ed9418>,
 <Element div at 0x109ed9470>]

In [15]:
# Print every field of every review on the current page.
for fullrev in full_reviews:
    # Left column: reviewer details.
    member_info = fullrev.xpath('div/div[contains(@class, "col1of2")]/div[contains(@class, "member_info")]')[0]
    usrnm = member_info.xpath('div/div[contains(@class, "username mo")]')
    # Right column: the review itself.
    review = fullrev.xpath('div/div[@class="col2of2"]/div[@class="innerBubble"]')[0]
    # [1:-1] drops the first and last character of the title text
    # (presumably the surrounding quotation marks -- confirm against the page).
    title = review.xpath('div/div[contains(@class, "quote")]')[0].text_content().strip()[1:-1]
    # The alt text starts with the numeric score; keep only that first token.
    rating = review.xpath('div/div[contains(@class, "rating")]/span/img/@alt')[0].split(' ')[0]
    # Prefer the ratingDate span's title attribute; when it is absent fall
    # back to the span's text, the same way return_results does.
    date = review.xpath('div/div[contains(@class, "rating")]/span[contains(@class, "ratingDate")]/@title')
    if len(date) > 0:
        date = date[0]
    else:
        date = review.xpath('div/div[contains(@class, "rating")]/span[contains(@class, "ratingDate")]')[0].text_content()
    rev = review.xpath('div/div[contains(@class, "entry")]')[0].text_content().strip().replace("\n", "")
    if len(usrnm) > 0:
        #location = member_info.xpath('div[1]')[0].text_content()
        print('Username: {}'.format(str(usrnm[0].text_content()).strip()))
    else:
        # No username element found for this review.
        print('Username: A Trip Advisor Member')

    location = member_info.xpath('div[contains(@class, "location")]')
    if len(location) > 0:
        print('Location: {}'.format(str(location[0].text_content()).strip()))
    else:
        print('Location: ')

    # The element id looks like "review_<digits>"; look it up once.
    full_review_id = fullrev.xpath('@id')[0]
    print('full review_id: {}'.format(full_review_id))
    # Raw string so that \d is a regex digit class, not a string escape.
    id_match = re.search(r'review_(\d+)$', full_review_id)
    rev_id = id_match.group(1) if id_match is not None else ''

    print('review_id: {}'.format(rev_id))
    print('Title: {}'.format(title))
    print('Rating: {}'.format(rating))
    print('Date: {}'.format(date))
    print('Review:')
    print(rev)
    print('*'*50)


Username: Katelyn F
Location: Long Beach Island, New Jersey
full review_id: review_310707970
review_id: 310707970
Title: Perfect location.
Rating: 5
Date: September 15, 2015
Review:
This was the first stop on our honeymoon road trip, and it was absolutely perfect. You can't beat the location next to Yale and right downtown. Our room was super comfortable. The staff was extremely accommodating and really helped us out when we accidentally we left something important in the room after checkout.
**************************************************
Username: SweetlifeChicago
Location: Chicago
full review_id: review_310654721
review_id: 310654721
Title: Conveniently located
Rating: 3
Date: September 15, 2015
Review:
Very convenient location, walking distance to Yale, restaurants, etc. Rooms are OK and clean but felt outdated.Small fridge and coffee maker below TV. Check-in/check-out very fast. Had a view on parking and port. 15th floor was very quiet, didn't notice any noise from the street. The breakfast dining room offers a nice view on Yale Campus. Buffet and a la...More
**************************************************
Username: Oswaldo A
Location: Shelton, Connecticut
full review_id: review_309666719
review_id: 309666719
Title: Best option in new haven
Rating: 5
Date: September 12, 2015
Review:
Excellent location, walking distance to everything, friendly and professional team and comfortable rooms , have a nice restaurant in the rooftop with amazing view over new haven Realy enjoy a time in this hotel
**************************************************
Username: SLR879
Location: San Juan, null, Puerto Rico
full review_id: review_309287783
review_id: 309287783
Title: Good for a short stay
Rating: 4
Date: September 11, 2015
Review:
Arrived very late at night but was welcomed by very nice people there. The rooms were perfect for my stay and when I told them I was going for an interview, they gave me a special rate. They provide with restaurants nearby, but their restaurant only provides with breakfast and no room service was offered. Overall, it was good!
**************************************************
Username: Valerie P
Location: Neuilly-sur-Seine, France
full review_id: review_309145073
review_id: 309145073
Title: Perfect stay
Rating: 5
Date: September 10, 2015
Review:
The hotel is conveniently located, very close to Yale University and to Chapel , College streeets on which lots of restaurants can be found+ on Temple Street too. We had asked for a Yale view room, upper floors and we got a King on the 16th floor with a beautiful view, the bed and pillows are superbly comfortable and the...More
**************************************************
Username: tibird77
Location: Arlington, Massachusetts
full review_id: review_308834381
review_id: 308834381
Title: Pet friendly, clean and convenient
Rating: 5
Date: September 9, 2015
Review:
I was in town for a conference, which was held at the Omni, so that is where we stayed. I loved that it was dog-friendly, with a $50 fee per stay. I travel often with my dog & you never know what you're gonna get with pet friendly, but this was a clean, comfortable room. All staff was very friendly...More
**************************************************
Username: michaelsU5133PY
Location: 
full review_id: review_308413873
review_id: 308413873
Title: Nice hotel in a great location
Rating: 4
Date: September 8, 2015
Review:
The Omni New Haven is a full service hotel right in the heart of downtown New Haven. All the usual amenities, friendly staff and comfortable rooms. Step outside the front door and you have tons of restaurants, bars and coffee shops to choose from. Ask for a higher floor for less street noise.
**************************************************
Username: Tami T
Location: Ashley, United States
full review_id: review_308413858
review_id: 308413858
Title: New haven 20k
Rating: 5
Date: September 8, 2015
Review:
The hotel was a perfect location for the race. The room was good and a fair price. Would stay there again. There are enough restaurants near the hotel if you didn't like the restaurant in the hotel. The staff was friendly.
**************************************************
Username: cdecdd28
Location: Thetford Mines, Canada
full review_id: review_307358310
review_id: 307358310
Title: MPC meet and train
Rating: 4
Date: September 5, 2015
Review:
Spent a week with fellow MPC's from Canada and USA. To train and updates on juridistion from division 1 2 and 3. This is the Supreme office hotel where we always go. Self or valet parking. Restaurant at the 19 floor is expensive. Personnel both at front desk, porters are eager to serve. Fitness room is small. Go outside for...More
**************************************************
Username: Sandra D
Location: 
full review_id: review_307025158
review_id: 307025158
Title: Beautiful View of Yale
Rating: 4
Date: September 4, 2015
Review:
Our suite, overlooking the Yale campus, was well appointed and comfortable. The view at night was worth the pricey room. One concern we had was that when we decided to stroll through the park during the early evening hours, a police officer suggested we may be better off watching a movie in our room.The hotel dining experience was great....More
**************************************************

In [16]:
# Count of anchors whose class contains "next". This value is evaluated but
# not displayed, because only a cell's last expression becomes its output.
len(tree.xpath('//a[contains(@class, "next")]'))
# Relative href of the first "next" anchor, i.e. the link to the following review page.
tree.xpath('//a[contains(@class, "next")]/@href')[0]


Out[16]:
'/Hotel_Review-g33851-d119851-Reviews-or10-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html#REVIEWS'

Now scraping all pages of the reviews:


In [17]:
@timeout_decorator.timeout(5, use_signals=True)
def return_results(formatted_result, base_url, page):
    """Parse one rendered review page and collect the fields of every review.

    Args:
        formatted_result: HTML of the rendered review page, as a string.
        base_url: Site root that the relative "next page" link is appended to.
        page: Number of the page being parsed.

    Returns:
        A dict with the per-review lists 'usrnms', 'locs', 'ttls', 'rtngs',
        'dts', 'rvws' and 'revids' (one entry per review, in page order),
        plus 'url' (absolute URL of the next page, or None when there is no
        next page), 'more_reviews' (True when a next page exists) and 'page'
        (incremented by one when a next page exists, otherwise unchanged).

    Raises:
        timeout_decorator.TimeoutError: if the call runs longer than 5 seconds.
        IndexError: if a review is missing one of the expected elements.
    """
    #Next build lxml tree from formatted_result
    tree = html.fromstring(formatted_result)

    print('Tree returned!')
    # Diagnostic: shows whether an element with "emailOnlySignupWrap" in its id is present.
    print(tree.xpath('//div[contains(@id, "emailOnlySignupWrap")]'))

    full_reviews = tree.xpath('//div[contains(@class, "reviewSelector")]')

    page_usernames = []
    page_locations = []
    page_titles = []
    page_ratings = []
    page_dates = []
    page_reviews = []
    page_review_ids = []

    for fullrev in full_reviews:
        # Left column: reviewer details.
        member_info = fullrev.xpath('div/div[contains(@class, "col1of2")]/div[contains(@class, "member_info")]')[0]
        usrnm = member_info.xpath('div/div[contains(@class, "username mo")]')
        # Right column: the review itself.
        review = fullrev.xpath('div/div[@class="col2of2"]/div[@class="innerBubble"]')[0]
        # [1:-1] drops the first and last character of the title text
        # (presumably the surrounding quotation marks -- confirm against the page).
        title = review.xpath('div/div[contains(@class, "quote")]')[0].text_content().strip()[1:-1]
        # The alt text starts with the numeric score; keep only that first token.
        rating = review.xpath('div/div[contains(@class, "rating")]/span/img/@alt')[0].split(' ')[0]
        # Prefer the ratingDate span's title attribute; fall back to its text.
        date = review.xpath('div/div[contains(@class, "rating")]/span[contains(@class, "ratingDate")]/@title')
        if len(date) > 0:
            date = date[0]
        else:
            date = review.xpath('div/div[contains(@class, "rating")]/span[contains(@class, "ratingDate")]')[0].text_content()
        rev = review.xpath('div/div[contains(@class, "entry")]')[0].text_content().strip().replace("\n", "")
        if len(usrnm) > 0:
            #location = member_info.xpath('div[1]')[0].text_content()
            print('Username: {}'.format(str(usrnm[0].text_content()).strip()))
            page_usernames.append(str(usrnm[0].text_content()).strip())
        else:
            # No username element: report a placeholder but store an empty string.
            print('Username: A Trip Advisor Member')
            page_usernames.append('')

        location = member_info.xpath('div[contains(@class, "location")]')
        if len(location) > 0:
            page_locations.append(location[0].text_content().strip())
        else:
            page_locations.append('')

        print('Title: {}'.format(title.encode('utf-8')))
        page_titles.append(title)
        # Store this review's rating. The previous code appended the name
        # `ratings`, which is not defined in this function.
        page_ratings.append(rating)
        page_dates.append(date)
        page_reviews.append(rev)
        # The element id looks like "review_<digits>"; keep only the digits.
        # Raw string so that \d is a regex digit class, not a string escape.
        id_match = re.search(r'review_(\d+)$', fullrev.xpath('@id')[0])
        page_review_ids.append(id_match.group(1) if id_match is not None else '')

    # Work out where the next page lives, if there is one. `url` is always
    # assigned so that building the result dict cannot fail on the last page.
    next_hrefs = tree.xpath('//a[contains(@class, "next")]/@href')
    if len(next_hrefs) > 0:
        url = base_url+next_hrefs[0]
        more_reviews = True
        page += 1
        print('url and page updated.')
    else:
        url = None
        more_reviews = False

    ret_dict = {'usrnms': page_usernames,
                'locs': page_locations,
                'ttls': page_titles,
                'rtngs': page_ratings,
                'dts': page_dates,
                'rvws': page_reviews,
                'revids': page_review_ids,
                'url': url,
                'more_reviews': more_reviews,
                'page': page}
    return ret_dict

In [28]:
# Accumulators for the fields gathered across all review pages.
usernames = []
locations = []
titles = []
ratings = []
dates = []
reviews = []
review_ids = []

# Start from the first hotel's review page (links holds site-relative paths).
base_url = 'http://www.tripadvisor.com'
url = base_url+links[0]
# Bare expression: displays the starting URL as the cell output.
url


Out[28]:
'http://www.tripadvisor.com/Hotel_Review-g33851-d119851-Reviews-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html'

In [25]:
# Single-page trial run: load page 1 of the reviews and parse it.
page = 1
# Blocks until the page has finished loading (r must be a live Render instance).
r.update_url(url)

#result is a QString:
result = r.frame.toHtml()

#QString should be converted to string before processed by lxml
# NOTE(review): toAscii() presumably cannot represent non-ASCII characters -- confirm.
formatted_result = str(result.toAscii())

# ret_dict carries this page's fields plus the next page's URL and page number.
ret_dict = return_results(formatted_result, base_url, page)


Tree returned!
[]
Username: Katelyn F
Title: Perfect location.
Username: SweetlifeChicago
Title: Conveniently located
Username: Oswaldo A
Title: Best option in new haven
Username: SLR879
Title: Good for a short stay
Username: Valerie P
Title: Perfect stay
Username: tibird77
Title: Pet friendly, clean and convenient
Username: michaelsU5133PY
Title: Nice hotel in a great location
Username: Tami T
Title: New haven 20k
Username: cdecdd28
Title: MPC meet and train
Username: Sandra D
Title: Beautiful View of Yale
url and page updated.

In [26]:
ret_dict['url']


Out[26]:
'http://www.tripadvisor.com/Hotel_Review-g33851-d119851-Reviews-or10-Omni_New_Haven_Hotel_at_Yale-New_Haven_Connecticut.html#REVIEWS'

In [ ]:
# Accumulators: each gets one list per scraped page appended to it, so the
# final result is a list of per-page lists.
usernames = []
locations = []
titles = []
ratings = []
dates = []
reviews = []
review_ids = []

# Upper bound on consecutive timeouts for a single page, so a page that never
# parses in time cannot keep the loop running forever.
MAX_TIMEOUT_RETRIES = 5

base_url = 'http://www.tripadvisor.com'
url = base_url+links[0]
more_reviews = True
page = 1
timeouts_in_a_row = 0
while more_reviews:
    print('*'*50)
    print('Now on page {}'.format(page))
    print('*'*50)
    # Load the current page; update_url blocks until loading has finished.
    r.update_url(url)

    #result is a QString:
    result = r.frame.toHtml()

    #QString should be converted to string before processed by lxml
    formatted_result = str(result.toAscii())

    try:
        ret_dict = return_results(formatted_result, base_url, page)
    except timeout_decorator.TimeoutError:
        # Only a genuine timeout is retried; any other exception is a real
        # error and is allowed to propagate instead of being reported as a
        # timeout and retried indefinitely.
        timeouts_in_a_row += 1
        if timeouts_in_a_row >= MAX_TIMEOUT_RETRIES:
            print('Giving up on page {} after {} consecutive timeouts.'.format(page, timeouts_in_a_row))
            break
        print('Timed out! Trying that page again...' )
        continue

    timeouts_in_a_row = 0
    usernames.append(ret_dict['usrnms'])
    locations.append(ret_dict['locs'])
    titles.append(ret_dict['ttls'])
    ratings.append(ret_dict['rtngs'])
    dates.append(ret_dict['dts'])
    reviews.append(ret_dict['rvws'])
    review_ids.append(ret_dict['revids'])
    # Advance to the next page as reported by the parser.
    url = ret_dict['url']
    more_reviews = ret_dict['more_reviews']
    page = ret_dict['page']


**************************************************
Now on page 1
**************************************************
Tree returned!
[]
Username: Katelyn F
Title: Perfect location.
Username: SweetlifeChicago
Title: Conveniently located
Username: Oswaldo A
Title: Best option in new haven
Username: SLR879
Title: Good for a short stay
Username: Valerie P
Title: Perfect stay
Username: tibird77
Title: Pet friendly, clean and convenient
Username: michaelsU5133PY
Title: Nice hotel in a great location
Username: Tami T
Title: New haven 20k
Username: cdecdd28
Title: MPC meet and train
Username: Sandra D
Title: Beautiful View of Yale
url and page updated.
**************************************************
Now on page 2
**************************************************
Tree returned!
[]
Username: Keith T
Title: Great hotel and location
Username: David S
Title: Boarders and more.
Username: Rose H
Title: One night stay!
Username: Sean713
Title: Best Hotel in the City
Username: Dunmy N
Title: Great stay
Username: Andy H
Title: Stay
Username: paknflyguy
Title: Wonderful stay
Username: James B
Title: Wedding stay
Username: James N
Title: Hotel is well located, but tired
Username: Blair H
Title: Amazing staff, nice rooms, prime location...
url and page updated.
**************************************************
Now on page 3
**************************************************
Tree returned!
[]
Username: pacoaMadrid
Title: visit to Yale
Username: Joe H
Title: Brief stay
Username: Z2089UCmarym
Title: Great service
Username: Nelson5454
Title: Great for attending conferences
Username: gcala
Title: Nice hotel 
Username: Brian C
Title: At Home
Username: Lisa T
Title: Great Wedding Venue
Username: Sally B
Title: Omni New Haven
Username: Leverne M
Title: Excellent service and great food!
Username: Jessica W
Title: Charming hotel!
url and page updated.
**************************************************
Now on page 4
**************************************************
Tree returned!
[]
Username: Vivi C
Title: Average, Better with select guest..
Username: Julia A
Title: Very nice, comfortable hotel
Username: Natalia A
Title: 2 night stay
Username: Ryan P
Title: Amazing
Username: Sebastiendog
Title: Excellent Hotel,excellent location
Username: Kermit753
Title: My place to stay in New Haven
Username: traveller211
Title: Great location, excellent value, lovely hotel
Username: newrock
Title: High quality hotel in the heart of New Haven
Username: JQ2009
Title: Looked a bit grander in photos
Username: cheryl c
Title: Great hotel
url and page updated.
**************************************************
Now on page 5
**************************************************
Tree returned!
[]
Username: SunshineV88
Title: Went for a wedding, had to take our old dog!
Username: LarryLB
Title: nice place and good staff
Username: Chris T
Title: Great Hotel.. Great Location
Username: Catherine M
Title: Perfect for Small Meeting-Presentation Space!
Username: rjc999
Title: All good except the mattress
Username: abajaj960
Title: Yale New Haven
Username: LauretteLancaster
Title: Great staff made the stay
Username: Thomas H
Title: Excellent Surprise
Username: bjostfeld
Title: Omni Hotel at Yale Redeems Itself
Username: ortteb1
Title: Great location to Yale campus; comfortable lobby
url and page updated.
**************************************************
Now on page 6
**************************************************
Tree returned!
[]
Username: Scott G
Title: courteous staff
Username: tigersz
Title: decent hotel, good service
Username: Ostfeld
Title: Seriously Disappointing 
Username: jeffreyfromjericho
Title: FOUR STAR? NO WAY
Username: Sharon B
Title: Omni New Haven/Yale
Username: Anne G
Title: Practical
Username: Dru D
Title: Omni New Haven
Username: Linda E
Title: Excellent in every way.
Username: Bret D
Title: Wedding Trip
Username: Paul W
Title: Will tell our friends how Great this hotel is
url and page updated.
**************************************************
Now on page 7
**************************************************
Tree returned!

In [2]:
import time
import timeout_decorator

@timeout_decorator.timeout(5, use_signals=True)
def mytest():
    """Print a progress line once per second for nine seconds.

    Used to check the timeout decorator: the loop needs about nine seconds,
    so the 5-second limit is expected to interrupt it part-way through.
    """
    # print() with a single argument behaves the same on Python 2 and 3 and
    # matches the call style used in the rest of the notebook.
    print("Start")
    for i in range(1,10):
        time.sleep(1)
        print("%d seconds have passed" % i)

In [3]:
# Expect the decorator to abort mytest(); catch only the timeout error so that
# any other failure is not mistaken for a successful timeout.
try:
    mytest()
except timeout_decorator.TimeoutError:
    print('It worked!')


Start
1 seconds have passed
2 seconds have passed
3 seconds have passed
4 seconds have passed
It worked!

In [6]:
help(r)


Help on Render in module __main__ object:

class Render(PyQt4.QtWebKit.QWebPage)
 |  Method resolution order:
 |      Render
 |      PyQt4.QtWebKit.QWebPage
 |      PyQt4.QtCore.QObject
 |      sip.wrapper
 |      sip.simplewrapper
 |      __builtin__.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, url)
 |  
 |  update_url(self, url)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from PyQt4.QtWebKit.QWebPage:
 |  
 |  acceptNavigationRequest(...)
 |      QWebPage.acceptNavigationRequest(QWebFrame, QNetworkRequest, QWebPage.NavigationType) -> bool
 |  
 |  action(...)
 |      QWebPage.action(QWebPage.WebAction) -> QAction
 |  
 |  applicationCacheQuotaExceeded = <unbound signal applicationCacheQuotaExceeded>
 |  bytesReceived(...)
 |      QWebPage.bytesReceived() -> int
 |  
 |  chooseFile(...)
 |      QWebPage.chooseFile(QWebFrame, QString) -> QString
 |  
 |  contentsChanged = <unbound signal contentsChanged>
 |  createPlugin(...)
 |      QWebPage.createPlugin(QString, QUrl, QStringList, QStringList) -> QObject
 |  
 |  createStandardContextMenu(...)
 |      QWebPage.createStandardContextMenu() -> QMenu
 |  
 |  createWindow(...)
 |      QWebPage.createWindow(QWebPage.WebWindowType) -> QWebPage
 |  
 |  currentFrame(...)
 |      QWebPage.currentFrame() -> QWebFrame
 |  
 |  databaseQuotaExceeded = <unbound signal databaseQuotaExceeded>
 |  downloadRequested = <unbound signal downloadRequested>
 |  event(...)
 |      QWebPage.event(QEvent) -> bool
 |  
 |  extension(...)
 |      QWebPage.extension(QWebPage.Extension, QWebPage.ExtensionOption option=None, QWebPage.ExtensionReturn output=None) -> bool
 |  
 |  featurePermissionRequestCanceled = <unbound signal featurePermissionRequestCanceled>
 |  featurePermissionRequested = <unbound signal featurePermissionRequested>
 |  findText(...)
 |      QWebPage.findText(QString, QWebPage.FindFlags options=0) -> bool
 |  
 |  focusNextPrevChild(...)
 |      QWebPage.focusNextPrevChild(bool) -> bool
 |  
 |  forwardUnsupportedContent(...)
 |      QWebPage.forwardUnsupportedContent() -> bool
 |  
 |  frameAt(...)
 |      QWebPage.frameAt(QPoint) -> QWebFrame
 |  
 |  frameCreated = <unbound signal frameCreated>
 |  geometryChangeRequested = <unbound signal geometryChangeRequested>
 |  hasSelection(...)
 |      QWebPage.hasSelection() -> bool
 |  
 |  history(...)
 |      QWebPage.history() -> QWebHistory
 |  
 |  inputMethodQuery(...)
 |      QWebPage.inputMethodQuery(Qt.InputMethodQuery) -> QVariant
 |  
 |  isContentEditable(...)
 |      QWebPage.isContentEditable() -> bool
 |  
 |  isModified(...)
 |      QWebPage.isModified() -> bool
 |  
 |  javaScriptAlert(...)
 |      QWebPage.javaScriptAlert(QWebFrame, QString)
 |  
 |  javaScriptConfirm(...)
 |      QWebPage.javaScriptConfirm(QWebFrame, QString) -> bool
 |  
 |  javaScriptConsoleMessage(...)
 |      QWebPage.javaScriptConsoleMessage(QString, int, QString)
 |  
 |  javaScriptPrompt(...)
 |      QWebPage.javaScriptPrompt(QWebFrame, QString, QString, QString) -> bool
 |  
 |  linkClicked = <unbound signal linkClicked>
 |  linkDelegationPolicy(...)
 |      QWebPage.linkDelegationPolicy() -> QWebPage.LinkDelegationPolicy
 |  
 |  linkHovered = <unbound signal linkHovered>
 |  loadFinished = <unbound signal loadFinished>
 |  loadProgress = <unbound signal loadProgress>
 |  loadStarted = <unbound signal loadStarted>
 |  mainFrame(...)
 |      QWebPage.mainFrame() -> QWebFrame
 |  
 |  menuBarVisibilityChangeRequested = <unbound signal menuBarVisibilityChangeRequested>
 |  microFocusChanged = <unbound signal microFocusChanged>
 |  networkAccessManager(...)
 |      QWebPage.networkAccessManager() -> QNetworkAccessManager
 |  
 |  palette(...)
 |      QWebPage.palette() -> QPalette
 |  
 |  pluginFactory(...)
 |      QWebPage.pluginFactory() -> QWebPluginFactory
 |  
 |  preferredContentsSize(...)
 |      QWebPage.preferredContentsSize() -> QSize
 |  
 |  printRequested = <unbound signal printRequested>
 |  repaintRequested = <unbound signal repaintRequested>
 |  restoreFrameStateRequested = <unbound signal restoreFrameStateRequested>
 |  saveFrameStateRequested = <unbound signal saveFrameStateRequested>
 |  scrollRequested = <unbound signal scrollRequested>
 |  selectedHtml(...)
 |      QWebPage.selectedHtml() -> QString
 |  
 |  selectedText(...)
 |      QWebPage.selectedText() -> QString
 |  
 |  selectionChanged = <unbound signal selectionChanged>
 |  setActualVisibleContentRect(...)
 |      QWebPage.setActualVisibleContentRect(QRect)
 |  
 |  setContentEditable(...)
 |      QWebPage.setContentEditable(bool)
 |  
 |  setFeaturePermission(...)
 |      QWebPage.setFeaturePermission(QWebFrame, QWebPage.Feature, QWebPage.PermissionPolicy)
 |  
 |  setForwardUnsupportedContent(...)
 |      QWebPage.setForwardUnsupportedContent(bool)
 |  
 |  setLinkDelegationPolicy(...)
 |      QWebPage.setLinkDelegationPolicy(QWebPage.LinkDelegationPolicy)
 |  
 |  setNetworkAccessManager(...)
 |      QWebPage.setNetworkAccessManager(QNetworkAccessManager)
 |  
 |  setPalette(...)
 |      QWebPage.setPalette(QPalette)
 |  
 |  setPluginFactory(...)
 |      QWebPage.setPluginFactory(QWebPluginFactory)
 |  
 |  setPreferredContentsSize(...)
 |      QWebPage.setPreferredContentsSize(QSize)
 |  
 |  setView(...)
 |      QWebPage.setView(QWidget)
 |  
 |  setViewportSize(...)
 |      QWebPage.setViewportSize(QSize)
 |  
 |  settings(...)
 |      QWebPage.settings() -> QWebSettings
 |  
 |  shouldInterruptJavaScript(...)
 |      QWebPage.shouldInterruptJavaScript() -> bool
 |  
 |  statusBarMessage = <unbound signal statusBarMessage>
 |  statusBarVisibilityChangeRequested = <unbound signal statusBarVisibilityChangeRequested>
 |  supportedContentTypes(...)
 |      QWebPage.supportedContentTypes() -> QStringList
 |  
 |  supportsContentType(...)
 |      QWebPage.supportsContentType(QString) -> bool
 |  
 |  supportsExtension(...)
 |      QWebPage.supportsExtension(QWebPage.Extension) -> bool
 |  
 |  swallowContextMenuEvent(...)
 |      QWebPage.swallowContextMenuEvent(QContextMenuEvent) -> bool
 |  
 |  toolBarVisibilityChangeRequested = <unbound signal toolBarVisibilityChangeRequested>
 |  totalBytes(...)
 |      QWebPage.totalBytes() -> int
 |  
 |  triggerAction(...)
 |      QWebPage.triggerAction(QWebPage.WebAction, bool checked=False)
 |  
 |  undoStack(...)
 |      QWebPage.undoStack() -> QUndoStack
 |  
 |  unsupportedContent = <unbound signal unsupportedContent>
 |  updatePositionDependentActions(...)
 |      QWebPage.updatePositionDependentActions(QPoint)
 |  
 |  userAgentForUrl(...)
 |      QWebPage.userAgentForUrl(QUrl) -> QString
 |  
 |  view(...)
 |      QWebPage.view() -> QWidget
 |  
 |  viewportAttributesForSize(...)
 |      QWebPage.viewportAttributesForSize(QSize) -> QWebPage.ViewportAttributes
 |  
 |  viewportChangeRequested = <unbound signal viewportChangeRequested>
 |  viewportSize(...)
 |      QWebPage.viewportSize() -> QSize
 |  
 |  windowCloseRequested = <unbound signal windowCloseRequested>
 |  ----------------------------------------------------------------------
 |  Data and other attributes inherited from PyQt4.QtWebKit.QWebPage:
 |  
 |  AlignCenter = 63
 |  
 |  AlignJustified = 64
 |  
 |  AlignLeft = 65
 |  
 |  AlignRight = 66
 |  
 |  Back = 8
 |  
 |  ChooseMultipleFilesExtension = 0
 |  
 |  ChooseMultipleFilesExtensionOption = <class 'PyQt4.QtWebKit.ChooseMult...
 |      QWebPage.ChooseMultipleFilesExtensionOption()
 |      QWebPage.ChooseMultipleFilesExtensionOption(QWebPage.ChooseMultipleFilesExtensionOption)
 |  
 |  ChooseMultipleFilesExtensionReturn = <class 'PyQt4.QtWebKit.ChooseMult...
 |      QWebPage.ChooseMultipleFilesExtensionReturn()
 |      QWebPage.ChooseMultipleFilesExtensionReturn(QWebPage.ChooseMultipleFilesExtensionReturn)
 |  
 |  Copy = 13
 |  
 |  CopyImageToClipboard = 7
 |  
 |  CopyImageUrlToClipboard = 68
 |  
 |  CopyLinkToClipboard = 4
 |  
 |  Cut = 12
 |  
 |  DelegateAllLinks = 2
 |  
 |  DelegateExternalLinks = 1
 |  
 |  DeleteEndOfWord = 42
 |  
 |  DeleteStartOfWord = 41
 |  
 |  DontDelegateLinks = 0
 |  
 |  DownloadImageToDisk = 6
 |  
 |  DownloadLinkToDisk = 3
 |  
 |  ErrorDomain = <class 'PyQt4.QtWebKit.ErrorDomain'>
 |  
 |  
 |  ErrorPageExtension = 1
 |  
 |  ErrorPageExtensionOption = <class 'PyQt4.QtWebKit.ErrorPageExtensionOp...
 |      QWebPage.ErrorPageExtensionOption()
 |      QWebPage.ErrorPageExtensionOption(QWebPage.ErrorPageExtensionOption)
 |  
 |  ErrorPageExtensionReturn = <class 'PyQt4.QtWebKit.ErrorPageExtensionRe...
 |      QWebPage.ErrorPageExtensionReturn()
 |      QWebPage.ErrorPageExtensionReturn(QWebPage.ErrorPageExtensionReturn)
 |  
 |  Extension = <class 'PyQt4.QtWebKit.Extension'>
 |  
 |  
 |  ExtensionOption = <class 'PyQt4.QtWebKit.ExtensionOption'>
 |      QWebPage.ExtensionOption()
 |      QWebPage.ExtensionOption(QWebPage.ExtensionOption)
 |  
 |  ExtensionReturn = <class 'PyQt4.QtWebKit.ExtensionReturn'>
 |      QWebPage.ExtensionReturn()
 |      QWebPage.ExtensionReturn(QWebPage.ExtensionReturn)
 |  
 |  Feature = <class 'PyQt4.QtWebKit.Feature'>
 |  
 |  
 |  FindBackward = 1
 |  
 |  FindCaseSensitively = 2
 |  
 |  FindFlag = <class 'PyQt4.QtWebKit.FindFlag'>
 |  
 |  
 |  FindFlags = <class 'PyQt4.QtWebKit.FindFlags'>
 |      QWebPage.FindFlags(QWebPage.FindFlags)
 |      QWebPage.FindFlags(int)
 |      QWebPage.FindFlags()
 |  
 |  FindWrapsAroundDocument = 4
 |  
 |  Forward = 9
 |  
 |  Geolocation = 1
 |  
 |  HighlightAllOccurrences = 8
 |  
 |  Http = 1
 |  
 |  Indent = 61
 |  
 |  InsertLineSeparator = 51
 |  
 |  InsertOrderedList = 60
 |  
 |  InsertParagraphSeparator = 50
 |  
 |  InsertUnorderedList = 59
 |  
 |  InspectElement = 49
 |  
 |  LinkDelegationPolicy = <class 'PyQt4.QtWebKit.LinkDelegationPolicy'>
 |  
 |  
 |  MoveToEndOfBlock = 26
 |  
 |  MoveToEndOfDocument = 28
 |  
 |  MoveToEndOfLine = 24
 |  
 |  MoveToNextChar = 17
 |  
 |  MoveToNextLine = 21
 |  
 |  MoveToNextWord = 19
 |  
 |  MoveToPreviousChar = 18
 |  
 |  MoveToPreviousLine = 22
 |  
 |  MoveToPreviousWord = 20
 |  
 |  MoveToStartOfBlock = 25
 |  
 |  MoveToStartOfDocument = 27
 |  
 |  MoveToStartOfLine = 23
 |  
 |  NavigationType = <class 'PyQt4.QtWebKit.NavigationType'>
 |  
 |  
 |  NavigationTypeBackOrForward = 2
 |  
 |  NavigationTypeFormResubmitted = 4
 |  
 |  NavigationTypeFormSubmitted = 1
 |  
 |  NavigationTypeLinkClicked = 0
 |  
 |  NavigationTypeOther = 5
 |  
 |  NavigationTypeReload = 3
 |  
 |  NoWebAction = -1
 |  
 |  Notifications = 0
 |  
 |  OpenFrameInNewWindow = 2
 |  
 |  OpenImageInNewWindow = 5
 |  
 |  OpenLink = 0
 |  
 |  OpenLinkInNewWindow = 1
 |  
 |  Outdent = 62
 |  
 |  Paste = 14
 |  
 |  PasteAndMatchStyle = 54
 |  
 |  PermissionDeniedByUser = 2
 |  
 |  PermissionGrantedByUser = 1
 |  
 |  PermissionPolicy = <class 'PyQt4.QtWebKit.PermissionPolicy'>
 |  
 |  
 |  PermissionUnknown = 0
 |  
 |  QtNetwork = 0
 |  
 |  Redo = 16
 |  
 |  Reload = 11
 |  
 |  ReloadAndBypassCache = 53
 |  
 |  RemoveFormat = 55
 |  
 |  SelectAll = 52
 |  
 |  SelectEndOfBlock = 38
 |  
 |  SelectEndOfDocument = 40
 |  
 |  SelectEndOfLine = 36
 |  
 |  SelectNextChar = 29
 |  
 |  SelectNextLine = 33
 |  
 |  SelectNextWord = 31
 |  
 |  SelectPreviousChar = 30
 |  
 |  SelectPreviousLine = 34
 |  
 |  SelectPreviousWord = 32
 |  
 |  SelectStartOfBlock = 37
 |  
 |  SelectStartOfDocument = 39
 |  
 |  SelectStartOfLine = 35
 |  
 |  SetTextDirectionDefault = 43
 |  
 |  SetTextDirectionLeftToRight = 44
 |  
 |  SetTextDirectionRightToLeft = 45
 |  
 |  Stop = 10
 |  
 |  StopScheduledPageRefresh = 67
 |  
 |  ToggleBold = 46
 |  
 |  ToggleItalic = 47
 |  
 |  ToggleStrikethrough = 56
 |  
 |  ToggleSubscript = 57
 |  
 |  ToggleSuperscript = 58
 |  
 |  ToggleUnderline = 48
 |  
 |  Undo = 15
 |  
 |  ViewportAttributes = <class 'PyQt4.QtWebKit.ViewportAttributes'>
 |      QWebPage.ViewportAttributes()
 |      QWebPage.ViewportAttributes(QWebPage.ViewportAttributes)
 |  
 |  WebAction = <class 'PyQt4.QtWebKit.WebAction'>
 |  
 |  
 |  WebBrowserWindow = 0
 |  
 |  WebKit = 2
 |  
 |  WebModalDialog = 1
 |  
 |  WebWindowType = <class 'PyQt4.QtWebKit.WebWindowType'>
 |  
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from PyQt4.QtCore.QObject:
 |  
 |  __getattr__(...)
 |      QObject.__getattr__(str) -> object
 |  
 |  blockSignals(...)
 |      QObject.blockSignals(bool) -> bool
 |  
 |  childEvent(...)
 |      QObject.childEvent(QChildEvent)
 |  
 |  children(...)
 |      QObject.children() -> list-of-QObject
 |  
 |  connect(...)
 |      QObject.connect(QObject, SIGNAL(), QObject, SLOT(), Qt.ConnectionType=Qt.AutoConnection) -> bool
 |      QObject.connect(QObject, SIGNAL(), callable, Qt.ConnectionType=Qt.AutoConnection) -> bool
 |      QObject.connect(QObject, SIGNAL(), SLOT(), Qt.ConnectionType=Qt.AutoConnection) -> bool
 |  
 |  connectNotify(...)
 |      QObject.connectNotify(SIGNAL())
 |  
 |  customEvent(...)
 |      QObject.customEvent(QEvent)
 |  
 |  deleteLater(...)
 |      QObject.deleteLater()
 |  
 |  destroyed = <unbound signal destroyed>
 |  disconnect(...)
 |      QObject.disconnect(QObject, SIGNAL(), QObject, SLOT()) -> bool
 |      QObject.disconnect(QObject, SIGNAL(), callable) -> bool
 |  
 |  disconnectNotify(...)
 |      QObject.disconnectNotify(SIGNAL())
 |  
 |  dumpObjectInfo(...)
 |      QObject.dumpObjectInfo()
 |  
 |  dumpObjectTree(...)
 |      QObject.dumpObjectTree()
 |  
 |  dynamicPropertyNames(...)
 |      QObject.dynamicPropertyNames() -> list-of-QByteArray
 |  
 |  emit(...)
 |      QObject.emit(SIGNAL(), ...)
 |  
 |  eventFilter(...)
 |      QObject.eventFilter(QObject, QEvent) -> bool
 |  
 |  findChild(...)
 |      QObject.findChild(type, QString name=QString()) -> QObject
 |      QObject.findChild(tuple, QString name=QString()) -> QObject
 |  
 |  findChildren(...)
 |      QObject.findChildren(type, QString name=QString()) -> list-of-QObject
 |      QObject.findChildren(tuple, QString name=QString()) -> list-of-QObject
 |      QObject.findChildren(type, QRegExp) -> list-of-QObject
 |      QObject.findChildren(tuple, QRegExp) -> list-of-QObject
 |  
 |  inherits(...)
 |      QObject.inherits(str) -> bool
 |  
 |  installEventFilter(...)
 |      QObject.installEventFilter(QObject)
 |  
 |  isWidgetType(...)
 |      QObject.isWidgetType() -> bool
 |  
 |  killTimer(...)
 |      QObject.killTimer(int)
 |  
 |  metaObject(...)
 |      QObject.metaObject() -> QMetaObject
 |  
 |  moveToThread(...)
 |      QObject.moveToThread(QThread)
 |  
 |  objectName(...)
 |      QObject.objectName() -> QString
 |  
 |  parent(...)
 |      QObject.parent() -> QObject
 |  
 |  property(...)
 |      QObject.property(str) -> QVariant
 |  
 |  pyqtConfigure(...)
 |      QObject.pyqtConfigure(...)
 |      
 |      Each keyword argument is either the name of a Qt property or a Qt signal.
 |      For properties the property is set to the given value which should be of an
 |      appropriate type.
 |      For signals the signal is connected to the given value which should be a
 |      callable.
 |  
 |  receivers(...)
 |      QObject.receivers(SIGNAL()) -> int
 |  
 |  removeEventFilter(...)
 |      QObject.removeEventFilter(QObject)
 |  
 |  sender(...)
 |      QObject.sender() -> QObject
 |  
 |  senderSignalIndex(...)
 |      QObject.senderSignalIndex() -> int
 |  
 |  setObjectName(...)
 |      QObject.setObjectName(QString)
 |  
 |  setParent(...)
 |      QObject.setParent(QObject)
 |  
 |  setProperty(...)
 |      QObject.setProperty(str, QVariant) -> bool
 |  
 |  signalsBlocked(...)
 |      QObject.signalsBlocked() -> bool
 |  
 |  startTimer(...)
 |      QObject.startTimer(int) -> int
 |  
 |  thread(...)
 |      QObject.thread() -> QThread
 |  
 |  timerEvent(...)
 |      QObject.timerEvent(QTimerEvent)
 |  
 |  tr(...)
 |      QObject.tr(str, str disambiguation=None, int n=-1) -> QString
 |  
 |  trUtf8(...)
 |      QObject.trUtf8(str, str disambiguation=None, int n=-1) -> QString
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from PyQt4.QtCore.QObject:
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  staticMetaObject
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from sip.simplewrapper:
 |  
 |  __dict__
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes inherited from sip.simplewrapper:
 |  
 |  __new__ = <built-in method __new__ of sip.wrappertype object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


In [ ]: