In [5]:
import mechanize
import cookielib

# Browser
br = mechanize.Browser()

# Cookie jar attached to the browser
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
# NOTE: this call is what triggers the "gzip transfer encoding is
# experimental!" UserWarning shown in this cell's output.
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follow immediate (0-second) refreshes, but do not hang on refreshes with a
# delay greater than max_time. The first argument is only an on/off switch,
# so pass True instead of building a processor from the private
# mechanize._http module.
br.set_handle_refresh(True, max_time=1)

# Want debugging messages?
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)

# User-Agent (this is cheating, ok?): present ourselves as desktop Chrome.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
              'AppleWebKit/537.36 (KHTML, like Gecko) '
              'Chrome/40.0.2214.85 Safari/537.36')
br.addheaders = [('User-agent', USER_AGENT)]


C:\Users\Zac\Anaconda2\lib\site-packages\ipykernel\__main__.py:13: UserWarning: gzip transfer encoding is experimental!

In [6]:
# Yelp search: "vietnamese food" near Hoboken.
SEARCH_URL = 'https://www.yelp.com/search?find_desc=vietnamese+food&find_loc=Hoboken&ns=1'

# Fetch the results page with the configured browser and keep the raw body.
r = br.open(SEARCH_URL)
html = r.read()

In [7]:
import bs4
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed, so the parsed tree can differ between machines (and newer
# bs4 releases emit a warning). 'html.parser' ships with Python.
soup = bs4.BeautifulSoup(html, 'html.parser')

In [11]:
# Collect every element whose class attribute marks it as a regular
# (organic) search result.
result_filter = {'class': 'regular-search-result'}
search_results = soup.find_all(attrs=result_filter)

In [10]:
# Save the fetched page for offline inspection. `html` is the raw body from
# r.read(), so write it in binary mode: text mode would rewrite newlines on
# Windows, and would reject a bytes payload on Python 3.
with open('this_html.html', 'wb') as outfile:
    outfile.write(html)

In [35]:
def _stripped_text(tag):
    """Return the whitespace-stripped text of ``tag``, or None if ``tag`` is None."""
    return tag.text.strip() if tag is not None else None


def _name_and_phone(res):
    """Return a ``(name, phone)`` tuple for one search-result tag.

    Either element is None when the corresponding markup is missing from
    the result, rather than raising AttributeError on a failed ``find``.
    """
    name_link = res.find(attrs={'class': 'biz-name js-analytics-click'})
    # The business name sits in a <span> nested inside the name link.
    name_span = name_link.span if name_link is not None else None
    phone_tag = res.find(attrs={'class': 'biz-phone'})
    return (_stripped_text(name_span), _stripped_text(phone_tag))


# One (business name, phone number) pair per search result.
[_name_and_phone(res) for res in search_results]


Out[35]:
[(u'Pho Nomenon Noodle & Grill', u'(201) 706-8554'),
 (u'New Thanh Hoai', u'(201) 918-6599')]

In [19]:
# Locate where the 'biz-phone' class first appears in the serialized markup
# of the first search result (-1 would mean it is absent).
first_result_markup = str(search_results[0])
first_result_markup.find('biz-phone')


Out[19]:
1932

In [23]:
# Peek at the serialized markup of the first result around the 'biz-phone'
# span; the bounds were picked by hand to bracket the offset found earlier.
snippet_start, snippet_end = 1915, 2032
str(search_results[0])[snippet_start:snippet_end]


Out[23]:
'an>\n<span class="biz-phone">\n        (201) 706-8554\n    </span>\n</div>\n</div>\n<div class="search-result-ctas u-space-'

In [25]:
# Keep a handle on the first search result for interactive inspection.
sr = search_results[0]

In [26]:
# Display the full markup of the first search result (large output).
sr


Out[26]:
<li class="regular-search-result">\n<div class="search-result natural-search-result" data-key="1">\n<div class="biz-listing-large">\n<div class="main-attributes">\n<div class="media-block media-block--12">\n<div class="media-avatar">\n<div class="photo-box pb-90s">\n<a class="js-analytics-click" data-analytics-label="biz-photo" href="/biz/pho-nomenon-noodle-and-grill-hoboken?osq=vietnamese+food">\n<img alt="Pho Nomenon Noodle &amp; Grill" class="photo-box-img" height="90" src="https://s3-media3.fl.yelpcdn.com/bphoto/zWyVwZdyyjEuDBmlfL4T5A/90s.jpg" width="90"/>\n</a>\n</div>\n</div>\n<div class="media-story">\n<h3 class="search-result-title">\n<span class="indexed-biz-name">1.         <a class="biz-name js-analytics-click" data-analytics-label="biz-name" data-hovercard-id="HblDdwBZIP7F1dYH5JRBag" href="/biz/pho-nomenon-noodle-and-grill-hoboken?osq=vietnamese+food"><span>Pho Nomenon Noodle &amp; Grill</span></a>\n</span>\n</h3>\n<div class="biz-rating biz-rating-large clearfix">\n<div class="i-stars i-stars--regular-3 rating-large" title="3.0 star rating">\n<img alt="3.0 star rating" class="offscreen" height="303" src="https://s3-media1.fl.yelpcdn.com/assets/srv0/yelp_design_web/41341496d9db/assets/img/stars/stars.png" width="84"/>\n</div>\n<span class="review-count rating-qualifier">\n            162 reviews\n    </span>\n</div>\n<div class="price-category">\n<span class="bullet-after">\n<span class="business-attribute price-range">$</span>\n</span>\n<span class="category-str-list">\n<a href="/search?find_desc=vietnamese+food&amp;find_loc=Hoboken&amp;cflt=vietnamese">Vietnamese</a>,\n                    <a href="/search?find_desc=vietnamese+food&amp;find_loc=Hoboken&amp;cflt=noodles">Noodles</a>\n</span>\n</div>\n<ul class="search-result_tags">\n</ul>\n</div>\n</div>\n</div>\n<div class="secondary-attributes">\n<address>\n        516 Washington St<br/>Hoboken, NJ 07030\n    </address>\n<span class="offscreen">Phone number</span>\n<span class="biz-phone">\n        
(201) 706-8554\n    </span>\n</div>\n</div>\n<div class="search-result-ctas u-space-t1">\n<div class="search-avatar-offset js-platform">\n<div class="island island--slim search-result-cta">\n<div class="arrange arrange--middle arrange--6">\n<div class="arrange_unit">\n<span aria-hidden="true" class="icon icon--18-order icon--size-18" style="fill: #ca6d00; width: 18px; height: 18px;">\n<svg class="icon_svg">\n<use xlink:href="#18x18_order"></use>\n</svg>\n</span>\n</div>\n<div class="arrange_unit--fill arrange_unit cta-text">\n<span class="">\n            This restaurant accepts pickup and delivery\n        </span>\n</div>\n<div class="arrange_unit nowrap js-tag-action" data-business-id="OE9bjWdWyo530-Nj3dIILw" data-popup-title="" data-search-action-uri="/transaction_platform/start_order/OE9bjWdWyo530-Nj3dIILw">\n<a class="ybtn ybtn--small low-intent-search-action-button" href="javascript:;">Start Order</a>\n</div>\n</div>\n</div>\n<div class="search-result-cta-error-row">\n<div class="platform-vsearch-error-message text-error">\n</div>\n</div>\n</div>\n</div>\n<div class="snippet-block media-block">\n<div class="media-avatar">\n<div class="photo-box pb-30s" data-hovercard-id="daCCXEk5YQLmzwdIvUeeDw">\n<a href="/user_details?userid=5mdS-QvefXD0EhYDqiSrdA">\n<img alt="Kelly N." class="photo-box-img" height="30" src="https://s3-media1.fl.yelpcdn.com/photo/YJFa28-3D0O4sVo9h4wyaA/30s.jpg" width="30"/>\n</a>\n</div>\n</div>\n<div class="media-story">\n<p class="snippet">\n                        having Chinese owners (who are really friendly, btw), the <span class="highlighted">food</span> still had those delicious <span class="highlighted">Vietnamese</span> flavors. My go-to is the grilled pork with white rice. I've definitely had better\u2026\n                        <a class="nowrap" href="/biz/pho-nomenon-noodle-and-grill-hoboken?hrid=flhdJjn0DwfeygIGh279cA&amp;osq=vietnamese+food">read more</a>\n</p>\n</div>\n</div>\n</div>\n</li>

In [ ]: