In [2]:
# Shell out to pip to make sure Beautiful Soup (imported below as `bs4`) is installed.
! pip install beautifulsoup4


Requirement already satisfied: beautifulsoup4 in c:\users\dell\anaconda3\lib\site-packages

In [3]:
from bs4 import BeautifulSoup

In [7]:
# Shell out to pip to make sure urllib3 is installed.
# NOTE(review): the next cell imports `urllib.request` (standard library), not
# urllib3 — confirm this install is actually needed.
! pip install urllib3


Requirement already satisfied: urllib3 in c:\users\dell\anaconda3\lib\site-packages

In [9]:
import urllib.request

In [10]:
# Download the raw HTML (bytes) of the Yelp search-results page for restaurants
# in Houston, TX.
# The `with` block closes the HTTP response, and its underlying socket, as soon
# as the body has been read; the timeout stops the cell from hanging forever if
# the server stalls.
with urllib.request.urlopen(
    'https://www.yelp.com/search?find_desc=Restaurants&find_loc=Houston%2C+TX',
    timeout=30,
) as response:
    r = response.read()

In [11]:
# Parse the downloaded page with Beautiful Soup, selecting the "lxml" parser.
soup = BeautifulSoup(markup=r, features="lxml")
# Show the resulting object's type as the cell output.
type(soup)


Out[11]:
bs4.BeautifulSoup

In [12]:
# Number of characters in the pretty-printed (re-indented) document.
len(soup.prettify())


Out[12]:
299419

In [13]:
# Keep the pretty-printed document as one string so it can be searched and
# sliced by character offset in the cells below.
a = soup.prettify()

In [14]:
# Character offset of the first `class="snippet"` attribute in the
# pretty-printed HTML (str.find returns -1 when the text is absent).
a.find('class="snippet"')


Out[14]:
170264

In [15]:
# Eyeball a 30,000-character window of the pretty-printed HTML.
# NOTE(review): this window stops at 145000, before the offset reported by the
# preceding search cell (170264) — confirm these bounds are the ones intended.
window = slice(115000, 145000)
a[window]


Out[15]:
'  </label>\n                </li>\n               </ul>\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="HappyHour"/>\n                  <span class="filter-label">\n                   Happy Hour\n                  </span>\n                 </label>\n                </li>\n               </ul>\n              </div>\n             </div>\n            </div>\n            <div class="js-toggle-list-item toggle-list-item">\n             <h4 class="arrange arrange--middle toggle-list-link js-toggle-list-trigger u-space-b1">\n              <span class="arrange_unit u-cursor-pointer">\n               <span class="toggle-link-arrow-wrap u-inline-block">\n                <span class="toggle-link-arrow" role="presentation">\n                </span>\n               </span>\n              </span>\n              <span class="arrange_unit arrange_unit--fill">\n               <span class="u-pseudo-link u-decoration-none">\n                Meals Served\n               </span>\n              </span>\n             </h4>\n             <div class="js-toggle-list-content toggle-list-content">\n              <div class="filter-group">\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="GoodForMeal.breakfast"/>\n                  <span class="filter-label">\n                   Breakfast\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="GoodForMeal.brunch"/>\n                  <span class="filter-label">\n                   Brunch\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature 
radio-check">\n                  <input name="feature" type="checkbox" value="GoodForMeal.lunch"/>\n                  <span class="filter-label">\n                   Lunch\n                  </span>\n                 </label>\n                </li>\n               </ul>\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="GoodForMeal.dinner"/>\n                  <span class="filter-label">\n                   Dinner\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="GoodForMeal.dessert"/>\n                  <span class="filter-label">\n                   Dessert\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="GoodForMeal.latenight"/>\n                  <span class="filter-label">\n                   Late Night\n                  </span>\n                 </label>\n                </li>\n               </ul>\n              </div>\n             </div>\n            </div>\n            <div class="js-toggle-list-item toggle-list-item">\n             <h4 class="arrange arrange--middle toggle-list-link js-toggle-list-trigger u-space-b1">\n              <span class="arrange_unit u-cursor-pointer">\n               <span class="toggle-link-arrow-wrap u-inline-block">\n                <span class="toggle-link-arrow" role="presentation">\n                </span>\n               </span>\n              </span>\n              <span class="arrange_unit arrange_unit--fill">\n               <span class="u-pseudo-link u-decoration-none">\n                Music\n               </span>\n              </span>\n             </h4>\n       
      <div class="js-toggle-list-content toggle-list-content">\n              <div class="filter-group">\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Music.dj"/>\n                  <span class="filter-label">\n                   DJ\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Music.jukebox"/>\n                  <span class="filter-label">\n                   Juke Box\n                  </span>\n                 </label>\n                </li>\n               </ul>\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Music.karaoke"/>\n                  <span class="filter-label">\n                   Karaoke\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Music.live"/>\n                  <span class="filter-label">\n                   Live\n                  </span>\n                 </label>\n                </li>\n               </ul>\n              </div>\n             </div>\n            </div>\n            <div class="js-toggle-list-item toggle-list-item">\n             <h4 class="arrange arrange--middle toggle-list-link js-toggle-list-trigger u-space-b1">\n              <span class="arrange_unit u-cursor-pointer">\n               <span class="toggle-link-arrow-wrap u-inline-block">\n                <span class="toggle-link-arrow" role="presentation">\n                </span>\n               </span>\n              </span>\n              <span class="arrange_unit 
arrange_unit--fill">\n               <span class="u-pseudo-link u-decoration-none">\n                Parking\n               </span>\n              </span>\n             </h4>\n             <div class="js-toggle-list-content toggle-list-content">\n              <div class="filter-group">\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="BusinessParking.street"/>\n                  <span class="filter-label">\n                   Street\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="BusinessParking.garage"/>\n                  <span class="filter-label">\n                   Garage\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="BusinessParking.valet"/>\n                  <span class="filter-label">\n                   Valet\n                  </span>\n                 </label>\n                </li>\n               </ul>\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="BusinessParking.lot"/>\n                  <span class="filter-label">\n                   Private Lot\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="BusinessParking.validated"/>\n                  <span class="filter-label">\n                   Validated\n                  </span>\n                 </label>\n                </li>\n               </ul>\n   
           </div>\n             </div>\n            </div>\n            <div class="js-toggle-list-item toggle-list-item">\n             <h4 class="arrange arrange--middle toggle-list-link js-toggle-list-trigger u-space-b1">\n              <span class="arrange_unit u-cursor-pointer">\n               <span class="toggle-link-arrow-wrap u-inline-block">\n                <span class="toggle-link-arrow" role="presentation">\n                </span>\n               </span>\n              </span>\n              <span class="arrange_unit arrange_unit--fill">\n               <span class="u-pseudo-link u-decoration-none">\n                Wi-Fi\n               </span>\n              </span>\n             </h4>\n             <div class="js-toggle-list-content toggle-list-content">\n              <div class="filter-group">\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="WiFi.free"/>\n                  <span class="filter-label">\n                   Free\n                  </span>\n                 </label>\n                </li>\n               </ul>\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="WiFi.paid"/>\n                  <span class="filter-label">\n                   Paid\n                  </span>\n                 </label>\n                </li>\n               </ul>\n              </div>\n             </div>\n            </div>\n            <div class="js-toggle-list-item toggle-list-item">\n             <h4 class="arrange arrange--middle toggle-list-link js-toggle-list-trigger u-space-b1">\n              <span class="arrange_unit u-cursor-pointer">\n               <span class="toggle-link-arrow-wrap u-inline-block">\n                <span class="toggle-link-arrow" role="presentation">\n                </span>\n  
             </span>\n              </span>\n              <span class="arrange_unit arrange_unit--fill">\n               <span class="u-pseudo-link u-decoration-none">\n                Smoking\n               </span>\n              </span>\n             </h4>\n             <div class="js-toggle-list-content toggle-list-content">\n              <div class="filter-group">\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Smoking.no"/>\n                  <span class="filter-label">\n                   No\n                  </span>\n                 </label>\n                </li>\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Smoking.outdoor"/>\n                  <span class="filter-label">\n                   Outdoor Area / Patio Only\n                  </span>\n                 </label>\n                </li>\n               </ul>\n               <ul class="column">\n                <li>\n                 <label class="feature radio-check">\n                  <input name="feature" type="checkbox" value="Smoking.yes"/>\n                  <span class="filter-label">\n                   Yes\n                  </span>\n                 </label>\n                </li>\n               </ul>\n              </div>\n             </div>\n            </div>\n           </ul>\n          </div>\n          <div class="filter-set category-filters">\n           <h4>\n            Category\n           </h4>\n           <ul class="main">\n            <li>\n             <label class="category radio-check">\n              <input name="category" type="checkbox" value="mexican"/>\n              <span>\n               Mexican\n              </span>\n             </label>\n            </li>\n            <li>\n             <label class="category radio-check">\n              
<input name="category" type="checkbox" value="hotdogs"/>\n              <span>\n               Fast Food\n              </span>\n             </label>\n            </li>\n            <li>\n             <label class="category radio-check">\n              <input name="category" type="checkbox" value="sandwiches"/>\n              <span>\n               Sandwiches\n              </span>\n             </label>\n            </li>\n            <li>\n             <label class="category radio-check">\n              <input name="category" type="checkbox" value="burgers"/>\n              <span>\n               Burgers\n              </span>\n             </label>\n            </li>\n           </ul>\n           <a class="more-link" href="javascript:;">\n            More Categories\n           </a>\n           <div class="more category-more filter-group arrange arrange--30 arrange--equal">\n            <ul class="arrange_unit">\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="restaurants"/>\n               <span>\n                Restaurants\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="mexican"/>\n               <span>\n                Mexican\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="hotdogs"/>\n               <span>\n                Fast Food\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="sandwiches"/>\n               <span>\n                Sandwiches\n               </span>\n              </label>\n             </li>\n             
<li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="food"/>\n               <span>\n                Food\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="burgers"/>\n               <span>\n                Burgers\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="pizza"/>\n               <span>\n                Pizza\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="tradamerican"/>\n               <span>\n                American (Traditional)\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="nightlife"/>\n               <span>\n                Nightlife\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="chinese"/>\n               <span>\n                Chinese\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="chicken_wings"/>\n               <span>\n                Chicken Wings\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="bars"/>\n               <span>\n   
             Bars\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="seafood"/>\n               <span>\n                Seafood\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="breakfast_brunch"/>\n               <span>\n                Breakfast &amp; Brunch\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="italian"/>\n               <span>\n                Italian\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="tex-mex"/>\n               <span>\n                Tex-Mex\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="newamerican"/>\n               <span>\n                American (New)\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="delis"/>\n               <span>\n                Delis\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="bbq"/>\n               <span>\n                Barbeque\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category 
radio-check">\n               <input name="category" type="checkbox" value="vietnamese"/>\n               <span>\n                Vietnamese\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="foodtrucks"/>\n               <span>\n                Food Trucks\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="cajun"/>\n               <span>\n                Cajun/Creole\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="salad"/>\n               <span>\n                Salad\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="cafes"/>\n               <span>\n                Cafes\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="latin"/>\n               <span>\n                Latin American\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="japanese"/>\n               <span>\n                Japanese\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="sportsbars"/>\n               <span>\n                Sports Bars\n               
</span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="sushi"/>\n               <span>\n                Sushi Bars\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="bakeries"/>\n               <span>\n                Bakeries\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="soup"/>\n               <span>\n                Soup\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="indpak"/>\n               <span>\n                Indian\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="southern"/>\n               <span>\n                Southern\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="steak"/>\n               <span>\n                Steakhouses\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="mediterranean"/>\n               <span>\n                Mediterranean\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" 
type="checkbox" value="asianfusion"/>\n               <span>\n                Asian Fusion\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="buffets"/>\n               <span>\n                Buffets\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="diners"/>\n               <span>\n                Diners\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="hotdog"/>\n               <span>\n                Hot Dogs\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="eventservices"/>\n               <span>\n                Event Planning &amp; Services\n               </span>\n              </label>\n             </li>\n            </ul>\n            <ul class="arrange_unit">\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="desserts"/>\n               <span>\n                Desserts\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="coffee"/>\n               <span>\n                Coffee &amp; Tea\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="foodstands"/>\n               <span>\n                
Food Stands\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="chickenshop"/>\n               <span>\n                Chicken Shop\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="tacos"/>\n               <span>\n                Tacos\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="halal"/>\n               <span>\n                Halal\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="catering"/>\n               <span>\n                Caterers\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="wine_bars"/>\n               <span>\n                Wine Bars\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="thai"/>\n               <span>\n                Thai\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="vegetarian"/>\n               <span>\n                Vegetarian\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               
<input name="category" type="checkbox" value="juicebars"/>\n               <span>\n                Juice Bars &amp; Smoothies\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="soulfood"/>\n               <span>\n                Soul Food\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="gourmet"/>\n               <span>\n                Specialty Food\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="gluten_free"/>\n               <span>\n                Gluten-Free\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="mideastern"/>\n               <span>\n                Middle Eastern\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="icecream"/>\n               <span>\n                Ice Cream &amp; Frozen Yogurt\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="salvadoran"/>\n               <span>\n                Salvadoran\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="greek"/>\n               <span>\n                
Greek\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="cocktailbars"/>\n               <span>\n                Cocktail Bars\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="caribbean"/>\n               <span>\n                Caribbean\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="pakistani"/>\n               <span>\n                Pakistani\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="noodles"/>\n               <span>\n                Noodles\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="donuts"/>\n               <span>\n                Donuts\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="korean"/>\n               <span>\n                Korean\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="arts"/>\n               <span>\n                Arts &amp; Entertainment\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n     
          <input name="category" type="checkbox" value="french"/>\n               <span>\n                French\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="gastropubs"/>\n               <span>\n                Gastropubs\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="comfortfood"/>\n               <span>\n                Comfort Food\n               </span>\n              </label>\n             </li>\n             <li>\n              <label class="category radio-check">\n               <input name="category" type="checkbox" value="lounges"/>\n               <span>\n                Lounges\n               </span>\n              <'

In [17]:
# Let's try to find the list of phone numbers. We match on both the HTML tag
# (<span>) and its CSS class ("biz-phone"), using the find_all function.
letters = soup.find_all("span", attrs={"class": "biz-phone"})
# Display the matches from index 1 onward (capped at index 1000).
# NOTE(review): starting at 1 skips the first match — confirm that is intended.
letters[1:1000]


Out[17]:
[<span class="biz-phone">
         (713) 842-7114
     </span>, <span class="biz-phone">
         (832) 831-9453
     </span>, <span class="biz-phone">
         (832) 487-9412
     </span>, <span class="biz-phone">
         (281) 501-3780
     </span>, <span class="biz-phone">
         (281) 888-1929
     </span>, <span class="biz-phone">
         (832) 203-5180
     </span>, <span class="biz-phone">
         (832) 834-4417
     </span>, <span class="biz-phone">
         (713) 861-6143
     </span>, <span class="biz-phone">
         (713) 225-8079
     </span>, <span class="biz-phone">
         (346) 319-5919
     </span>]

In [ ]: