In [45]:
from traitlets.config.manager import BaseJSONConfigManager

# Directory that holds the nbconfig JSON files this manager reads and writes.
path = "/Users/bob/anaconda/etc/jupyter/nbconfig"
cm = BaseJSONConfigManager(config_dir=path)

# Slide height in pixels; the width below is derived from it for a 16:9 frame.
pixels = 900
livereveal_settings = dict(
    transition='convex',
    start_slideshow_at='selected',
    scroll=True,
    width=pixels * 16 / 9,
    height=pixels,
    controls=False,
)

# Merge the values into the 'livereveal' section; the merged section returned
# by update() is this cell's displayed result.
cm.update('livereveal', livereveal_settings)


Out[45]:
{'controls': False,
 'height': 900,
 'minScale': 0.2,
 'scroll': True,
 'start_slideshow_at': 'selected',
 'theme': 'serif',
 'transition': 'convex',
 'width': 1600.0}

Web Crawling

One Approach


In [1]:
from bs4 import BeautifulSoup
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded, so `urllib.request.urlopen` can raise
# AttributeError on a fresh kernel.
import urllib.request

url = 'http://atlanta.backpage.com/'
# Use the response as a context manager so the connection is closed as soon
# as the body has been read and parsed.
with urllib.request.urlopen(url) as page:
    soup = BeautifulSoup(page.read(), "lxml")
# Last expression of the cell: show the parsed <body> element.
soup.find('body')


Out[1]:
<body id="index">
<!-- Google Tag Manager -->
<noscript><iframe height="0" src="//www.googletagmanager.com/ns.html?id=GTM-5KCSP8" style="display:none;visibility:hidden" width="0"></iframe></noscript>
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': 
  new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
  j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
  '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
  })(window,document,'script','dataLayer','GTM-5KCSP8');</script>
<!-- End Google Tag Manager -->
<div class="nav-trigger sidebar-toggle">nav</div>
<div class="search-trigger">search</div>
<div class="sidebar-nav">
<div class="sidebar">
<div class="inner">
<div id="postAdButton">
<form action="http://posting.atlanta.backpage.com/online/classifieds/PostAdPPI.html/atl/atlanta.backpage.com/" id="formPost" method="get" name="formPost">
<input class="button" id="postAdButton" type="submit" value="Post Ad"/>
<input name="u" type="hidden" value="atl"/>
<input name="serverName" type="hidden" value="atlanta.backpage.com"/>
</form>
</div><!-- #postAdButton -->
<ul class="top">
<li class="nav-item"><a href="http://atlanta.backpage.com/">Home</a></li>
<li class="nav-item"><a href="https://my.backpage.com/classifieds/central/index">My Account</a></li>
<li class="nav-item"><a href="https://my.backpage.com/classifieds/central/PurchaseCredits.html/atlanta.backpage.com/" onclick="if ($(this).attr('disabled')){return false;}else{$(this).attr('disabled', 'true'); return true;}">Buy Credits</a></li>
</ul>
<h3>Change Language</h3>
<ul class="list languageSwitch">
<li class="nav-item">
<a data-key="" data-lang="es-us" data-langkey="" href="#es-us" rel="nofollow">español</a>
</li>
</ul>
<script>
            $("ul.languageSwitch li a").on("click", function(e){
              e.preventDefault();
              var myLang = $(this).attr("data-lang");
              var myKey = $(this).attr("data-key");
              var myLangKey = $(this).attr("data-langKey");

              var myStart = window.location.href.match(/^https?:\/\//)||'';
              var myUrl = window.location.href.replace(/^https?:\/\//,'');

              // update the key
              if (myKey != "" && myLangKey != ""){
                myUrl = myUrl.replace('/'+myKey+'/', '/'+myLangKey+'/');
              }

              // update the language
              if (myLang!=""){
                if (myUrl.search(/\/..-..\//) == -1){
                  
                    myUrl = myUrl.replace(/\//, '/'+myLang+'/');
                  
                } else {
                  myUrl = myUrl.replace(/\/..-..\//, '/'+myLang+'/');
                }
                setCookie("languageCode",myLang,30,"backpage.com");
              } else {
                myUrl = myUrl.replace(/\/..-..\//, '/');
                setCookie("languageCode","",30,"backpage.com");
              }
              //console.log(myStart+myUrl);
              window.location = myStart+myUrl;
            });
          </script>
<h3>Nearby Cities</h3>
<ul class="list">
<li class="nav-item"><a href="http://albanyga.backpage.com/">albany</a></li>
<li class="nav-item"><a href="http://athensga.backpage.com/">athens</a></li>
<li class="nav-item"><a href="http://atlanta.backpage.com/">atlanta</a></li>
<li class="nav-item"><a href="http://augusta.backpage.com/">augusta</a></li>
<li class="nav-item"><a href="http://brunswick.backpage.com/">brunswick</a></li>
<li class="nav-item"><a href="http://columbusga.backpage.com/">columbus</a></li>
<li class="nav-item"><a href="http://macon.backpage.com/">macon</a></li>
<li class="nav-item"><a href="http://nwga.backpage.com/">northwest georgia</a></li>
<li class="nav-item"><a href="http://savannah.backpage.com/">savannah</a></li>
<li class="nav-item"><a href="http://statesboro.backpage.com/">statesboro</a></li>
<li class="nav-item"><a href="http://valdosta.backpage.com/">valdosta</a></li>
</ul>
<br/>
<ul class="list">
<li class="nav-item">
<a href="http://atlanta.backpage.com/classifieds/AllCities">View All Cities</a>
</li>
</ul>
</div>
</div>
<div class="dimmer"></div>
</div><!-- .sidebar-nav -->
<div class="siteHeader" id="tlHeader">
<div class="tlBlock" id="logo">
<a href="http://atlanta.backpage.com/classifieds/AllCities">atlanta.backpage.com</a>
</div>
<div class="tlBlock" id="postAdButton">
<form action="http://posting.atlanta.backpage.com/online/classifieds/PostAdPPI.html/atl/atlanta.backpage.com/" id="formPost" method="get" name="formPost">
<input class="button" id="postAdButton" type="submit" value="Post Ad"/>
<input name="u" type="hidden" value="atl"/>
<input name="serverName" type="hidden" value="atlanta.backpage.com"/>
</form>
</div><!-- #postAdButton -->
<div class="tlBlock" id="searchInline">
<span class="search-wrapper">
<form action="http://atlanta.backpage.com/online/classifieds/Search" id="formSearch" method="get" name="formSearch">
<input name="rd" type="hidden" value="no"/>
<input data-default=" keyword" maxlength="100" name="keyword" size="15" type="text" value=" keyword"/>
<select name="section">
<option value="11258070">local places
            
          
            
              </option><option value="4382">community
            
          
            
              </option><option selected="" value="4378">buy/ sell/ trade
            
          
            
              </option><option value="463948">automotive
            
          
            
              </option><option value="4380">musician
            
          
            
              </option><option value="4376">rentals
            
          
            
              </option><option value="4375">real estate
            
          
            
              </option><option value="4373">jobs
            
          
            
              </option><option value="4383">dating
            
          
            
              </option><option value="4381">adult
            
          
            
              </option><option value="4374">services
            
          
        </option></select>
<input class="button" id="searchButton" type="submit" value="search"/>
</form>
<script>
      jQuery("form[name='formSearch'] input[type='text']").focus(function(){
        if (jQuery(this).is("[data-default]")){
          if (jQuery(this).val()==jQuery(this).attr("data-default")){
            jQuery(this).val("");
          }
        }
      });
      jQuery("form[name='formSearch'] input[type='text']").blur(function(){
        if (jQuery(this).is("[data-default]")){
          if (jQuery.trim(jQuery(this).val())==""){
            jQuery(this).val(jQuery(this).attr("data-default"));
          }
        }
      });
      jQuery("form[name='formSearch']").submit(function(){
        jQuery("form[name='formSearch'] input[type=text]").each(function() {
          if (jQuery(this).val()==jQuery(this).attr("data-default")){
            jQuery(this).val("");
          }
        });
      });
    </script>
</span>
</div>
<div class="tlBlock" id="community">
<h1>
<span class="city">
              atlanta, ga
            </span>
<span class="comm">
                 free classifieds
            </span>
</h1>
</div><!-- #community -->
</div><!-- #tlHeader -->
<div id="searchDropdown">
<span class="search-wrapper">
<form action="http://atlanta.backpage.com/online/classifieds/Search" id="formSearch" method="get" name="formSearch">
<input name="rd" type="hidden" value="no"/>
<input data-default=" keyword" maxlength="100" name="keyword" size="15" type="text" value=" keyword"/>
<select name="section">
<option value="11258070">local places
            
          
            
              </option><option value="4382">community
            
          
            
              </option><option selected="" value="4378">buy/ sell/ trade
            
          
            
              </option><option value="463948">automotive
            
          
            
              </option><option value="4380">musician
            
          
            
              </option><option value="4376">rentals
            
          
            
              </option><option value="4375">real estate
            
          
            
              </option><option value="4373">jobs
            
          
            
              </option><option value="4383">dating
            
          
            
              </option><option value="4381">adult
            
          
            
              </option><option value="4374">services
            
          
        </option></select>
<input class="button" id="searchButton" type="submit" value="search"/>
</form>
<script>
      jQuery("form[name='formSearch'] input[type='text']").focus(function(){
        if (jQuery(this).is("[data-default]")){
          if (jQuery(this).val()==jQuery(this).attr("data-default")){
            jQuery(this).val("");
          }
        }
      });
      jQuery("form[name='formSearch'] input[type='text']").blur(function(){
        if (jQuery(this).is("[data-default]")){
          if (jQuery.trim(jQuery(this).val())==""){
            jQuery(this).val(jQuery(this).attr("data-default"));
          }
        }
      });
      jQuery("form[name='formSearch']").submit(function(){
        jQuery("form[name='formSearch'] input[type=text]").each(function() {
          if (jQuery(this).val()==jQuery(this).attr("data-default")){
            jQuery(this).val("");
          }
        });
      });
    </script>
</span>
</div>
<div id="pageBackground">
<div id="mainCellWrapper">
<!-- 2016-12-12 04:00:00 -->
<div class="mainCellBackground" id="columnsTable" style="width: 100%;">
<div class="indexSectionColumnBlock">
<div class="indexSectionColumn">
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/places/">local places</a> <span class="count">5,085</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/events/">events</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/bars/">bars/<wbr><span class="wbr"></span>clubs</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/restaurants/">restaurants</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/salons/">salons/<wbr><span class="wbr"></span>nails/<wbr><span class="wbr"></span>spas</wbr></wbr></a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/community/">community</a> <span class="count">192</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/Childcare/">childcare</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Classes/">classes/<wbr><span class="wbr"></span>workshops</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/GeneralCommunity/">general</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Groups/">groups</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/LostAndFound/">lost &amp; found</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Volunteers/">volunteers</a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/buy-sell-trade/">buy/<wbr><span class="wbr"></span> sell/<wbr><span class="wbr"></span> trade</wbr></wbr></a> <span class="count">1,344</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/AntiquesForSale/">antiq.-collectibles</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/AppliancesForSale/">appliances</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/BusinessForSale/">business</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MotorcyclesForSale/">boats &amp; motorcycles</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Farm/">farm/<wbr><span class="wbr"></span>garden</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ClothingForSale/">clothing/<wbr><span class="wbr"></span>jewelry</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ElectronicsForSale/">computers/<wbr><span class="wbr"></span>electronics</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Household/">household items</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Free/">free</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/FurnitureForSale/">furniture</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscForSale/">miscellaneous</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/PetsForSale/">pets, pet supplies</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/SportsEquipForSale/">sports equip.</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/TicketsForSale/">tickets</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ToolsForSale/">tools/<wbr><span class="wbr"></span>materials</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/WantedTrade/">want-trade</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/YardSale/">yard sales</a>
</li></ul>
</div>
</div>
<div id="indexSectionColumnBlock">
<div class="indexSectionColumn">
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/automotive/">automotive</a> <span class="count">8,887</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/AutosForSale/">auto-truck-rv</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/AutoPartsForSale/">auto parts</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/AutoServices/">services</a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/musician/">musician</a> <span class="count">132</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicianWanted/">available/<wbr><span class="wbr"></span>wanted</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicEquipForSale/">equip/<wbr><span class="wbr"></span>instruments</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicInstruction/">instruction</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicianServices/">services</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/PlugBand/">plug the band</a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/rentals/">rentals</a> <span class="count">7,796</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/Roommates/">roommates</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ApartmentsForRent/">apt/<wbr><span class="wbr"></span>condo/<wbr><span class="wbr"></span>house</wbr></wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/CommercialForRent/">commercial</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/VacationForRent/">vacation</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscForRent/">miscellaneous</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/RentalsWanted/">rentals wanted</a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/real-estate/">real estate</a> <span class="count">4,626</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/homes-for-sale/">house/<wbr><span class="wbr"></span>condo</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/land-for-sale/">land for sale</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/commercial-for-sale/">commercial</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/misc-real-estate/">misc</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/real-estate-wanted/">wanted</a>
</li></ul>
</div>
</div>
<div id="indexSectionColumnBlock">
<div class="indexSectionColumn">
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/employment/">jobs</a> <span class="count">12,473</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/AccountingJobs/">acctg/<wbr><span class="wbr"></span>finance</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/AdminOfficeJobs/">admin/<wbr><span class="wbr"></span>office</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ComputerJobs/">computer/<wbr><span class="wbr"></span>technical</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/CustomerServiceJobs/">customer service</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/DomesticJobs/">domestic</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/DriverJobs/">driver/<wbr><span class="wbr"></span>delivery/<wbr><span class="wbr"></span>courier</wbr></wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/EducationJobs/">education</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/FocusGroups/">focus group/<wbr><span class="wbr"></span>studies</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/JobWanted/">job wanted/<wbr><span class="wbr"></span>resume</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ManagementJobs/">mgmt/<wbr><span class="wbr"></span>professional</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MedicalHealthJobs/">medical/<wbr><span class="wbr"></span>health</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscJobs/">miscellaneous</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/RealEstateJobs/">real estate</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/RestaurantRetailJobs/">rest/<wbr><span class="wbr"></span>retail/<wbr><span class="wbr"></span>hotel</wbr></wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/SalesJobs/">sales/<wbr><span class="wbr"></span>mktg</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/SalonJobs/">salon/<wbr><span class="wbr"></span>spa</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Auditions/">show biz/<wbr><span class="wbr"></span>audition</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/TradesJobs/">trades/<wbr><span class="wbr"></span>labor</wbr></a>
</li></ul>
</div>
</div>
<div id="indexSectionColumnBlock">
<div class="indexSectionColumn">
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/dating/classifieds/Disclaimer?section=4383">dating</a> <span class="count">3,920</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/WomenSeekMen/classifieds/Disclaimer?category=4453">women &gt; men</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MenSeekWomen/classifieds/Disclaimer?category=4454">men &gt; women</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MenSeekMen/classifieds/Disclaimer?category=4456">men &gt; men</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/WomenSeekWomen/classifieds/Disclaimer?category=4452">women &gt; women</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Transgender/classifieds/Disclaimer?category=44989577">t &gt;</a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/adult/classifieds/Disclaimer?section=4381">adult</a> 
    
  </div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/FemaleEscorts/classifieds/Disclaimer?category=4443">escorts</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/BodyRubs/classifieds/Disclaimer?category=1067062">body rubs</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Strippers/classifieds/Disclaimer?category=5901439">strippers &amp; strip clubs</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Domination/classifieds/Disclaimer?category=4949054">dom &amp; fetish</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/TranssexualEscorts/classifieds/Disclaimer?category=790380">ts</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MaleEscorts/classifieds/Disclaimer?category=2655774">male escorts</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/Datelines/classifieds/Disclaimer?category=695873">phone &amp; websites</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/AdultJobs/classifieds/Disclaimer?category=4389">adult jobs</a>
</li></ul>
<div class="indexSectionButtons">
<a class="head" href="http://atlanta.backpage.com/services/">services</a> <span class="count">5,719</span>
</div>
<ul>
<li class="indexSectionList"><a href="http://atlanta.backpage.com/BizOpps/">biz opps</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/BusinessServices/">business</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/CleaningServices/">cleaning</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/ComputerServices/">computer</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/FinancialServices/">financial</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/CreativeServices/">creative</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/HealthServices/">health &amp; beauty</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/HomeImprovement/">home improvement</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/LaborMoving/">labor/<wbr><span class="wbr"></span>moving</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/LawnServices/">landscape/<wbr><span class="wbr"></span>lawn</wbr></a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/LegalServices/">legal</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/TherapeuticMassage/">massage</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscServices/">misc.</a>
</li><li class="indexSectionList"><a href="http://atlanta.backpage.com/RealEstateServices/">real estate</a>
</li></ul>
</div><!-- .indexSectionColumn -->
</div><!-- .indexSectionColumnBlock -->
</div><!-- #columnsTable -->
</div>
<!-- #mainCellWrapper -->
<script type="text/javascript">
    <!--
      setCookie("site","atlanta.backpage.com",30,"backpage.com");
    // -->
    </script>
<div id="tlFooter">
<div class="footerText">
<a href="https://my.backpage.com/classifieds/central/index">My Account</a> |
        
      
        
          <a href="https://my.backpage.com/classifieds/central/PurchaseCredits.html/atlanta.backpage.com/" onclick="if ($(this).attr('disabled')){return false;}else{$(this).attr('disabled', 'true'); return true;}">Buy Credits</a> |
        

        
          <a href="http://posting.atlanta.backpage.com/classifieds/Contact">Contact</a> |
        

        <a href="http://atlanta.backpage.com/classifieds/Help">Help</a> |
        <a href="http://atlanta.backpage.com/classifieds/PrivacyPolicy">Privacy</a> |
        <a href="http://atlanta.backpage.com/classifieds/TermsOfUse">Terms</a>

        
          | <a href="http://atlanta.backpage.com/classifieds/UserSafety">Safety</a>
</div><!-- .footerText -->
<div class="footerDisclaimer">
        atlanta.backpage.com is an interactive computer service that enables access by multiple users and should not be treated as the publisher or speaker of any information provided by another information content provider.
        © 2016
        
          <a href="http://atlanta.backpage.com/classifieds/AllCities">backpage.com</a>
</div><!-- .footerDisclaimer -->
</div><!-- #tlFooter -->
</div><!-- #pageBackground -->
</body>

In [34]:
from IPython.display import HTML, display

# Serialise the parsed <body> element back to markup and render it inline.
# NOTE(review): this renders markup scraped from a third-party site inside the
# notebook — confirm that is acceptable before sharing the rendered output.
body_markup = str(soup.find('body'))
display(HTML(body_markup))


search
atlanta.backpage.com is an interactive computer service that enables access by multiple users and should not be treated as the publisher or speaker of any information provided by another information content provider. © 2016 backpage.com

Problem

  • Sites might limit the revisit rate from a given IP address

Solution

  • Change IP addresses

Tor Scraper


In [ ]:
class TorEngine(DefaultEngine):
    """Engine variant that sends urllib HTTP requests through a local proxy.

    Only the constructor is shown in this excerpt; it stores the control
    settings on the instance and installs a proxy-aware urllib opener.
    """

    def __init__(self, pw = None, control = ("127.0.0.1", 9051), signal = Signal.NEWNYM,
            proxy_handler = urllib.request.ProxyHandler({"http": "127.0.0.1:8118"}),
            data = None, headers = { "User-Agent": DEFAULT_USER_AGENT }):
        """Store the connection settings and install the proxy opener.

        Args:
            pw: Password stored on ``self.pw``. When falsy, the user is
                prompted for it with ``getpass``.
            control: ``(host, port)`` pair stored on ``self.control``
                (presumably the Tor control port — confirm against the
                methods that use it).
            signal: Value stored on ``self.signal``; defaults to
                ``Signal.NEWNYM``.
            proxy_handler: urllib proxy handler used to build the opener;
                defaults to an HTTP proxy at 127.0.0.1:8118.
            data: Forwarded unchanged to ``DefaultEngine.__init__``.
            headers: Forwarded unchanged to ``DefaultEngine.__init__``.

        NOTE(review): the default ``proxy_handler`` and ``headers`` objects
        are created once at definition time and shared by every instance
        that relies on the defaults.
        """
        # Ask interactively only when no (truthy) password was supplied.
        self.pw = pw if pw else getpass.getpass("Tor password: ")
        self.control, self.signal, self.proxy_handler = control, signal, proxy_handler
        # install_opener() replaces urllib's global default opener, so every
        # later urllib.request.urlopen() call in this process uses the proxy.
        urllib.request.install_opener(urllib.request.build_opener(self.proxy_handler))
        super().__init__(data, headers)

...

On GitHub

Install Tor and Privoxy

brew install privoxy
brew install tor

Modify some config files

Start Services

$ brew services start privoxy
==> Successfully started `privoxy` (label: homebrew.mxcl.privoxy)
$ brew services start tor
==> Successfully started `tor` (label: homebrew.mxcl.tor)

In [40]:
# Build a TorEngine with all default arguments. No password is passed in, and the
# recorded run below shows an interactive "Tor password:" prompt.
from palantiri.core import engine
te = engine.TorEngine()


Tor password: ········

In [ ]:
# Fetch a what-is-my-IP endpoint through the engine and print the response source.
# NOTE(review): presumably a check that requests leave via Tor rather than the
# local address — no output was recorded for this cell, so confirm by running it.
out = te.get_page_source('http://ifconfig.me/ip')
print(out.source)

In [43]:
# Fetch `url` (defined in the first crawling cell) through the Tor engine this time.
# `out` is rebound here; the recorded output shows `out.source` is the raw page as bytes.
out = te.get_page_source(url)
print(out.source)


b'\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n  \r\n\r\n  <!DOCTYPE html>\r\n    <!--[if lte IE 7]> <html class="ie7" lang-"en-us"> <![endif]-->\r\n    <!--[if IE 8]>     <html class="ie8" lang="en-us"> <![endif]-->\r\n    <!--[if IE 9]>     <html class="ie9" lang="en-us"> <![endif]-->\r\n    <!--[if !IE]><!--> <html lang="en-us">             <!--<![endif]-->\r\n\r\n    <head>\r\n      <title>Atlanta classifieds for apts, jobs, and items for sale - backpage.com</title>\r\n      <meta http-equiv="X-UA-Compatible" content="IE=edge">\r\n      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">\r\n      <meta name="keywords" content="Atlanta, Atlanta classifieds, Atlanta classified ads, jobs, apartments for rent, house rentals, employment, roommate, musician, services, massage, community, forums">\r\n      <meta name="description" content="Atlanta classifieds. Post free ads for apartments, houses for rent, jobs, furniture, appliances, cars, pets and items for sale.">\r\n      \r\n      \r\n        <link rel="shortcut icon" href="/favicon.ico" />\r\n      \r\n\r\n      \r\n      \r\n        <meta name="viewport" content="width=device-width, initial-scale=1.0">\r\n      \r\n\r\n      \r\n      \r\n\r\n      \r\n      <link rel="stylesheet" type="text/css" href="http://atlanta.backpage.com/styles/Global.css?cb=36c7777d1ced62b17b9283c32c177874">\r\n    \r\n      <link rel="stylesheet" type="text/css" href="http://atlanta.backpage.com/styles/custom/Backpage.css?cb=36c7777d1ced62b17b9283c32c177874">\r\n\r\n      \r\n      \r\n        <link rel="canonical" href="http://atlanta.backpage.com/" />\r\n      \r\n      \r\n\r\n      \r\n      <script type="text/javascript" src="/scripts/jquery-1.7.2.min.js"></script>\r\n  <script type="text/javascript" src="/scripts/global-compiled.js?4"></script>\r\n\r\n      <script type="text/javascript">\r\n         (function($){Menu=function(e){return 
e&&e.on_before_open&&(this.on_before_open=e.on_before_open),$(document).ready(function(){var e=".dimmer",t=".sidebar",n=".sidebar-nav",s=".sidebar-toggle";this.elements={menu:$(t),dimmer:$(e),trigger:$(s),wrapper:$(n)},this.elements.trigger.on("click",this.toggle.bind(this)),this.elements.dimmer.on("click",this.close.bind(this)),$(window).on("resize orientationchange",this.set_height.bind(this)),this.state="closed"}.bind(this)),this},Menu.prototype.toggle=function(){"open"==this.state?this.close():this.open()},Menu.prototype.open=function(){if("closed"==this.state){this.on_before_open();var e=this.elements.wrapper,t=(this.elements.dimmer,this.elements.trigger,this.elements.menu);e.addClass("active"),this.set_height(),setTimeout(function(){e.addClass("open"),t.one("webkitTransitionEnd otransitionend oTransitionEnd msTransitionEnd transitionend",function(){this.state="open"}.bind(this))}.bind(this),0)}},Menu.prototype.set_height=function(){this.elements.wrapper.css("min-height","");var e=$(document).height();this.elements.wrapper.css("min-height",e+"px")},Menu.prototype.close=function(){if("open"==this.state){var e=this.elements.menu,t=this.elements.wrapper;t.removeClass("open"),e.one("webkitTransitionEnd otransitionend oTransitionEnd msTransitionEnd transitionend",function(){t.removeClass("active"),this.state="closed"}.bind(this))}};var menu=new Menu({on_before_open:function(){search&&"open"==search.state&&search.elements.trigger.css("z-index","9998")}});Search=function(e){return e&&e.on_before_open&&(this.on_before_open=e.on_before_open),$(document).ready(function(){var e=".search-wrap",t=".search-trigger",n=\'.search-wrap input[type="text"]\';this.elements={menu:$(e),trigger:$(t),keywords:$(n)},this.elements.trigger.on("click",this.toggle.bind(this)),this.elements.keywords.on("click focus 
keypress",this.open.bind(this)),this.state="closed"}.bind(this)),this},Search.prototype.toggle=function(){"open"==this.state?this.close():this.open(),this.elements.trigger.css("z-index",10004)},Search.prototype.open=function(){this.on_before_open();var e=this.elements.menu,t=this.elements.trigger;e.addClass("open"),t.addClass("active"),this.elements.trigger.css("z-index",10004),this.state="open"},Search.prototype.close=function(){var e=this.elements.menu,t=this.elements.trigger;e.removeClass("open"),t.removeClass("active"),this.state="closed"};var search=new Search({on_before_open:function(){menu.close()}});})(jQuery);\r\n      </script>\r\n\r\n      \r\n      <script>dataLayer=[];</script>\r\n    </head>\r\n    \r\n    \r\n\r\n    <body id="index">\r\n\r\n    \r\n    \r\n\r\n\r\n\r\n\r\n  \r\n\r\n  <!-- Google Tag Manager -->\r\n  <noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-5KCSP8"\r\n  height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>\r\n  <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({\'gtm.start\': \r\n  new Date().getTime(),event:\'gtm.js\'});var f=d.getElementsByTagName(s)[0],\r\n  j=d.createElement(s),dl=l!=\'dataLayer\'?\'&l=\'+l:\'\';j.async=true;j.src=\r\n  \'//www.googletagmanager.com/gtm.js?id=\'+i+dl;f.parentNode.insertBefore(j,f);\r\n  })(window,document,\'script\',\'dataLayer\',\'GTM-5KCSP8\');</script>\r\n  <!-- End Google Tag Manager -->\r\n\r\n\r\n\r\n    \r\n    \r\n      <div class="nav-trigger sidebar-toggle">nav</div>\r\n      \r\n        <div class="search-trigger">search</div>\r\n      \r\n      \r\n\r\n<div class="sidebar-nav">\r\n  <div class="sidebar">\r\n    <div class="inner">\r\n      \r\n      <div id="postAdButton">\r\n        <form name="formPost" id="formPost" action="http://posting.atlanta.backpage.com/online/classifieds/PostAdPPI.html/atl/atlanta.backpage.com/" method="get">\r\n          <input type="submit" value="Post Ad" class="button" id="postAdButton">\r\n          
<input type="hidden" name="u" value="atl">\r\n          <input type="hidden" name="serverName" value="atlanta.backpage.com">\r\n        </form>\r\n      </div><!-- #postAdButton -->\r\n\r\n      \r\n      <ul class="top">\r\n        <li class="nav-item"><a href="http://atlanta.backpage.com/">Home</a></li>\r\n        \r\n          <li class="nav-item"><a href="https://my.backpage.com/classifieds/central/index">My Account</a></li>\r\n        \r\n        \r\n          <li class="nav-item"><a href="https://my.backpage.com/classifieds/central/PurchaseCredits.html/atlanta.backpage.com/" onclick="if ($(this).attr(\'disabled\')){return false;}else{$(this).attr(\'disabled\', \'true\'); return true;}">Buy Credits</a></li>\r\n        \r\n      </ul>\r\n\r\n      \r\n      \r\n\r\n        \r\n        \r\n          \r\n          \r\n\r\n          <h3>Change Language</h3>\r\n          <ul class="list languageSwitch">\r\n            <li class="nav-item">\n<a href="#es-us"\nrel="nofollow"\ndata-lang="es-us"\ndata-key=""\ndata-langKey="">espa&ntilde;ol</a>\n</li>\r\n          </ul>\r\n          \r\n          \r\n          <script>\r\n            $("ul.languageSwitch li a").on("click", function(e){\r\n              e.preventDefault();\r\n              var myLang = $(this).attr("data-lang");\r\n              var myKey = $(this).attr("data-key");\r\n              var myLangKey = $(this).attr("data-langKey");\r\n\r\n              var myStart = window.location.href.match(/^https?:\\/\\//)||\'\';\r\n              var myUrl = window.location.href.replace(/^https?:\\/\\//,\'\');\r\n\r\n              // update the key\r\n              if (myKey != "" && myLangKey != ""){\r\n                myUrl = myUrl.replace(\'/\'+myKey+\'/\', \'/\'+myLangKey+\'/\');\r\n              }\r\n\r\n              // update the language\r\n              if (myLang!=""){\r\n                if (myUrl.search(/\\/..-..\\//) == -1){\r\n                  \r\n                    myUrl = myUrl.replace(/\\//, 
\'/\'+myLang+\'/\');\r\n                  \r\n                } else {\r\n                  myUrl = myUrl.replace(/\\/..-..\\//, \'/\'+myLang+\'/\');\r\n                }\r\n                setCookie("languageCode",myLang,30,"backpage.com");\r\n              } else {\r\n                myUrl = myUrl.replace(/\\/..-..\\//, \'/\');\r\n                setCookie("languageCode","",30,"backpage.com");\r\n              }\r\n              //console.log(myStart+myUrl);\r\n              window.location = myStart+myUrl;\r\n            });\r\n          </script>\r\n        \r\n      \r\n\r\n      \r\n      \r\n        \r\n\r\n        \r\n        \r\n        \r\n        \r\n        \r\n          <h3>Nearby Cities</h3>\r\n          <ul class="list">\r\n            <li class="nav-item"><a href="http://albanyga.backpage.com/">albany</a></li>\n<li class="nav-item"><a href="http://athensga.backpage.com/">athens</a></li>\n<li class="nav-item"><a href="http://atlanta.backpage.com/">atlanta</a></li>\n<li class="nav-item"><a href="http://augusta.backpage.com/">augusta</a></li>\n<li class="nav-item"><a href="http://brunswick.backpage.com/">brunswick</a></li>\n<li class="nav-item"><a href="http://columbusga.backpage.com/">columbus</a></li>\n<li class="nav-item"><a href="http://macon.backpage.com/">macon</a></li>\n<li class="nav-item"><a href="http://nwga.backpage.com/">northwest georgia</a></li>\n<li class="nav-item"><a href="http://savannah.backpage.com/">savannah</a></li>\n<li class="nav-item"><a href="http://statesboro.backpage.com/">statesboro</a></li>\n<li class="nav-item"><a href="http://valdosta.backpage.com/">valdosta</a></li>\r\n          </ul>\r\n        \r\n      \r\n      <br>\r\n\r\n\r\n      \r\n      <ul class="list">\r\n        <li class="nav-item">\r\n          <a href="http://atlanta.backpage.com/classifieds/AllCities">View All Cities</a>\r\n        </li>\r\n      </ul>\r\n\r\n    </div>\r\n  </div>\r\n  <div class="dimmer"></div>\r\n</div><!-- .sidebar-nav 
-->\r\n\r\n\r\n    \r\n  \r\n\r\n  \r\n\r\n  \r\n\r\n  \r\n\r\n  \r\n  \r\n    <div id="tlHeader" class="siteHeader">\r\n      <div id="logo" class="tlBlock">\r\n        \r\n          <a href="http://atlanta.backpage.com/classifieds/AllCities">atlanta.backpage.com</a>\r\n        \r\n      </div>\r\n\r\n      <div id="postAdButton" class="tlBlock">\r\n        <form name="formPost" id="formPost" action="http://posting.atlanta.backpage.com/online/classifieds/PostAdPPI.html/atl/atlanta.backpage.com/" method="get">\r\n      <input type="submit" value="Post Ad" class="button" id="postAdButton">\r\n      <input type="hidden" name="u" value="atl">\r\n      <input type="hidden" name="serverName" value="atlanta.backpage.com">\r\n    </form>\r\n      </div><!-- #postAdButton -->\r\n\r\n      \r\n      <div id="searchInline" class="tlBlock">\r\n        <span class="search-wrapper">\r\n        <form name="formSearch" id="formSearch" action="http://atlanta.backpage.com/online/classifieds/Search" method="get">\r\n      <input type="hidden" name="rd" value="no">\r\n      <input type="text" size="15" name="keyword" value=" keyword" data-default=" keyword" maxlength="100">\r\n      \r\n        \r\n        <select name="section">\r\n          \r\n          \r\n          \r\n          \r\n            \r\n              <option value="11258070">local places\r\n            \r\n          \r\n            \r\n              <option value="4382">community\r\n            \r\n          \r\n            \r\n              <option value="4378" selected>buy/ sell/ trade\r\n            \r\n          \r\n            \r\n              <option value="463948">automotive\r\n            \r\n          \r\n            \r\n              <option value="4380">musician\r\n            \r\n          \r\n            \r\n              <option value="4376">rentals\r\n            \r\n          \r\n            \r\n              <option value="4375">real estate\r\n            \r\n          \r\n            \r\n           
   <option value="4373">jobs\r\n            \r\n          \r\n            \r\n              <option value="4383">dating\r\n            \r\n          \r\n            \r\n              <option value="4381">adult\r\n            \r\n          \r\n            \r\n              <option value="4374">services\r\n            \r\n          \r\n        </select>\r\n      \r\n      <input type="submit" value="search" class="button" id="searchButton">\r\n    </form>\r\n\r\n    <script>\r\n      jQuery("form[name=\'formSearch\'] input[type=\'text\']").focus(function(){\r\n        if (jQuery(this).is("[data-default]")){\r\n          if (jQuery(this).val()==jQuery(this).attr("data-default")){\r\n            jQuery(this).val("");\r\n          }\r\n        }\r\n      });\r\n      jQuery("form[name=\'formSearch\'] input[type=\'text\']").blur(function(){\r\n        if (jQuery(this).is("[data-default]")){\r\n          if (jQuery.trim(jQuery(this).val())==""){\r\n            jQuery(this).val(jQuery(this).attr("data-default"));\r\n          }\r\n        }\r\n      });\r\n      jQuery("form[name=\'formSearch\']").submit(function(){\r\n        jQuery("form[name=\'formSearch\'] input[type=text]").each(function() {\r\n          if (jQuery(this).val()==jQuery(this).attr("data-default")){\r\n            jQuery(this).val("");\r\n          }\r\n        });\r\n      });\r\n    </script>\r\n      </span>\r\n      </div>\r\n\r\n      <div id="community" class="tlBlock">\r\n        \r\n          <h1>\r\n            <span class="city">\r\n              atlanta,\xc2\xa0ga\r\n            </span>\r\n            <span class="comm">\r\n                &nbsp;free&nbsp;classifieds\r\n            </span>\r\n          </h1>\r\n\r\n        \r\n      </div><!-- #community -->\r\n    </div><!-- #tlHeader -->\r\n\r\n    \r\n    \r\n\r\n    \r\n    <div id="searchDropdown">\r\n      <span class="search-wrapper">\r\n        <form name="formSearch" id="formSearch" 
action="http://atlanta.backpage.com/online/classifieds/Search" method="get">\r\n      <input type="hidden" name="rd" value="no">\r\n      <input type="text" size="15" name="keyword" value=" keyword" data-default=" keyword" maxlength="100">\r\n      \r\n        \r\n        <select name="section">\r\n          \r\n          \r\n          \r\n          \r\n            \r\n              <option value="11258070">local places\r\n            \r\n          \r\n            \r\n              <option value="4382">community\r\n            \r\n          \r\n            \r\n              <option value="4378" selected>buy/ sell/ trade\r\n            \r\n          \r\n            \r\n              <option value="463948">automotive\r\n            \r\n          \r\n            \r\n              <option value="4380">musician\r\n            \r\n          \r\n            \r\n              <option value="4376">rentals\r\n            \r\n          \r\n            \r\n              <option value="4375">real estate\r\n            \r\n          \r\n            \r\n              <option value="4373">jobs\r\n            \r\n          \r\n            \r\n              <option value="4383">dating\r\n            \r\n          \r\n            \r\n              <option value="4381">adult\r\n            \r\n          \r\n            \r\n              <option value="4374">services\r\n            \r\n          \r\n        </select>\r\n      \r\n      <input type="submit" value="search" class="button" id="searchButton">\r\n    </form>\r\n\r\n    <script>\r\n      jQuery("form[name=\'formSearch\'] input[type=\'text\']").focus(function(){\r\n        if (jQuery(this).is("[data-default]")){\r\n          if (jQuery(this).val()==jQuery(this).attr("data-default")){\r\n            jQuery(this).val("");\r\n          }\r\n        }\r\n      });\r\n      jQuery("form[name=\'formSearch\'] input[type=\'text\']").blur(function(){\r\n        if (jQuery(this).is("[data-default]")){\r\n          if 
(jQuery.trim(jQuery(this).val())==""){\r\n            jQuery(this).val(jQuery(this).attr("data-default"));\r\n          }\r\n        }\r\n      });\r\n      jQuery("form[name=\'formSearch\']").submit(function(){\r\n        jQuery("form[name=\'formSearch\'] input[type=text]").each(function() {\r\n          if (jQuery(this).val()==jQuery(this).attr("data-default")){\r\n            jQuery(this).val("");\r\n          }\r\n        });\r\n      });\r\n    </script>\r\n      </span>\r\n    </div>\r\n\r\n    \r\n    \r\n      \r\n    \r\n  \r\n\r\n  <div id="pageBackground">\r\n\r\n  \r\n    <div id="mainCellWrapper">\r\n\r\n\r\n  \r\n\r\n  \r\n    \r\n    <!-- 2016-12-13 04:00:00 -->\r\n    <div class="mainCellBackground" id="columnsTable" style="width: 100%;">\r\n      <div class="indexSectionColumnBlock">\r\n        <div class="indexSectionColumn">\r\n          \r\n\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/places/" class="head">local&nbsp;places</a>&nbsp;<span class="count">5,086</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/events/">events</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/bars/">bars/<wbr><span class="wbr"></span>clubs</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/restaurants/">restaurants</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/salons/">salons/<wbr><span class="wbr"></span>nails/<wbr><span class="wbr"></span>spas</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/community/" class="head">community</a>&nbsp;<span class="count">190</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      
\r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/Childcare/">childcare</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Classes/">classes/<wbr><span class="wbr"></span>workshops</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/GeneralCommunity/">general</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Groups/">groups</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/LostAndFound/">lost & found</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Volunteers/">volunteers</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/buy-sell-trade/" class="head">buy/<wbr><span class="wbr"></span>&nbsp;sell/<wbr><span class="wbr"></span>&nbsp;trade</a>&nbsp;<span class="count">1,377</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/AntiquesForSale/">antiq.-collectibles</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/AppliancesForSale/">appliances</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/BusinessForSale/">business</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MotorcyclesForSale/">boats & motorcycles</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Farm/">farm/<wbr><span class="wbr"></span>garden</a>\n<li class="indexSectionList"><a 
href="http://atlanta.backpage.com/ClothingForSale/">clothing/<wbr><span class="wbr"></span>jewelry</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/ElectronicsForSale/">computers/<wbr><span class="wbr"></span>electronics</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Household/">household items</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Free/">free</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/FurnitureForSale/">furniture</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscForSale/">miscellaneous</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/PetsForSale/">pets, pet supplies</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/SportsEquipForSale/">sports equip.</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/TicketsForSale/">tickets</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/ToolsForSale/">tools/<wbr><span class="wbr"></span>materials</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/WantedTrade/">want-trade</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/YardSale/">yard sales</a>\r\n    </ul>\r\n  \r\n\r\n  \r\n      </div>\r\n    </div>\r\n    <div id="indexSectionColumnBlock">\r\n      <div class="indexSectionColumn">\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/automotive/" class="head">automotive</a>&nbsp;<span class="count">9,548</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/AutosForSale/">auto-truck-rv</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/AutoPartsForSale/">auto parts</a>\n<li class="indexSectionList"><a 
href="http://atlanta.backpage.com/AutoServices/">services</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/musician/" class="head">musician</a>&nbsp;<span class="count">131</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicianWanted/">available/<wbr><span class="wbr"></span>wanted</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicEquipForSale/">equip/<wbr><span class="wbr"></span>instruments</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicInstruction/">instruction</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MusicianServices/">services</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/PlugBand/">plug the band</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/rentals/" class="head">rentals</a>&nbsp;<span class="count">7,675</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/Roommates/">roommates</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/ApartmentsForRent/">apt/<wbr><span class="wbr"></span>condo/<wbr><span class="wbr"></span>house</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/CommercialForRent/">commercial</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/VacationForRent/">vacation</a>\n<li class="indexSectionList"><a 
href="http://atlanta.backpage.com/MiscForRent/">miscellaneous</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/RentalsWanted/">rentals wanted</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/real-estate/" class="head">real&nbsp;estate</a>&nbsp;<span class="count">4,061</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/homes-for-sale/">house/<wbr><span class="wbr"></span>condo</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/land-for-sale/">land for sale</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/commercial-for-sale/">commercial</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/misc-real-estate/">misc</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/real-estate-wanted/">wanted</a>\r\n    </ul>\r\n  \r\n\r\n  \r\n      </div>\r\n    </div>\r\n    <div id="indexSectionColumnBlock">\r\n      <div class="indexSectionColumn">\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/employment/" class="head">jobs</a>&nbsp;<span class="count">8,867</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/AccountingJobs/">acctg/<wbr><span 
class="wbr"></span>finance</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/AdminOfficeJobs/">admin/<wbr><span class="wbr"></span>office</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/ComputerJobs/">computer/<wbr><span class="wbr"></span>technical</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/CustomerServiceJobs/">customer service</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/DomesticJobs/">domestic</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/DriverJobs/">driver/<wbr><span class="wbr"></span>delivery/<wbr><span class="wbr"></span>courier</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/EducationJobs/">education</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/FocusGroups/">focus group/<wbr><span class="wbr"></span>studies</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/JobWanted/">job wanted/<wbr><span class="wbr"></span>resume</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/ManagementJobs/">mgmt/<wbr><span class="wbr"></span>professional</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MedicalHealthJobs/">medical/<wbr><span class="wbr"></span>health</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscJobs/">miscellaneous</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/RealEstateJobs/">real estate</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/RestaurantRetailJobs/">rest/<wbr><span class="wbr"></span>retail/<wbr><span class="wbr"></span>hotel</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/SalesJobs/">sales/<wbr><span class="wbr"></span>mktg</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/SalonJobs/">salon/<wbr><span class="wbr"></span>spa</a>\n<li class="indexSectionList"><a 
href="http://atlanta.backpage.com/Auditions/">show biz/<wbr><span class="wbr"></span>audition</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/TradesJobs/">trades/<wbr><span class="wbr"></span>labor</a>\r\n    </ul>\r\n  \r\n\r\n  \r\n      </div>\r\n    </div>\r\n    <div id="indexSectionColumnBlock">\r\n      <div class="indexSectionColumn">\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/dating/classifieds/Disclaimer?section=4383" class="head">dating</a>&nbsp;<span class="count">3,959</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/WomenSeekMen/classifieds/Disclaimer?category=4453">women &gt; men</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MenSeekWomen/classifieds/Disclaimer?category=4454">men &gt; women</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MenSeekMen/classifieds/Disclaimer?category=4456">men &gt; men</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/WomenSeekWomen/classifieds/Disclaimer?category=4452">women &gt; women</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Transgender/classifieds/Disclaimer?category=44989577">t &gt;</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/adult/classifieds/Disclaimer?section=4381" class="head">adult</a>&nbsp;\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a 
href="http://atlanta.backpage.com/FemaleEscorts/classifieds/Disclaimer?category=4443">escorts</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/BodyRubs/classifieds/Disclaimer?category=1067062">body rubs</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Strippers/classifieds/Disclaimer?category=5901439">strippers & strip clubs</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Domination/classifieds/Disclaimer?category=4949054">dom & fetish</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/TranssexualEscorts/classifieds/Disclaimer?category=790380">ts</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MaleEscorts/classifieds/Disclaimer?category=2655774">male escorts</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/Datelines/classifieds/Disclaimer?category=695873">phone & websites</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/AdultJobs/classifieds/Disclaimer?category=4389">adult jobs</a>\r\n    </ul>\r\n          \r\n              \r\n            <div class="indexSectionButtons">\r\n    \r\n    \r\n      <a href="http://atlanta.backpage.com/services/" class="head">services</a>&nbsp;<span class="count">5,737</span>\r\n    \r\n  </div>\r\n\r\n  \r\n  \r\n  \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n      \r\n    \r\n    \r\n    <ul>\r\n    <li class="indexSectionList"><a href="http://atlanta.backpage.com/BizOpps/">biz opps</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/BusinessServices/">business</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/CleaningServices/">cleaning</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/ComputerServices/">computer</a>\n<li 
class="indexSectionList"><a href="http://atlanta.backpage.com/FinancialServices/">financial</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/CreativeServices/">creative</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/HealthServices/">health & beauty</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/HomeImprovement/">home improvement</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/LaborMoving/">labor/<wbr><span class="wbr"></span>moving</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/LawnServices/">landscape/<wbr><span class="wbr"></span>lawn</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/LegalServices/">legal</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/TherapeuticMassage/">massage</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/MiscServices/">misc.</a>\n<li class="indexSectionList"><a href="http://atlanta.backpage.com/RealEstateServices/">real estate</a>\r\n    </ul>\r\n          \r\n        </div><!-- .indexSectionColumn -->\r\n      </div><!-- .indexSectionColumnBlock -->\r\n    </div><!-- #columnsTable -->\r\n  \r\n\r\n\r\n  \r\n\r\n  </div>\r\n    <!-- #mainCellWrapper -->\r\n  \r\n  \r\n  \r\n  \r\n    <script type="text/javascript">\r\n    <!--\r\n      setCookie("site","atlanta.backpage.com",30,"backpage.com");\r\n    // -->\r\n    </script>\r\n  \r\n\r\n  \r\n  \r\n    <div id="tlFooter">\r\n      <div class="footerText">\r\n        \r\n          <a href="https://my.backpage.com/classifieds/central/index">My Account</a> |\r\n        \r\n      \r\n        \r\n          <a href="https://my.backpage.com/classifieds/central/PurchaseCredits.html/atlanta.backpage.com/" onclick="if ($(this).attr(\'disabled\')){return false;}else{$(this).attr(\'disabled\', \'true\'); return true;}">Buy Credits</a> |\r\n        \r\n\r\n        \r\n          <a 
href="http://posting.atlanta.backpage.com/classifieds/Contact">Contact</a> |\r\n        \r\n\r\n        <a href="http://atlanta.backpage.com/classifieds/Help">Help</a> |\r\n        <a href="http://atlanta.backpage.com/classifieds/PrivacyPolicy">Privacy</a> |\r\n        <a href="http://atlanta.backpage.com/classifieds/TermsOfUse">Terms</a>\r\n\r\n        \r\n          | <a href="http://atlanta.backpage.com/classifieds/UserSafety">Safety</a>\r\n        \r\n\r\n        \r\n      </div><!-- .footerText -->\r\n\r\n      \r\n      <div class="footerDisclaimer">\r\n        atlanta.backpage.com is an interactive computer service that enables access by multiple users and should not be treated as the publisher or speaker of any information provided by another information content provider.\r\n        &copy;&nbsp;2016\r\n        \r\n          <a href="http://atlanta.backpage.com/classifieds/AllCities">backpage.com</a>\r\n        \r\n      </div><!-- .footerDisclaimer -->\r\n    </div><!-- #tlFooter -->\r\n    \r\n\r\n  </div><!-- #pageBackground -->\r\n\r\n  \r\n  \r\n\r\n  \r\n  \r\n    </body>\r\n    </html>\r\n\r\n'

Full Source on GitHub

anidata/palantiri

Bonus

Scraping Tables with Pandas

`pandas.read_html`


In [6]:
import pandas as pd
# read_html downloads the page and returns a list of DataFrames, one per <table> it parses.
dfs = pd.read_html('http://www.espn.com/nba/boxscore?gameId=400899810')
# Display the table at index 1 (the second table parsed from the page).
dfs[1]


Out[6]:
starters MIN FG 3PT FT OREB DREB REB AST STL BLK TO PF +/- PTS
0 Z. RandolphZ. RandolphPF 26 5-11 0-1 0-0 1 3 4 2 2 1 0 0 -14 10
1 J. GreenJ. GreenPF 21 0-8 0-1 0-0 4 1 5 2 2 0 2 2 -11 0
2 T. WilliamsT. WilliamsSF 22 5-10 1-3 0-0 1 3 4 0 0 0 2 2 -9 11
3 A. HarrisonA. HarrisonPG 25 4-9 1-4 4-5 0 2 2 4 1 3 1 1 -5 13
4 T. AllenT. AllenSG 17 4-9 0-0 0-1 0 1 1 1 1 0 0 2 -4 8