Web Scraper


Objective

-> Usage of Beautiful soup package + cfscrape to bypass anti bot systems


In [2]:
import cfscrape
from bs4 import BeautifulSoup
scraper = cfscrape.create_scraper()  # returns a CloudflareScraper instance
# Or: scraper = cfscrape.CloudflareScraper()  # CloudflareScraper inherits from requests.Session
r= scraper.get("https://etherchain.org/contracts").content  # => "<!DOCTYPE html><html><head>..."
soup = BeautifulSoup(r)
table = soup.find('table', {'class': 'table table-condensed'})
rows = table.find_all('tr')
print('Accounts :')
print('--------')
for row in rows:
    print row.find('a').get('href')[9:] ,row.find('a').contents[0]


Accounts :
--------
0xbf35faa9c265baf50c9cff8c389c363b05753275 Wallet
0x4eecf99d543b278106ac0c0e8ffe616f2137f10a LockMyEther
0xd79b4c6791784184e2755b2fc1659eaab0f80456 HonestDice
0xa9e4e3b1da2462752aea980698c335e70e9ab26c DynamicPyramid
0xfd2487cc0e5dce97f08be1bc8ef1dce8d5988b4d Doubler
0x7fd6d3537b39842cfe16e813851296d4745b51a7 Doubler
0x2ff2a65b0a324c04747bfdc63f4bf525d43e5c62 DonationMatcher
0xa0285193e366a634fd9639d650b2115d77681a37 Fox
0x4ed65e408439a7f6459b5cfbd364f373bd6ed5f7 PRNG_Challenge
0xeacba276ff97853102c3d540b663c08ec7fbe0ed x2
0x7d56485e026d5d3881f778e99969d2b1f90c50af ProtectTheCastle
0xca7c390f8f843a8c3036841fde755e5d0acb97da Diana
0xab7689c7be6d9b5428d6bf4c536dc699fc919260 Multi133v3
0x683c53084d997e6056c555f85f031f8317e26c2b MicroDAO
0xfe9c69945687539fabbf531133838d9cce522a76 Bunny
0xdcb13fa157eebf22ddc8c9aa1d6e394810de6fa3 PiggyBank
0xa39fcb48adf288f143459d57a1a0756718c919ae x15
0x9e2a68ed6b854e2e55433b5ec8f471c7b274df51 Bunnybank
0xa7481af3bbc9d902c02f58204c5683052de53330 Tripler
0x85c84c949b1c29c85b777f2873a19450dbb1a057 Multi133v2
0xa96f7d29dc792359b1ce24c7c54230882dee1be2 LooneyFifty
0x233820087a752349ee20daab1c18e0b7c546d3f6 FinneyDonationGamble
0xafcb0ca8f05c5374a4d11f4197fc25f51a76813d AuctionMaster
0x5158cf97c3e001b402ccb0f9063736ee8d6dad5a Ai
0x6fd8e0e34117c95e568f8938716013f948e0b2f9 TowerOfPower
0xa9d160e32ad37ac6f2b8231e4efe14d35abb576e KingOfTheEtherThrone
0xeb8986bde53ee86ec8eec367a2823904d35f23e9 BarcelonaBlockchainDonations
0x40b88bbd78cb0441259de7a7d608a39b7388369a Diana
0x3d5894b6801221bf296a2ac2d6ccf5ea09ce399a CasinoKonga
0xc0326a98c23296d295ad880b59a3ef7ea6f5e594 AllPayAuction
0x859495380ab8d65b4dd17dba7bec3f74db35146c Pioner_01
0xe19e5f100d6a31169b5dca265c9285059c41d4f6 NanoPyramid
0x2f5cd5d797ec8868a5aaefd784ed2c5bc46a1a47 test
0x883ee0b0ce06f11cb215c7e2299f1948b247784b GetRichNow
0x6dfaa563d04a77aff4c4ad2b17cf4c64d2983dc8 Rouleth
0x28a33d4407b3b94c83579f3e7760948ce13bb55f EtherBuyTheBlock
0xc78daa292c7f44dcbf138db81dc25272138f5aaf Ethereum_eight_bagger
0x273930d21e01ee25e4c219b63259d214872220a2 Wallet
0x2ef76694fbfd691141d83f921a5ba710525de9b0 LooneyLottery
0x1c98757e3b2199df438553892a678a74187b55b1 postCats
0x48b4cb193b587c6f2dab1a9123a7bd5e7d490ced SimpleStorage
0x19551bd3d61fcb4dd802e1319e50b00f4ea78208 Deal
0x0155ce35fe73249fa5d6a29f3b4b7b98732eb2ed lottopollo
0x294308484f47ff5a833a284ac6949eb02728fbe4 ShinySquirrels
0x79e6639a5efe95e62d6a2e4aca6a258f29941a19 DoubleTx
0x6d805b5de59d3f1779e4180b8547bcf728ff91ea LooneyDice
0x5547a311bbe52ecebcbe42c24e695a1bc3b5656f Panda
0x28102aa8f4879eb36966e857d1b8c438e600f254 DoubleFive
0x572ae26f3632a9bbdc3d3f914a29a0117f3b7980 PriceTicker
0x870fe80e76dae4a4c12690dec52456ab13176202 ShinySquirrels
/usr/local/lib/python2.7/dist-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("html.parser"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line 174 of the file /usr/lib/python2.7/runpy.py. To get rid of this warning, change code that looks like this:

 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "html.parser")

  markup_type=markup_type))

In [3]:
r2= scraper.get("https://etherchain.org/accounts/10").content  # => "<!DOCTYPE html><html><head>..."
soup = BeautifulSoup(r2)
table = soup.find('table', {'class': 'table table-condensed'})
rows = table.find_all('tr')
for i in range(1,len(rows)):
    print rows[i].find_all('td')[0].find('a').contents[0], rows[i].find_all('td')[2].contents[0]


0xde188aa82df8434bdc2d2463233a7c00f97d6e73 464271.2143422989 Ether
0xab5801a7d398351b8be11c439e05c5b3259aec9b 459999.94916 Ether
0x3bf86ed8a3153ec933786a02ac090301855e576b 450000 Ether
0xf42ac567772ceb9089de2b091d2aedcd78c4c88e 445439.44838 Ether
0xbf09d77048e270b662330e9486b38b43cd781495 436000 Ether
0x91337a300e0361bddb2e377dd4e88ccb7796663d 414541.03350043364 Ether
0xe10c540088113fa6ec00b4b2c8824f8796e96ec4 384765.55353305204 Ether
0x9d2bfc36106f038250c01801685785b16c86c60d 380000 Ether
0x2b241f037337eb4acc61849bd272ac133f7cdf4b 378000 Ether
0xa7e4fecddc20d83f36971b67e13f1abc98dfcfa6 376840.99064359054 Ether
0x0788bcbec4eaf32b1396edb2e80e578d185d81d3 332701.2169721324 Ether
0xdb7fc663ef8bbaa482665210c0249e5a728b5d44 327219.0902612999 Ether
0x2b717cd432a323a4659039848d3b87de26fc9546 323999.99538 Ether
0x7d04d2edc058a1afc761d9c99ae4fc5c85d4c8a6 314807.84 Ether
0x1706d193862da7f8c746aae63d514df93dfa5dbf 309999.99405414 Ether
0x510e222df10b146f813acc5b94cbb2a9d1a47ade 301231.57606 Ether
0x7c532db9e0c06c26fd40acc56ac55c1ee92d3c3a 300000 Ether
0xfc361105dd90f9ede566499d69e9130395f12ac8 294899.9722874059 Ether
0x2c98e3a127b723f377d3d4b9b2525c9f21469bb2 280088 Ether
0x32cdf5310568121d3b252ddfaf7f65e401221961 280001.001 Ether
0x710023e095db4075730d61037a482378c66b8fba 280000.49465258 Ether
0x00b1e9a7f2b5528cbf5cb1be50437e7d8a92ce99 279997.8872127 Ether
0x5c4aa3c0e7f6917ee6c1204d85a01f08a80e6dd0 256289.68559 Ether
0x2b6ed29a95753c3ad948348e3e7b1a251080ffb9 250000 Ether
0xca92bc1ff0c0681a1dac4baaf79cd489baf9710a 242652.98958 Ether
0xb3b8a41348750766d223e37f5a10bcb6fde5214b 230000.06416 Ether
0x3de8c14c8e7a956f5cc4d82beff749ee65fdc358 224538.1193596902 Ether
0xcafb10ee663f465f9d10588ac44ed20ed608c11e 220747.13897623648 Ether
0xd3595e11f2f148af02ed5f8a36a803c34255d957 219272.42013316 Ether
0x961b80e08e899d0a09c4598444e611352c42775b 204625.85138968727 Ether
0x2bde3b9c0129be4689e245ba689b9b0ae4ac666d 203527.13951778 Ether
0xaf10cc6c50defff901b535691550d7af208939c5 200999.998719 Ether
0xf978b025b64233555cc3c19ada7f4199c9348bf7 200000.99494452 Ether
0x87d7ac0653ccc67aa9c3469eef4352193f7dbb86 200000 Ether
0xc77fc394d2a63fa75f0eee5cee8c3d251469f121 200000 Ether
0x5ed3bbc05240e0d399eb6ddfe60f62de4d9509af 193999.806 Ether
0xd0cd159a28bc599c867e5c3f82a66e6d6db9b6b7 176733.2172445673 Ether
0x00317cd2da2044840b1ebe775c676530a7c65ba3 175526.54323805185 Ether
0x5560248bd6436da52791bf4cb358c5b441f7f52e 174966.52154792773 Ether
0xa2f6abe26fe0e1c1f2684ab002ed02a59ffbf85a 172699.98708989 Ether
0x5b5b69f4e0add2df5d2176d7dbd20b4897bc7ec4 169031.99350958393 Ether
0x267be1c1d684f78cb4f6a176c4911b741e4ffdc0 168744.6073348478 Ether
0xdb6fd484cfa46eeeb73c71edee823e4812f9e2e1 167871.13436328683 Ether
0x86f901003d0ef7a261d7a76ed5f36f217666a4b6 165000 Ether
0x6812f391fd38375316f6613ee1b46b77ad846c52 164997.848 Ether
0x90e63c3d53e0ea496845b7a03ec7548b70014a91 161221.00458164975 Ether
0x281055afc982d96fab65b3a49cac8b878184cb16 160657.1468569332 Ether
0x9c6df936c884811b9a6b49f0dd0a62919a6581d4 157900.16958 Ether
0x6f46cf5569aefa1acc1009290c8e043747172d89 157441.0671917503 Ether
0x415655297a0f299d13acce68195890200c5d4a8b 153246.08536259165 Ether