In [1]:
# We need to traverse through the URLs, finding the new 'nothing' value
# at each iteration, until we no longer find a new 'nothing'
# Get all the imports out of the way. We'll use a different
# method of handling the binary data in PycURL which doesn't
# use the operator and functools modules
import pycurl
import re
from io import BytesIO
In [2]:
# Traversal method
def traverseURLs(start):
    '''Follow the chain of 'nothing' URLs and return the last URL reached.

    Each page is fetched with PycURL and searched for the phrase
    "and the next nothing is <digits>". While the phrase is found, the
    digits are appended to the linkedlist.php base URL and that page is
    fetched next. The first page that lacks the phrase ends the traversal.

    Args:
        start: Full URL of the first page to fetch.

    Returns:
        The URL of the first fetched page whose body does not contain
        the "next nothing" phrase.

    Raises:
        UnicodeDecodeError: If a page body is not valid UTF-8.
        Any error raised by PycURL during a transfer propagates unchanged.
    '''
    base = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
    current = start
    curlObj = pycurl.Curl()
    try:
        while True:
            # Get the page data; a fresh buffer per request keeps the
            # previous page's bytes out of the current message
            rawBytes = BytesIO()
            curlObj.setopt(pycurl.URL, current)
            curlObj.setopt(pycurl.WRITEDATA, rawBytes)
            curlObj.perform()
            # Isolate the 'nothing' value
            # Note: according to the re module's documentation, we don't
            # need to cache the pattern object for the regex since
            # the module caches recent patterns itself
            message = rawBytes.getvalue().decode("UTF-8")
            nothing = re.search(r"and the next nothing is (\d+)", message)
            if not nothing:
                # No further link on this page: this URL is the end of the chain
                return current
            current = base + nothing.group(1)
    finally:
        # Release the curl handle on every exit path (return or exception)
        curlObj.close()
In [3]:
# Start the traversal from the 'nothing' value 12345 and print the
# URL at which traverseURLs stops (the first page with no "next nothing")
initURL = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=12345"
endURL = traverseURLs(initURL)
print(endURL)
In [4]:
# The page at which the first traversal stopped tells us to divide the
# old nothing by two and repeat, so we restart the traversal from there.
# NOTE(review): 8022 is presumably half of the 'nothing' value in endURL
# from the previous cell -- confirm against that cell's printed output.
secondInitURL = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=8022"
secondEndURL = traverseURLs(secondInitURL)
print(secondEndURL)
In [ ]: