notebook.community

Edit and run



In [1]:

    
# We need to traverse through the URLs, finding the new 'nothing' value
# at each iteration, until we no longer find a new 'nothing'

# Get all the imports out of the way. We'll use a different
# method of handling the binary data in PycURL which doesn't
# use the operator and functools modules

import pycurl
import re
from io import BytesIO



In [2]:

    
# Traversal method
def traverseURLs(start):
    '''Follow the chain of 'nothing' URLs and return the last URL fetched.

    Starting at ``start``, each page is downloaded and searched for the
    phrase "and the next nothing is <digits>". While the phrase is found,
    the digits are appended to the linkedlist.php base URL and that URL is
    fetched next.

    Parameters
    ----------
    start : str
        URL of the first page to fetch.

    Returns
    -------
    str
        URL of the first page whose body does not contain a next
        'nothing' value (this is ``start`` itself if the very first page
        has none).

    Notes
    -----
    Errors raised by the transfer or by the UTF-8 decode are not caught
    here; they propagate to the caller. The curl handle is closed in
    every case.
    '''
    base = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
    current = start
    curlObj = pycurl.Curl()

    try:
        while True:
            # Get the page data. A fresh buffer is used per request so the
            # previous page's bytes never leak into the next search.
            rawBytes = BytesIO()
            curlObj.setopt(pycurl.URL, current)
            curlObj.setopt(pycurl.WRITEDATA, rawBytes)
            curlObj.perform()

            # Isolate the 'nothing' value
            # Note: according to the re module's documentation, we don't
            #       need to cache the pattern object for the regex since
            #       the module caches recent patterns itself
            message = rawBytes.getvalue().decode("UTF-8")
            nothing = re.search(r"and the next nothing is (\d+)", message)

            if not nothing:
                # End of the chain: this page names no successor
                return current
            current = base + nothing.group(1)
    finally:
        # Release the handle on both the normal return and on errors
        curlObj.close()



In [3]:

    
# Start the traversal from the initial link (nothing=12345) and print
# the URL at which traverseURLs stops, i.e. the first page that does not
# announce a next 'nothing' value
initURL = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=12345"
endURL = traverseURLs(initURL)
print(endURL)









    



http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=16044



In [4]:

    
# The first traversal tells us to divide the old nothing
# by two and repeat: the previous run stopped at nothing=16044,
# so the chain is restarted from 16044 / 2 = 8022
secondInitURL = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=8022"
secondEndURL = traverseURLs(secondInitURL)
print(secondEndURL)









    



http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=66831



In [ ]: