In [1]:
import pycurl
import re
import functools
import operator
from io import BytesIO

In [3]:
def traverseURLs(start, urls=None):
    """Follow the chain of 'next nothing' pages and return the final URL.

    Starting at ``start``, each page is downloaded and searched for the
    text ``and the next nothing is <digits>``. When found, the digits are
    appended to the linkedlist.php base URL and that page is fetched next.
    The walk stops at the first page that does not contain the phrase.

    Args:
        start: Full URL of the first page to fetch.
        urls: Optional list. When given, every URL that is fetched is
            appended to it in visit order (including the final one), so
            the caller can inspect the path afterwards.

    Returns:
        The URL of the first page whose body has no 'next nothing' value.

    Raises:
        pycurl.error: If a transfer fails.
        UnicodeDecodeError: If a page body is not valid UTF-8.
    """
    base = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
    current = start
    curlObj = pycurl.Curl()

    try:
        while True:
            if urls is not None:
                urls.append(current)

            # Get the page data; a fresh buffer per request so pages
            # never run together.
            rawBytes = BytesIO()
            curlObj.setopt(pycurl.URL, current)
            curlObj.setopt(pycurl.WRITEDATA, rawBytes)
            curlObj.perform()

            # Isolate the 'nothing' value
            # Note: according to the re module's documentation, we don't
            #       need to cache the pattern object for the regex since
            #       the module caches recent patterns itself
            message = rawBytes.getvalue().decode("UTF-8")
            nothing = re.search(r"and the next nothing is (\d+)", message)

            if nothing is None:
                return current
            current = base + nothing.group(1)
    finally:
        # Release the curl handle on normal return and on any error.
        curlObj.close()

# Walk the chain starting at nothing=8022 and print the URL that
# traverseURLs returns.
print(
    traverseURLs(
        "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=8022"
    )
)


http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=66831

In [30]:
# Scratch check: pull the digit run out of a sample message and confirm
# that the match comes back as a str which int() converts cleanly.
testMsg = "this is a test 54321"
testMatch = re.compile(r"\d+").search(testMsg)
testNothing = testMatch.group(0)
print(
    testNothing,
    type(testNothing),
    int(testNothing),
    type(int(testNothing)),
    sep="\n",
)


54321
<class 'str'>
54321
<class 'int'>

In [40]:
# Show the second-to-last entry of `urls`.
# NOTE(review): no visible cell assigns `urls` at top level; the name only
# exists inside traverseURLs. This cell presumably relied on leftover
# kernel state from an earlier run -- confirm it survives Restart & Run All.
urls[-2]


Out[40]:
'http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=82682'

In [ ]: