notebook.community

Edit and run



In [1]:

    
import pycurl
import re
import functools
import operator
from io import BytesIO



In [3]:

    
def traverseURLs(start, visited=None):
    """Follow the chain of 'nothing' links and return the last URL reached.

    Starting from ``start``, each page is fetched and searched for the text
    "and the next nothing is <digits>". While that text is found, the digits
    are appended to the linkedlist.php base URL and that URL is fetched next.
    The first URL whose page does not contain the text is returned.

    Parameters
    ----------
    start : str
        URL of the first page to fetch.
    visited : list, optional
        If given, every URL is appended to this list just before it is
        fetched (so the final, returned URL is its last element). When
        omitted, no history is kept.

    Returns
    -------
    str
        The URL of the first page that does not name a next 'nothing'.

    Notes
    -----
    Errors raised by ``perform()`` or by the UTF-8 decode propagate to the
    caller; the curl handle is closed in either case. There is no guard
    against a chain that loops back on itself.
    """
    base = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
    current = start
    curlObj = pycurl.Curl()

    try:
        while True:
            if visited is not None:
                visited.append(current)

            # Get the page data; a fresh buffer per request so that earlier
            # pages never leak into the text we search
            rawBytes = BytesIO()
            curlObj.setopt(pycurl.URL, current)
            curlObj.setopt(pycurl.WRITEDATA, rawBytes)
            curlObj.perform()

            # Isolate the 'nothing' value
            # Note: according to the re module's documentation, we don't
            #       need to cache the pattern object for the regex since
            #       the module caches recent patterns itself
            message = rawBytes.getvalue().decode("UTF-8")
            nothing = re.search(r"and the next nothing is (\d+)", message)

            if nothing is None:
                # End of the chain: this page names no successor
                return current

            current = base + nothing.group(1)
    finally:
        # Release the handle on every exit path (normal return or exception)
        curlObj.close()

# Walk the chain from nothing=8022 and print the URL where it ends
print(traverseURLs(start="http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=8022"))









    



http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=66831



In [30]:

    
# Scratch check: pull the first run of digits out of a message and confirm
# that the match is a str which converts cleanly to an int
testMsg = "this is a test 54321"
testMatch = re.search(r"\d+", testMsg)
testNothing = testMatch.group(0)
print(
    testNothing,
    type(testNothing),
    int(testNothing),
    type(int(testNothing)),
    sep="\n",
)









    



54321
<class 'str'>
54321
<class 'int'>



In [40]:

    
# Show the second-to-last element of `urls`.
# NOTE(review): `urls` is not defined at module level in any visible cell
# (traverseURLs only builds a local list with that name), so on a fresh
# kernel this presumably raises NameError - confirm where `urls` was meant
# to come from.
urls[-2]









    Out[40]:





'http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=82682'



In [ ]: