In [1]:
import pycurl
import re
import functools
import operator
from io import BytesIO
In [3]:
def traverseURLs(start):
    """Follow the chain of "next nothing" pages and return the last URL fetched.

    The page at ``start`` is downloaded and searched for the phrase
    "and the next nothing is <digits>". When the phrase is present, the
    digits are appended to the hard-coded linkedlist.php base URL and that
    page is fetched next. The walk stops at the first page whose body does
    not contain the phrase.

    Note that every URL after ``start`` is built from the hard-coded base,
    regardless of the host or path given in ``start``.

    Parameters
    ----------
    start : str
        Full URL of the first page to fetch.

    Returns
    -------
    str
        URL of the first page that does not announce a next "nothing" value.

    Raises
    ------
    pycurl.error
        Propagated from ``perform()`` if a transfer fails.
    UnicodeDecodeError
        If a page body is not valid UTF-8.
    """
    base = "http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing="
    current = start
    curlObj = pycurl.Curl()
    try:
        while True:
            # Get the page data. A fresh buffer per request keeps the
            # previous page's bytes out of the current message.
            rawBytes = BytesIO()
            curlObj.setopt(pycurl.URL, current)
            curlObj.setopt(pycurl.WRITEDATA, rawBytes)
            curlObj.perform()
            # Isolate the 'nothing' value.
            # Note: according to the re module's documentation, we don't
            # need to cache the pattern object for the regex since
            # the module caches recent patterns itself.
            message = rawBytes.getvalue().decode("UTF-8")
            nothing = re.search(r"and the next nothing is (\d+)", message)
            if not nothing:
                # No further link on this page: this is where the chain ends.
                return current
            current = base + nothing.group(1)
    finally:
        # Release the curl handle on every exit path, including exceptions.
        curlObj.close()
# Start the walk at nothing=8022 and print the URL at which traverseURLs stops.
print(traverseURLs("http://www.pythonchallenge.com/pc/def/linkedlist.php?nothing=8022"))
In [30]:
# Scratch check: re.search(...).group() yields the matched digits as a str,
# and int() converts that str to an int.
testMsg = "this is a test 54321"
testMatch = re.search(r"\d+", testMsg)
testNothing = testMatch.group()
# One print with a newline separator emits the same four lines as four prints.
print(
    testNothing,
    type(testNothing),
    int(testNothing),
    type(int(testNothing)),
    sep="\n",
)
In [40]:
# NOTE(review): in the visible cells `urls` exists only as a local variable
# inside traverseURLs, so no notebook-level `urls` is defined here. This cell
# appears to rely on leftover kernel state and would raise NameError after
# Restart & Run All — confirm where `urls` is meant to come from.
urls[-2]
Out[40]:
In [ ]: