In [1]:
# Get at the source code in the same manner as in the last stage.
# The response body is collected as raw bytes and decoded once at the end:
# decoding each network chunk on its own fails when a multi-byte UTF-8
# character happens to be split across two write callbacks.
import pycurl
rawChunks = []
curlObj = pycurl.Curl()
try:
    curlObj.setopt(pycurl.URL, "http://www.pythonchallenge.com/pc/def/equality.html")
    # list.append returns None, which pycurl treats as "all bytes consumed".
    curlObj.setopt(pycurl.WRITEFUNCTION, rawChunks.append)
    curlObj.perform()
finally:
    # Release the handle even if the transfer raised.
    curlObj.close()
# Kept as a list of str so the following cell can consume it unchanged.
sourceList = [b"".join(rawChunks).decode(encoding="UTF-8")]
In [2]:
# Process it in the same way too: stitch the downloaded text chunks
# back together into a single string.
import functools, operator  # left in place; other cells may still use these names
# str.join runs in time linear in the total length and yields "" for an
# empty chunk list, whereas reduce() without an initial value raises
# TypeError on an empty list and re-copies the accumulated string each step.
sourceCode = "".join(sourceList)
In [3]:
# Pull the HTML comment blocks out of the page, as in the previous stage.
# For this puzzle the first comment (index 0) is the one holding the data.
import re
matches = re.compile("<!--(.*?)-->", re.DOTALL).findall(sourceCode)
# Drop every newline so the data becomes one continuous run of characters.
searchSpace = "".join(matches[0].split("\n"))
In [4]:
# Search for a lowercase letter with exactly 3 uppercase letters on
# both sides (ex., AAAAbCCC must NOT match: four uppercase on the left).
#
# Only the lowercase letter itself is consumed; the surroundings are
# checked with fixed-width lookarounds:
#   (?<=[A-Z]{3})   three uppercase letters directly before ...
#   (?<![A-Z]{4})   ... but not four
#   (?=[A-Z]{3})    three uppercase letters directly after ...
#   (?![A-Z]{4})    ... but not four
# Consuming a [^A-Z] guard character on each end instead would miss a
# match that touches the very start or end of the text, and would miss
# back-to-back matches that share their uppercase run
# (ex., xAAAbCCCdEEEf should give both b and d).
soln = re.findall(
    "(?<![A-Z]{4})(?<=[A-Z]{3})([a-z])(?=[A-Z]{3})(?![A-Z]{4})",
    searchSpace,
)
print(''.join(soln))
In [ ]: