In [3]:
# Use PyCurl to grab the Web page source code.
#
# PyCurl hands the response body to the write callback in packets of bytes
# until all data has been transferred. A multi-byte UTF-8 character can be
# split across two packets, so decoding each packet on its own may fail.
# Collect the raw packets instead and decode the joined body exactly once.

import pycurl

rawPackets = []
curlObj = pycurl.Curl()
try:
    curlObj.setopt(pycurl.URL, "http://www.pythonchallenge.com/pc/def/ocr.html")
    # list.append returns None, which PyCurl accepts as "all bytes were consumed"
    curlObj.setopt(pycurl.WRITEFUNCTION, rawPackets.append)
    curlObj.perform()
finally:
    # Release the underlying libcurl handle even if the transfer raised
    curlObj.close()

# sourceList stays a list of strings (here a single, fully decoded fragment)
sourceList = [b"".join(rawPackets).decode(encoding="UTF-8")]

In [4]:
# Create a single string out of the list of source fragments.
#
# str.join builds the result in a single pass and yields "" when the list is
# empty, whereas functools.reduce(operator.concat, ...) without an initial
# value raises TypeError on an empty list and re-copies the growing string
# on every step.
import functools, operator  # not used by the join below; left in place in case other cells rely on them

sourceCode = "".join(sourceList)

In [8]:
# With the whole page source in one string, the next step is to isolate
# the block of text to search. That block lives inside an HTML comment,
# so a regular expression can pull it out.
import re

# Collect the body of every HTML comment. The quantifier is non-greedy so
# each match stops at the first closing marker, and DOTALL lets "." span
# line breaks.
matches = re.compile("<!--(.*?)-->", flags=re.DOTALL).findall(sourceCode)

# The block we want is the second comment on the page, so index it
# directly, and strip the newlines so only the payload characters remain.
searchSpace = matches[1].replace("\n", "")

In [10]:
# To find the solution, we'll use a function that acts much like the
# Sieve of Eratosthenes.
#
# We scan the search space linearly, building the solution.
# For each character: if it is already in the solution, strike it out of the
# solution and ignore every later occurrence of it in the search space.
# Otherwise, add it to the solution and continue.
#
# When the scan finishes, the solution holds exactly the characters that
# occurred once, in the order they appeared in the search space.
#
# The scan is a plain loop rather than a recursion: the recursive form needs
# one stack frame per step and hits Python's recursion limit once the input
# contains on the order of a thousand distinct characters.
def removeDuplicates(space, soln=""):
    ''' Return the characters of `space` that occur exactly once, in order.

    space -- string to scan
    soln  -- optional seed for the result; a seed character that also shows
             up in `space` is struck out (every copy of it), and the
             surviving seed characters come first in the result

    Returns a new string; neither argument is modified.
    '''
    kept = list(soln)      # candidate result characters, in order of arrival
    present = set(soln)    # characters currently counted as part of the result
    struck = set()         # characters seen a second time; ignored from then on

    for ch in space:
        if ch in struck:
            continue
        if ch in present:
            # Second sighting: drop it from the result for good
            present.remove(ch)
            struck.add(ch)
        else:
            present.add(ch)
            kept.append(ch)

    # Struck characters are filtered once at the end instead of rebuilding
    # the string on every strike
    return "".join(ch for ch in kept if ch not in struck)

In [11]:
# Run the filter over the extracted comment block; the characters left over spell the answer
removeDuplicates(searchSpace)


Out[11]:
'equality'

In [ ]: