New York Times Puzzle Edition 2018

Building Blocks

In this section, each puzzle has a list of nine-letter words, each with three letters provided in either the first three, middle three, or last three positions. We also get a list of three letter blocks that can be used exactly once and must be used in that order.

Word source

The first iteration of this, I used the dictionary provided in most Linux systems at '/usr/share/dict/american-english'. Using this as the source of words, the solver was not able to find a match for the first puzzle for the word starting with 'str'. Looking at the available blocks, I guessed the word was 'strongman', and indeed this is missing from the dictionary. (A simple grep search confirms this.)

So then I turned to a larger corpus provided by nltk.corpus.words. But this was also missing the word 'strongman'. Finally I used the corpus nltk.corpus.wordnet. This list does not include many common words, but it does include more uncommon words, and 'strongman' is included.


In [105]:
import functools
import nltk
import pprint
import re

nltk.download('words')
nltk.download('wordnet')


[nltk_data] Downloading package words to /home/dennis/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package wordnet to /home/dennis/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Out[105]:
True

In [121]:
class BuildingBlocksCandidate():
    """Data structure for holding a single candidate solution to one of the entries.
    
    """
    def __init__(self, word_pattern, word, blocks):
        """
        Args:
            word_pattern: A string with some letters provided and the blanks represented by '.'
            word: A word that matches `word_pattern` and the missing letters from `blocks`.
            blocks: A list of strings used to complete `word` based on `word_pattern` and the
                complete list of blocks.
        """
        self.word_pattern = word_pattern
        self.word = word
        self.blocks = blocks
    
    def __repr__(self):
        return f'BuildingBlocksCandidate(word_patter="{self.word_pattern}", word="{self.word}", blocks={self.blocks})'


class BuildingBlocksEntry():
    """Data structure for holding a puzzle entry.  It maps a `word_pattern` to a list of `BuildingblocksCandidate` objects.
    
    A `BuildingBlocksEntry` can have zero or many `BuildingBlocksCandidate` objects.
    """
    def __init__(self, word_pattern, candidates=None):
        """
        Args:
            word_pattern: A string with some letters provided and the blanks represented by '.'
            candidates: If None, construct this object with an initial empty list of candidates.
                Otherwise, provide the list of `BuildingBlocksCandidate` objects.
        """
        self.word_pattern = word_pattern
        if candidates is None:
            self.candidates = []
        else:
            self.candidates = candidates
    
    def __repr__(self):
        return f'BuildingBlocksEntry(word_pattern="{self.word_pattern}", candidates={self.candidates})'
    
    def add_candidate(self, candidate):
        """Append a `BuildingBlocksCandidate` object to the list of candidates.
        
        Args:
            candidate: `BuildingBlocksCandidate` object to add.
        """
        self.candidates.append(candidate)
    
    def get_candidates(self):
        """Get the list of candidates.
        
        Returns:
            A list of `BuildingBlocksCandidate` objects.
        """
        return self.candidates


class BuildingBlocksPuzzle():
    """Represents an entire puzzle.
    
    """
    def __init__(self, word_patterns, blocks, solver=None, word_provider=None):
        """
        Args:
            word_patterns: A list of strings, each with some letters provided and the blanks represented by '.'
                Each pattern must be the same length, have the same block length, and have the same position for
                the provided words.
            blocks: A list of strings to use for constructing the words given the `word_patterns`
            solver: An object of class derived from `ConstraintSolverBase` used for solving the puzzle.
                This allows easy experimentation of different solvers.
                If None, the default solver `ConstraintSolverSimple` is used.
            word_provider: An object of class derived from `WordProviderBase` used for providing a list of
                words in the vocabulary.
                If None, the default `WordProviderNltkWordnet` is used.
        """
        self.word_patterns = word_patterns
        self.blocks = blocks
        if solver is None:
            self.solver = ConstraintSolverSimple()
        if word_provider is None:
            self.word_provider = WordProviderNltkWordnet()
        self.word_length = BuildingBlocksPuzzle.get_item_length(word_patterns)
        self.block_length = BuildingBlocksPuzzle.get_item_length(blocks)
    
    def solve(self, verbose=False):
        candidate_dct, used_blocks, unused_blocks = self.solver.solve(
            self.word_patterns,
            self.blocks,
            self.word_provider,
            verbose=verbose,
        )
        if verbose:
            BuildingBlocksPuzzle.print_solution(candidate_dct, unused_blocks)
        return candidate_dct, used_blocks, unused_blocks
        
    @staticmethod
    def get_item_length(items):
        """Helper function to determine the length of the words or blocks.
        
        If the lengths of the items are not uniform, a `ValueError` will be raised."""
        lengths = [len(b) for b in items]
        if min(lengths) != max(lengths):
            raise ValueError('The length of the items must be the same.')
        return min(lengths)
    
    @staticmethod
    def get_re_pattern(word_pattern, blocks):
        """Given the `word_pattern` of the form 'abc......' or '...abc...' or '......abc'
        and the available blocks, return the regular expression pattern to compile.
        """
        re_pattern = '^'  # start of word
        block_length = BuildingBlocksPuzzle.get_item_length(blocks)
        # Fill the blanks with regexp match patterns.
        for idx in range(0, len(word_pattern), block_length):
            chunk = word_pattern[idx:idx+block_length]
            if chunk == '.' * block_length:
                # Matching group with all the strings from `blocks` or'ed.
                pattern = '({})'.format('|'.join(blocks))  # e.g., (abc|def|ghi)
                # pattern = f'(\w{{{block_length}}})'  # e.g., '(\w{3})'  # Useful pattern for debugging.
                re_pattern += pattern
            elif '.' in chunk:
                raise ValueError('Chunks should be all letters or all ".", instead got {}'.format(chunk))
            else:
                re_pattern += chunk
        re_pattern += '$'  # end of word
        return re_pattern
    
    @staticmethod
    def print_solution(candidate_dct, unused_blocks):
        for word_pattern, entry in candidate_dct.items():
            candidates = entry.get_candidates()
            candidate = candidates[0]
            print(candidate.word_pattern, candidate.word, candidate.blocks)
        print('Unused blocks:', unused_blocks)


class WordProviderBase():
    def words(self):
        raise NotImplementedError('Implement this method!')
    
class WordProviderNltkWords(WordProviderBase):
    @functools.lru_cache(maxsize=128)
    def words(self):
        return nltk.corpus.words.words()

class WordProviderNltkWordnet(WordProviderBase):
    @functools.lru_cache(maxsize=128)
    def words(self):
        return list(nltk.corpus.wordnet.words())

class WordProviderFile(WordProviderBase):
    @functools.lru_cache(maxsize=128)
    def words(self):
        with open('/usr/share/dict/american-english', 'r') as f:
            return f.read().splitlines()

class ConstraintSolverBase():
    def solve(self, word_patterns, blocks, word_provider, verbose=False):
        """
        Returns a tuple of three items:
            - A dictionary mapping the word pattern to a `BuildingBlockEntry` with a single candidate
            - A set of used blocks
            - A set of unused blocks

        """
        candidate_dct = {}
        for word_pattern in word_patterns:
            candidate_dct[word_pattern] = self.find_candidates(word_pattern, blocks, word_provider)
        # if verbose:
        #     print('Candidate words:')
        #     pprint.pprint(candidate_dct)
        used_blocks = set([])
        unused_blocks = set(blocks)
        for word_pattern, entry in candidate_dct.items():
            for candidate in entry.get_candidates():
                blks = candidate.blocks
                used_blocks.update(set(blks))
                unused_blocks.difference_update(set(blks))
        return candidate_dct, used_blocks, unused_blocks

    @staticmethod
    def find_candidates(word_pattern, blocks, word_provider):
        """Return a `BuildingBlocksEntry` that fits the `word_pattern` and the `blocks`.
        
        This does not apply any constraint logic.
        """
        word_length = len(word_pattern)
        candidates = {}
        pattern = BuildingBlocksPuzzle.get_re_pattern(word_pattern, blocks)
        p = re.compile(pattern)
        words = word_provider.words()
        entry = BuildingBlocksEntry(word_pattern)
        for word in words:
            # word = word.strip().lower()
            if len(word) != word_length:
                continue
            matches = p.finditer(word)
            if matches:
                for match in matches:
                    blocks_used = tuple(match.groups())
                    entry.add_candidate(BuildingBlocksCandidate(word_pattern, word, blocks_used))
        return entry


class ConstraintSolverSimple(ConstraintSolverBase):
    """Simple strategy that makes some broad assumptions:
        1) Each word pattern is mapped to exactly one candidate.
        2) Each block is used at most one time.
    
    """
    def solve(self, word_patterns, blocks, word_provider, verbose=False):
        candidate_dct, used_blocks, unused_blocks = super().solve(word_patterns, blocks, word_provider, verbose=verbose)
        # ensure each word pattern is mapped to exactly one candidate
        error_messages = []
        for word_pattern, entry in candidate_dct.items():
            if len(entry.get_candidates()) != 1:
                error_messages.append(f'Expected exactly one candidate for {word_pattern}, found {entry.get_candidates()}')
        if len(error_messages) > 0:
            print(error_messages)
            raise ValueError('Constraints not met.')
        return candidate_dct, used_blocks, unused_blocks


class ConstraintSolverSearch(ConstraintSolverBase):
    """A solver that can handle situations where there are more than one candidate for a word pattern.
    
    The strategy is to treat each candidate within an entry as a node, then approach the problem as 
    a graph search with constraint propagation.  The algorithm goes like this:
        - Order the entries from least number of candidates to most.
        - While we have not satisfied all the constraints
            - Visit the next available entry
            - Push the next available candidate on the stack, considering the blocks already used.
            - Add the blocks for the candidate to the list of used blocks.
            - If no candidates are available, pop the candidate from the stack.
    # A more complex solver would first settle all the clues with a single candidate,
        # then search the remaining solution spaces.
        # This can be implemented as a search where we order the nodes for visiting from
        # the least number of candidates to the most.  After visting each node and "locking"
        # it, we update the others to remove any candidates using any of the blocks just locked.

    """
    def solve(self, word_patterns, blocks, word_provider, verbose=False):
        candidate_dct, used_blocks, unused_blocks = super().solve(word_patterns, blocks, word_provider, verbose=verbose)
        # TODO: pseudocode for now
        blocks_used = set([])
        for word_pattern, entry in sorted(candidate_dct.items(), key=lambda e: (e.n_candidates, entry.word)):
            candidates = [candidate for candidate in entry.get_candidates() if blocks_used.is_disjoint(set(candidate.blocks))]
            
        # 
        return candidate_dct, used_blocks, unused_blocks


p1 = BuildingBlocksPuzzle(
    word_patterns=[
        'pol......', 'imb......', 'por......', 'str......', 
        'veg......', 'con......', 'app......', 'wee......',
    ],
    blocks=[
        'ble', 'cal', 'cup', 'emi',
        'eta', 'ght', 'hig', 'hti',
        'ine', 'ion', 'kni', 'lia',
        'lio', 'man', 'mes', 'nce',
        'ong', 'rog', 'tag',
    ],
)
candidate_dct, used_blocks, unused_blocks = p1.solve(verbose=True)


pol...... polemical ('emi', 'cal')
imb...... imbroglio ('rog', 'lio')
por...... porcupine ('cup', 'ine')
str...... strongman ('ong', 'man')
veg...... vegetable ('eta', 'ble')
con...... contagion ('tag', 'ion')
app...... appliance ('lia', 'nce')
wee...... weeknight ('kni', 'ght')
Unused blocks: {'mes', 'hti', 'hig'}

In [ ]:


In [100]:
p2 = BuildingBlocksPuzzle(
    word_patterns=[
        '...ilo...', '...bfo...', '...irw...', '...mis...',
        '...lic...', '...erb...', '...per...', '...tai...',
    ],
    blocks=[
        'aff', 'cer', 'cha', 'com',
        'dum', 'ean', 'ell', 'ium',
        'nom', 'nty', 'one', 'quy',
        'sar', 'sno', 'sol', 'sta',
        'ted', 'und', 'ytt',
    ],
)
candidate_dct, used_blocks, unused_blocks = p2.solve(verbose=True)


...ilo... soliloquy ('sol', 'quy')
...bfo... dumbfound ('dum', 'und')
...irw... stairwell ('sta', 'ell')
...mis... commissar ('com', 'sar')
...lic... afflicted ('aff', 'ted')
...erb... ytterbium ('ytt', 'ium')
...per... chaperone ('cha', 'one')
...tai... certainty ('cer', 'nty')
Unused blocks: {'nom', 'ean', 'sno'}

In [101]:
p3 = BuildingBlocksPuzzle(
    word_patterns=[
        '......lly', '......ang', '......day', '......ide',
        '......ish', '......chy', '......ous', '......ast',
    ],
    blocks=[
        'abl', 'aci', 'akf', 'bli',
        'boo', 'bre', 'efu', 'ego',
        'est', 'gar', 'hop', 'ing',
        'mer', 'nds', 'nic', 'oli',
        'ter', 'viv', 'yes',
    ],
)
candidate_dct, used_blocks, unused_blocks = p3.solve(verbose=True)


......lly hopefully ('hop', 'efu')
......ang boomerang ('boo', 'mer')
......day yesterday ('yes', 'ter')
......ide blindside ('bli', 'nds')
......ish establish ('est', 'abl')
......chy oligarchy ('oli', 'gar')
......ous vivacious ('viv', 'aci')
......ast breakfast ('bre', 'akf')
Unused blocks: {'ing', 'ego', 'nic'}

In [ ]:

Profiling performance


In [122]:
%time candidate_dct, used_blocks, unused_blocks = p1.solve()


CPU times: user 144 ms, sys: 0 ns, total: 144 ms
Wall time: 143 ms

In [123]:
%prun p1.solve()


 
         1295497 function calls in 0.231 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        8    0.149    0.019    0.231    0.029 <ipython-input-121-04a03061f6bc>:182(find_candidates)
  1178625    0.053    0.000    0.053    0.000 {built-in method builtins.len}
   116680    0.029    0.000    0.029    0.000 {method 'finditer' of 're.Pattern' objects}
        8    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:107(get_re_pattern)
        1    0.000    0.000    0.231    0.231 <ipython-input-121-04a03061f6bc>:159(solve)
        8    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:102(<listcomp>)
        8    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:26(__init__)
        8    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:97(get_item_length)
        8    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
        8    0.000    0.000    0.000    0.000 re.py:271(_compile)
        1    0.000    0.000    0.231    0.231 {built-in method builtins.exec}
       16    0.000    0.000    0.000    0.000 {built-in method builtins.min}
        8    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:5(__init__)
       16    0.000    0.000    0.000    0.000 {method 'format' of 'str' objects}
       16    0.000    0.000    0.000    0.000 {method 'join' of 'str' objects}
        8    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:42(add_candidate)
        8    0.000    0.000    0.000    0.000 {method 'groups' of 're.Match' objects}
        1    0.000    0.000    0.231    0.231 <ipython-input-121-04a03061f6bc>:212(solve)
        8    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        8    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.231    0.231 <string>:1(<module>)
        8    0.000    0.000    0.000    0.000 re.py:232(compile)
        8    0.000    0.000    0.000    0.000 {method 'difference_update' of 'set' objects}
        8    0.000    0.000    0.000    0.000 {method 'update' of 'set' objects}
        2    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
        1    0.000    0.000    0.231    0.231 <ipython-input-121-04a03061f6bc>:86(solve)
       16    0.000    0.000    0.000    0.000 <ipython-input-121-04a03061f6bc>:50(get_candidates)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

In [126]:
%load_ext line_profiler

In [129]:
%lprun -f BuildingBlocksPuzzle.solve -f ConstraintSolverBase.find_candidates p1.solve()


Timer unit: 1e-06 s

Total time: 2.89063 s
File: <ipython-input-121-04a03061f6bc>
Function: solve at line 86

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    86                                               def solve(self, verbose=False):
    87         1          8.0      8.0      0.0          candidate_dct, used_blocks, unused_blocks = self.solver.solve(
    88         1          5.0      5.0      0.0              self.word_patterns,
    89         1          2.0      2.0      0.0              self.blocks,
    90         1          3.0      3.0      0.0              self.word_provider,
    91         1    2890610.0 2890610.0    100.0              verbose=verbose,
    92                                                   )
    93         1          0.0      0.0      0.0          if verbose:
    94                                                       BuildingBlocksPuzzle.print_solution(candidate_dct, unused_blocks)
    95         1          0.0      0.0      0.0          return candidate_dct, used_blocks, unused_blocks

Total time: 1.49245 s
File: <ipython-input-121-04a03061f6bc>
Function: find_candidates at line 182

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   182                                               @staticmethod
   183                                               def find_candidates(word_pattern, blocks, word_provider):
   184                                                   """Return a `BuildingBlocksEntry` that fits the `word_pattern` and the `blocks`.
   185                                                   
   186                                                   This does not apply any constraint logic.
   187                                                   """
   188         8          9.0      1.1      0.0          word_length = len(word_pattern)
   189         8          5.0      0.6      0.0          candidates = {}
   190         8        323.0     40.4      0.0          pattern = BuildingBlocksPuzzle.get_re_pattern(word_pattern, blocks)
   191         8         88.0     11.0      0.0          p = re.compile(pattern)
   192         8         24.0      3.0      0.0          words = word_provider.words()
   193         8         39.0      4.9      0.0          entry = BuildingBlocksEntry(word_pattern)
   194   1178456     432455.0      0.4     29.0          for word in words:
   195                                                       # word = word.strip().lower()
   196   1178448     496494.0      0.4     33.3              if len(word) != word_length:
   197   1061768     372488.0      0.4     25.0                  continue
   198    116680      79291.0      0.7      5.3              matches = p.finditer(word)
   199    116680      44949.0      0.4      3.0              if matches:
   200    116688      66173.0      0.6      4.4                  for match in matches:
   201         8         18.0      2.2      0.0                      blocks_used = tuple(match.groups())
   202         8         88.0     11.0      0.0                      entry.add_candidate(BuildingBlocksCandidate(word_pattern, word, blocks_used))
   203         8          3.0      0.4      0.0          return entry

In [124]:
%load_ext memory_profiler

In [130]:
%memit p1.solve()


peak memory: 216.02 MiB, increment: 0.00 MiB

In [ ]: