In [7]:
from pathlib import Path
from collections import defaultdict

Problem 3


In [37]:
def has_pattern(word, n=3):
    """
    Return ``True`` if the given word has at least 
    ``n` consecutive double letters; 
    return ``False`` otherwise.
    """
    # Scan through the word with a moving window of size 2*n
    N = 2*n 
    
    # Impossible if word is too short
    if len(word) < N:
        return False

    for i in range(len(word) - N + 1):
        success = True
        
        # Check window for pattern
        for j in range(i, i + N, 2):
            if word[j] != word[j + 1]:
                # Fail!
                success = False
                break
        
        # Break loop if window has pattern
        if success:
            break
            
    return success

# Test some
words = ['bingo', 'committee', 'commttee']
for word in words:
    print(word)
    for k in range(1, 4):
        result = has_pattern(word, k)
        print('{!s} consecutive doubles?'.format(k), result)


bingo
1 consecutive doubles? False
2 consecutive doubles? False
3 consecutive doubles? False
committee
1 consecutive doubles? True
2 consecutive doubles? True
3 consecutive doubles? False
commttee
1 consecutive doubles? True
2 consecutive doubles? True
3 consecutive doubles? True

In [38]:
# Check all words in word list for pattern
p = Path('../data/homework_01/words.txt')
with p.open() as src:
    for line in src:
        word = line.strip()
        if has_pattern(word):
            print(word)


bookkeeper
bookkeepers
bookkeeping
bookkeepings

Problem 4


In [31]:
def find_anagrams(wordlist):
    """
    Given a list of words, return a collection of all lists of words
    that are anagrams of each other.
    Sort the collection from the longest list of anagrams to the shortest list,
    and only include lists of size at least 2.
    """
    # Initialize a dictionary that maps a sorted letter-tuple 
    # to the list of all words in the word list with those letters.
    # Use tuples, because dictionary keys must be immutable.
    words_by_letters = defaultdict(list) # key -> empty list
    
    # Fill dictionary
    for word in wordlist:
        letters = tuple(sorted(word)) 
        words_by_letters[letters].append(word)
        
    # Only keep lists of size at least 2
    anagram_list = [words for words in words_by_letters.values() if len(words) > 1]
    
    # Sort by descendingly by set size
    anagram_list.sort(key=lambda x: -len(x))
    
    return anagram_list

In [32]:
# Generate word list from file
p = Path('../data/homework_01/words.txt')
wordlist = (line.strip() for line in p.open())

# Find anagrams
anagrams = find_anagrams(wordlist)

# Print highlights
n = 3

print('-'*20)
print('Longest {!s} anagram sets:'.format(n))
for s in anagrams[:n]:
    print(s)
    
print('-'*20)    
print('Shortest {!s} anagram sets:'.format(n))
for s in anagrams[-n:]:
    print(s)


--------------------
Longest 3 anagram sets:
['apers', 'asper', 'pares', 'parse', 'pears', 'prase', 'presa', 'rapes', 'reaps', 'spare', 'spear']
['alerts', 'alters', 'artels', 'estral', 'laster', 'ratels', 'salter', 'slater', 'staler', 'stelar', 'talers']
['least', 'setal', 'slate', 'stale', 'steal', 'stela', 'taels', 'tales', 'teals', 'tesla']
--------------------
Shortest 3 anagram sets:
['insisted', 'tidiness']
['muss', 'sums']
['pileup', 'uppile']

In [ ]:
x = 3
y = x