In [7]:
from pathlib import Path
from collections import defaultdict
In [37]:
def has_pattern(word, n=3):
"""
Return ``True`` if the given word has at least
``n` consecutive double letters;
return ``False`` otherwise.
"""
# Scan through the word with a moving window of size 2*n
N = 2*n
# Impossible if word is too short
if len(word) < N:
return False
for i in range(len(word) - N + 1):
success = True
# Check window for pattern
for j in range(i, i + N, 2):
if word[j] != word[j + 1]:
# Fail!
success = False
break
# Break loop if window has pattern
if success:
break
return success
# Test some
words = ['bingo', 'committee', 'commttee']
for word in words:
print(word)
for k in range(1, 4):
result = has_pattern(word, k)
print('{!s} consecutive doubles?'.format(k), result)
In [38]:
# Check all words in word list for pattern
p = Path('../data/homework_01/words.txt')
with p.open() as src:
for line in src:
word = line.strip()
if has_pattern(word):
print(word)
In [31]:
def find_anagrams(wordlist):
"""
Given a list of words, return a collection of all lists of words
that are anagrams of each other.
Sort the collection from the longest list of anagrams to the shortest list,
and only include lists of size at least 2.
"""
# Initialize a dictionary that maps a sorted letter-tuple
# to the list of all words in the word list with those letters.
# Use tuples, because dictionary keys must be immutable.
words_by_letters = defaultdict(list) # key -> empty list
# Fill dictionary
for word in wordlist:
letters = tuple(sorted(word))
words_by_letters[letters].append(word)
# Only keep lists of size at least 2
anagram_list = [words for words in words_by_letters.values() if len(words) > 1]
# Sort by descendingly by set size
anagram_list.sort(key=lambda x: -len(x))
return anagram_list
In [32]:
# Generate word list from file
p = Path('../data/homework_01/words.txt')
wordlist = (line.strip() for line in p.open())
# Find anagrams
anagrams = find_anagrams(wordlist)
# Print highlights
n = 3
print('-'*20)
print('Longest {!s} anagram sets:'.format(n))
for s in anagrams[:n]:
print(s)
print('-'*20)
print('Shortest {!s} anagram sets:'.format(n))
for s in anagrams[-n:]:
print(s)
In [ ]:
x = 3
y = x