In [31]:
from collections import defaultdict, OrderedDict
from operator import itemgetter
def count_words(s, n):
    """
    Return the n most frequently occurring words in s.

    Words are the whitespace-delimited tokens of `s`; they are compared
    exactly as written (case-sensitive, punctuation is not stripped).

    :param:
     - `s`: string of words separated by whitespace
     - `n`: number of tuples to return; if larger than the number of
            distinct words, every word is returned; if zero or negative,
            an empty list is returned
    :return: list of (<word>, <count>) tuples in descending order of count,
             with ties broken alphabetically (ascending) by word
    """
    # A negative n would otherwise reach the slice below and mean
    # "all but the last |n| entries", which is never what a top-n caller wants.
    if n is not None and n <= 0:
        return []

    # Count the number of occurrences of each word in s
    counts = defaultdict(int)
    for word in s.split():
        counts[word] += 1

    # Single sort: negating the count puts the most frequent words first,
    # and the word itself breaks ties in ascending alphabetical order.
    ranked = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]
def test_run():
    """Print the top-3 word counts for two sample sentences as a quick smoke test."""
    # print(<single expression>) produces the same output whether print is the
    # Python 2 statement or the Python 3 function, so this cell runs on both.
    print(count_words("cat bat mat cat bat cat", 3))
    print(count_words("betty bought a bit of butter but the butter was bitter", 3))
# Run the smoke test so its printed results appear as this cell's output.
test_run()
In [32]:
# Tie-free case: "cat" appears 3 times, "bat" twice and "mat" once, so the
# expected order is determined by count alone.
test_string = "cat bat mat cat bat cat"
n = 3
expected = [("cat", 3), ("bat", 2), ("mat", 1)]
actual = count_words(test_string, n)
# On failure the assertion message shows what count_words actually returned.
assert actual == expected, actual
In [36]:
# Tie case: only "butter" appears twice; every other word appears once, so the
# remaining two slots must go to the alphabetically first words ("a", "betty").
s = "betty bought a bit of butter but the butter was bitter"
n = 3
expected = [('butter', 2), ('a', 1), ('betty', 1)]
actual = count_words(s, n)
# On failure the assertion message shows what count_words actually returned.
assert expected == actual, actual
In [38]:
import numpy
# 4x4 integer matrix, written one row per line.
a = numpy.array([
    [1, 0, 3, 0],
    [0, 6, 0, 8],
    [0, 10, 11, 0],
    [13, 0, 0, 16],
])

# 1x4 row vector (note the nested list: this is 2-D, not a flat array).
b = numpy.array([[2, 5, 4, 1]])
In [41]:
# b is 1x4, so b.transpose() is a 4x1 column; multiplying the 4x4 matrix a by
# it yields a 4x1 column, which for the literals above is [[14], [38], [94], [42]].
a.dot(b.transpose())
Out[41]: