Count words in Python


In [31]:
from collections import defaultdict, OrderedDict
from operator import itemgetter

def count_words(s, n):
    """
    Return the n most frequently occurring words in s.

    Words are the whitespace-separated tokens of `s`; they are compared
    exactly as written (case-sensitive, punctuation is not stripped).

    :param:
     - `s`: string of words separated by whitespace
     - `n`: number of tuples to return
    :return: list of (<word>, <count>) tuples in descending order of
             count; words with equal counts appear in ascending string
             order (alphabetical for same-case words)
    """
    # Count the number of occurrences of each word in s
    counts = defaultdict(int)
    for word in s.split():
        counts[word] += 1

    # One sort on (-count, word) yields descending counts with ties in
    # ascending word order, so no intermediate OrderedDict or double
    # sort-and-reverse is needed.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:n]


def test_run():
    """Print the top-3 word counts for two sample sentences."""
    # A parenthesised single-argument print produces the same output as
    # the Python 2 print statement and is also a valid Python 3 call.
    print(count_words("cat bat mat cat bat cat", 3))
    print(count_words("betty bought a bit of butter but the butter was bitter", 3))


# Run the demo; it prints one result list per sample sentence.
test_run()


[('cat', 3), ('bat', 2), ('mat', 1)]
[('butter', 2), ('a', 1), ('betty', 1)]

In [32]:
# Check with an input where every word has a different count, so the
# expected order is determined by descending count alone.
test_string = "cat bat mat cat bat cat"
n = 3
expected = [("cat", 3), ("bat", 2), ("mat", 1)]
actual = count_words(test_string, n)
# On failure the assertion message shows the list that was returned.
assert actual == expected, actual

In [36]:
# Check tie-breaking: only "butter" occurs twice; every other word occurs
# once, so the remaining two slots go to the alphabetically first words.
s = "betty bought a bit of butter but the butter was bitter"
n = 3

expected = [('butter', 2), ('a', 1), ('betty', 1)]
actual = count_words(s, n)
# On failure the assertion message shows the list that was returned.
assert expected == actual, actual

Matrix multiplication


In [38]:
import numpy

# 4x4 integer matrix, written one row per line.
a = numpy.array([
    [1, 0, 3, 0],
    [0, 6, 0, 8],
    [0, 10, 11, 0],
    [13, 0, 0, 16],
])
# 1x4 row vector; the nested brackets make it two-dimensional.
b = numpy.array([[2, 5, 4, 1]])

In [41]:
# Transpose the 1x4 row vector into a 4x1 column, then multiply: (4x4)·(4x1) -> 4x1.
a.dot(b.T)


Out[41]:
array([[14],
       [38],
       [94],
       [42]])