Exercise 1: Word counter


In [1]:
#!/usr/bin/python3
# -*- coding: utf-8 -*-

def get_words(line):
    '''
    Split a piece of text into a list of cleaned, lowercase words.

    The text is lowercased, every ASCII digit is deleted, and the
    remainder is split on whitespace. Leading and trailing ASCII
    punctuation is stripped from each piece; punctuation inside a
    word is kept. Pieces that end up empty are dropped.

    >>> get_words('Here are severall1000 words!\\n Blub')
    ['here', 'are', 'severall', 'words', 'blub']

    >>> get_words('"Where are they? 10 \\t 1000!?"')
    ['where', 'are', 'they']

    '''
    import string

    # Delete digits first so that 'severall1000' collapses to 'severall'
    # instead of being split or kept with the number attached.
    digit_free = line.lower().translate(
        str.maketrans('', '', string.digits))

    trimmed = (token.strip(string.punctuation)
               for token in digit_free.split())

    # A token made only of punctuation strips down to '' and is skipped.
    return [word for word in trimmed if word]
    

def word_counter(f):
    '''
    Count how often each word occurs in the given text input.

    f is handed unchanged to fileinput.input(): it can be a path,
    a sequence of paths, or None. With None, fileinput falls back to
    the files named in sys.argv[1:] or, if there are none, to
    standard input.

    Every line is tokenised with get_words() before counting.

    Returns a collections.Counter mapping each word to its count.
    '''
    import fileinput
    from collections import Counter

    word_counts = Counter()

    # fileinput.input() installs module-level state. Using it as a
    # context manager guarantees the stream is closed even when reading
    # or tokenising raises, so a later call does not fail with
    # "input() already active".
    with fileinput.input(f) as lines:
        for line in lines:
            # update() adds to the counts in place; no temporary Counter
            # has to be built and merged for every line.
            word_counts.update(get_words(line))
    return word_counts
    

def main():
    '''
    Print every counted word followed by its count, one per line,
    from the most frequent word to the least frequent.

    The counts come from word_counter(None).
    '''
    wordc = word_counter(None)
    # most_common() sorts by count in descending order and keeps words
    # with equal counts in the order they were first seen.
    for word, count in wordc.most_common():
        print(word, count)
    

# When this code runs as the main module, execute the doctests embedded
# in the docstrings; testmod() prints nothing if they all pass.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
    # main() is left disabled: it calls word_counter(None), which makes
    # fileinput read from the command-line arguments or standard input.
    #main()

In [2]:
counts = dict(word_counter('sherlock.txt'))

# Keep only the words whose first letter is 'o'.
d = {word: freq for word, freq in counts.items() if word[0] == 'o'}

# Highest count first; the sort is stable, so words with equal counts
# stay in the order in which they were first counted.
ranked = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
for w, freq in ranked:
    print(w, freq)


of 244
on 34
one 32
out 31
or 21
other 17
only 16
over 16
our 12
old 11
own 11
off 8
oh 7
once 5
obvious 5
occurred 4
observation 3
ourselves 2
o'clock 2
order 2
opinion 2
observed 2
obviously 1
outre 1
ordered 1
official 1
older 1
obligations 1
office 1
overcoat 1
open 1
onto 1
outstanding 1
outcry 1
over-tender 1
occipital 1
objections 1
opportunity 1
overpowering 1
opening 1

Exercise 2: Edit distance of two strings


In [3]:
def normalize_string(text):
    '''
    Return text in lowercase with every ASCII punctuation and
    whitespace character removed.

    >>> normalize_string('"Ea t!"')
    'eat'
    '''
    import string

    unwanted = set(string.punctuation + string.whitespace)
    return ''.join(ch for ch in text.lower() if ch not in unwanted)

def edit_distance(str1, str2):
    '''
    Compute the edit distance between two strings with the
    Wagner-Fischer dynamic-programming algorithm.

    Both strings are first passed through normalize_string(), so
    case, punctuation and whitespace do not contribute to the
    distance. Insertions, deletions and substitutions each cost 1.

    >>> edit_distance('bear', 'bar')
    1

    >>> edit_distance('"computer"', 'comm"uter')
    1

    >>> edit_distance('ea t', 'feat')
    1

    >>> edit_distance('bee!', 'beer')
    1

    >>> edit_distance('science', 'seance')
    3

    >>> edit_distance('laboratory', 'lobotomy')
    4
    '''
    source = normalize_string(str1)
    target = normalize_string(str2)

    rows = len(source) + 1
    cols = len(target) + 1

    # table[r][c] holds the distance between source[:r] and target[:c].
    table = [[0] * cols for _ in range(rows)]

    # Turning a prefix into the empty string (or back) costs its length.
    for r in range(rows):
        table[r][0] = r
    for c in range(cols):
        table[0][c] = c

    for r in range(1, rows):
        for c in range(1, cols):
            if source[r - 1] == target[c - 1]:
                # Matching characters: nothing to pay for this position.
                table[r][c] = table[r - 1][c - 1]
            else:
                table[r][c] = 1 + min(table[r - 1][c],      # deletion
                                      table[r][c - 1],      # insertion
                                      table[r - 1][c - 1])  # substitution

    return table[-1][-1]

# When this code runs as the main module, execute the doctests embedded
# in the docstrings; testmod() prints nothing if they all pass.
if __name__ == '__main__':
    import doctest
    doctest.testmod()

In [4]:
# Adjacent string literals are joined at compile time, so the long
# sentences can be wrapped inside parentheses. A backslash continuation
# inside the quotes would instead embed the next line's indentation
# in the string.
str1 = ('Source code is hard to write, nevertheless '
        'Python makes it easy to read!')
str2 = ('Sauce code was heard to writhe, nevertheleast '
        'Python makes it supercalifragilisticexpialidocious to reap!')
edit_distance(str1, str2)


Out[4]:
40