Exercise 1)


In [1]:
help(str)


Help on class str in module __builtin__:

class str(basestring)
 |  str(object='') -> string
 |  
 |  Return a nice string representation of the object.
 |  If the argument is a string, the return value is the same object.
 |  
 |  Method resolution order:
 |      str
 |      basestring
 |      object
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __format__(...)
 |      S.__format__(format_spec) -> string
 |      
 |      Return a formatted version of S as described by format_spec.
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getnewargs__(...)
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __hash__(...)
 |      x.__hash__() <==> hash(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mod__(...)
 |      x.__mod__(y) <==> x%y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __rmod__(...)
 |      x.__rmod__(y) <==> y%x
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  __sizeof__(...)
 |      S.__sizeof__() -> size of S in memory, in bytes
 |  
 |  __str__(...)
 |      x.__str__() <==> str(x)
 |  
 |  capitalize(...)
 |      S.capitalize() -> string
 |      
 |      Return a copy of the string S with only its first character
 |      capitalized.
 |  
 |  center(...)
 |      S.center(width[, fillchar]) -> string
 |      
 |      Return S centered in a string of length width. Padding is
 |      done using the specified fill character (default is a space)
 |  
 |  count(...)
 |      S.count(sub[, start[, end]]) -> int
 |      
 |      Return the number of non-overlapping occurrences of substring sub in
 |      string S[start:end].  Optional arguments start and end are interpreted
 |      as in slice notation.
 |  
 |  decode(...)
 |      S.decode([encoding[,errors]]) -> object
 |      
 |      Decodes S using the codec registered for encoding. encoding defaults
 |      to the default encoding. errors may be given to set a different error
 |      handling scheme. Default is 'strict' meaning that encoding errors raise
 |      a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
 |      as well as any other name registered with codecs.register_error that is
 |      able to handle UnicodeDecodeErrors.
 |  
 |  encode(...)
 |      S.encode([encoding[,errors]]) -> object
 |      
 |      Encodes S using the codec registered for encoding. encoding defaults
 |      to the default encoding. errors may be given to set a different error
 |      handling scheme. Default is 'strict' meaning that encoding errors raise
 |      a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
 |      'xmlcharrefreplace' as well as any other name registered with
 |      codecs.register_error that is able to handle UnicodeEncodeErrors.
 |  
 |  endswith(...)
 |      S.endswith(suffix[, start[, end]]) -> bool
 |      
 |      Return True if S ends with the specified suffix, False otherwise.
 |      With optional start, test S beginning at that position.
 |      With optional end, stop comparing S at that position.
 |      suffix can also be a tuple of strings to try.
 |  
 |  expandtabs(...)
 |      S.expandtabs([tabsize]) -> string
 |      
 |      Return a copy of S where all tab characters are expanded using spaces.
 |      If tabsize is not given, a tab size of 8 characters is assumed.
 |  
 |  find(...)
 |      S.find(sub [,start [,end]]) -> int
 |      
 |      Return the lowest index in S where substring sub is found,
 |      such that sub is contained within S[start:end].  Optional
 |      arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.
 |  
 |  format(...)
 |      S.format(*args, **kwargs) -> string
 |      
 |      Return a formatted version of S, using substitutions from args and kwargs.
 |      The substitutions are identified by braces ('{' and '}').
 |  
 |  index(...)
 |      S.index(sub [,start [,end]]) -> int
 |      
 |      Like S.find() but raise ValueError when the substring is not found.
 |  
 |  isalnum(...)
 |      S.isalnum() -> bool
 |      
 |      Return True if all characters in S are alphanumeric
 |      and there is at least one character in S, False otherwise.
 |  
 |  isalpha(...)
 |      S.isalpha() -> bool
 |      
 |      Return True if all characters in S are alphabetic
 |      and there is at least one character in S, False otherwise.
 |  
 |  isdigit(...)
 |      S.isdigit() -> bool
 |      
 |      Return True if all characters in S are digits
 |      and there is at least one character in S, False otherwise.
 |  
 |  islower(...)
 |      S.islower() -> bool
 |      
 |      Return True if all cased characters in S are lowercase and there is
 |      at least one cased character in S, False otherwise.
 |  
 |  isspace(...)
 |      S.isspace() -> bool
 |      
 |      Return True if all characters in S are whitespace
 |      and there is at least one character in S, False otherwise.
 |  
 |  istitle(...)
 |      S.istitle() -> bool
 |      
 |      Return True if S is a titlecased string and there is at least one
 |      character in S, i.e. uppercase characters may only follow uncased
 |      characters and lowercase characters only cased ones. Return False
 |      otherwise.
 |  
 |  isupper(...)
 |      S.isupper() -> bool
 |      
 |      Return True if all cased characters in S are uppercase and there is
 |      at least one cased character in S, False otherwise.
 |  
 |  join(...)
 |      S.join(iterable) -> string
 |      
 |      Return a string which is the concatenation of the strings in the
 |      iterable.  The separator between elements is S.
 |  
 |  ljust(...)
 |      S.ljust(width[, fillchar]) -> string
 |      
 |      Return S left-justified in a string of length width. Padding is
 |      done using the specified fill character (default is a space).
 |  
 |  lower(...)
 |      S.lower() -> string
 |      
 |      Return a copy of the string S converted to lowercase.
 |  
 |  lstrip(...)
 |      S.lstrip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with leading whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  partition(...)
 |      S.partition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, and return the part before it,
 |      the separator itself, and the part after it.  If the separator is not
 |      found, return S and two empty strings.
 |  
 |  replace(...)
 |      S.replace(old, new[, count]) -> string
 |      
 |      Return a copy of string S with all occurrences of substring
 |      old replaced by new.  If the optional argument count is
 |      given, only the first count occurrences are replaced.
 |  
 |  rfind(...)
 |      S.rfind(sub [,start [,end]]) -> int
 |      
 |      Return the highest index in S where substring sub is found,
 |      such that sub is contained within S[start:end].  Optional
 |      arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.
 |  
 |  rindex(...)
 |      S.rindex(sub [,start [,end]]) -> int
 |      
 |      Like S.rfind() but raise ValueError when the substring is not found.
 |  
 |  rjust(...)
 |      S.rjust(width[, fillchar]) -> string
 |      
 |      Return S right-justified in a string of length width. Padding is
 |      done using the specified fill character (default is a space)
 |  
 |  rpartition(...)
 |      S.rpartition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, starting at the end of S, and return
 |      the part before it, the separator itself, and the part after it.  If the
 |      separator is not found, return two empty strings and S.
 |  
 |  rsplit(...)
 |      S.rsplit([sep [,maxsplit]]) -> list of strings
 |      
 |      Return a list of the words in the string S, using sep as the
 |      delimiter string, starting at the end of the string and working
 |      to the front.  If maxsplit is given, at most maxsplit splits are
 |      done. If sep is not specified or is None, any whitespace string
 |      is a separator.
 |  
 |  rstrip(...)
 |      S.rstrip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with trailing whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  split(...)
 |      S.split([sep [,maxsplit]]) -> list of strings
 |      
 |      Return a list of the words in the string S, using sep as the
 |      delimiter string.  If maxsplit is given, at most maxsplit
 |      splits are done. If sep is not specified or is None, any
 |      whitespace string is a separator and empty strings are removed
 |      from the result.
 |  
 |  splitlines(...)
 |      S.splitlines(keepends=False) -> list of strings
 |      
 |      Return a list of the lines in S, breaking at line boundaries.
 |      Line breaks are not included in the resulting list unless keepends
 |      is given and true.
 |  
 |  startswith(...)
 |      S.startswith(prefix[, start[, end]]) -> bool
 |      
 |      Return True if S starts with the specified prefix, False otherwise.
 |      With optional start, test S beginning at that position.
 |      With optional end, stop comparing S at that position.
 |      prefix can also be a tuple of strings to try.
 |  
 |  strip(...)
 |      S.strip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with leading and trailing
 |      whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  swapcase(...)
 |      S.swapcase() -> string
 |      
 |      Return a copy of the string S with uppercase characters
 |      converted to lowercase and vice versa.
 |  
 |  title(...)
 |      S.title() -> string
 |      
 |      Return a titlecased version of S, i.e. words start with uppercase
 |      characters, all remaining cased characters have lowercase.
 |  
 |  translate(...)
 |      S.translate(table [,deletechars]) -> string
 |      
 |      Return a copy of the string S, where all characters occurring
 |      in the optional argument deletechars are removed, and the
 |      remaining characters have been mapped through the given
 |      translation table, which must be a string of length 256 or None.
 |      If the table argument is None, no translation is applied and
 |      the operation simply removes the characters in deletechars.
 |  
 |  upper(...)
 |      S.upper() -> string
 |      
 |      Return a copy of the string S converted to uppercase.
 |  
 |  zfill(...)
 |      S.zfill(width) -> string
 |      
 |      Pad a numeric string S with zeros on the left, to fill a field
 |      of the specified width.  The string S is never truncated.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


In [2]:
help(list)


Help on class list in module __builtin__:

class list(object)
 |  list() -> new empty list
 |  list(iterable) -> new list initialized from iterable's items
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __delitem__(...)
 |      x.__delitem__(y) <==> del x[y]
 |  
 |  __delslice__(...)
 |      x.__delslice__(i, j) <==> del x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __iadd__(...)
 |      x.__iadd__(y) <==> x+=y
 |  
 |  __imul__(...)
 |      x.__imul__(y) <==> x*=y
 |  
 |  __init__(...)
 |      x.__init__(...) initializes x; see help(type(x)) for signature
 |  
 |  __iter__(...)
 |      x.__iter__() <==> iter(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __reversed__(...)
 |      L.__reversed__() -- return a reverse iterator over the list
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  __setitem__(...)
 |      x.__setitem__(i, y) <==> x[i]=y
 |  
 |  __setslice__(...)
 |      x.__setslice__(i, j, y) <==> x[i:j]=y
 |      
 |      Use  of negative indices is not supported.
 |  
 |  __sizeof__(...)
 |      L.__sizeof__() -- size of L in memory, in bytes
 |  
 |  append(...)
 |      L.append(object) -- append object to end
 |  
 |  count(...)
 |      L.count(value) -> integer -- return number of occurrences of value
 |  
 |  extend(...)
 |      L.extend(iterable) -- extend list by appending elements from the iterable
 |  
 |  index(...)
 |      L.index(value, [start, [stop]]) -> integer -- return first index of value.
 |      Raises ValueError if the value is not present.
 |  
 |  insert(...)
 |      L.insert(index, object) -- insert object before index
 |  
 |  pop(...)
 |      L.pop([index]) -> item -- remove and return item at index (default last).
 |      Raises IndexError if list is empty or index is out of range.
 |  
 |  remove(...)
 |      L.remove(value) -- remove first occurrence of value.
 |      Raises ValueError if the value is not present.
 |  
 |  reverse(...)
 |      L.reverse() -- reverse *IN PLACE*
 |  
 |  sort(...)
 |      L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;
 |      cmp(x, y) -> -1, 0, 1
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __hash__ = None
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


In [3]:
help(tuple)


Help on class tuple in module __builtin__:

class tuple(object)
 |  tuple() -> empty tuple
 |  tuple(iterable) -> tuple initialized from iterable's items
 |  
 |  If the argument is a tuple, the return value is the same object.
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getnewargs__(...)
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __hash__(...)
 |      x.__hash__() <==> hash(x)
 |  
 |  __iter__(...)
 |      x.__iter__() <==> iter(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  count(...)
 |      T.count(value) -> integer -- return number of occurrences of value
 |  
 |  index(...)
 |      T.index(value, [start, [stop]]) -> integer -- return first index of value.
 |      Raises ValueError if the value is not present.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T

Exercise 2)


In [4]:
# tuples + lists: slicing, concatination, indexing
# only lists: reverse, sort, pop
# only tuple: hash
hash((1,2))


Out[4]:
1299869600

Exercise 3)


In [5]:
myTuple = tuple([1])
print myTuple
type(myTuple)


(1,)
Out[5]:
tuple

In [6]:
myTuple = (1,)
print myTuple
type(myTuple)


(1,)
Out[6]:
tuple

Exercise 4)


In [7]:
words = ['is', 'NLP', 'fun', '?']
tmp = words[0]
words[0] = words[1]
words[1] = tmp
words[3] = '!'
words


Out[7]:
['NLP', 'is', 'fun', '!']

In [8]:
words = ['is', 'NLP', 'fun', '?']
words[0], words[1], words[3] = words[1], words[0], '!'
words


Out[8]:
['NLP', 'is', 'fun', '!']

Exercise 5)


In [9]:
help(cmp)


Help on built-in function cmp in module __builtin__:

cmp(...)
    cmp(x, y) -> integer
    
    Return negative if x<y, zero if x==y, positive if x>y.


In [10]:
cmp(3,9)


Out[10]:
-1

In [11]:
cmp(9,3)


Out[11]:
1

In [12]:
# can differentiate 3 cases

Exercise 6)


In [13]:
sent = ['The', 'dog', 'gave', 'John', 'the', 'newspaper']
n = 3
[sent[i:i+n] for i in range(len(sent)-n+1)]


Out[13]:
[['The', 'dog', 'gave'],
 ['dog', 'gave', 'John'],
 ['gave', 'John', 'the'],
 ['John', 'the', 'newspaper']]

In [14]:
sent = ['The', 'dog', 'gave', 'John', 'the', 'newspaper']
n = 1
[sent[i:i+n] for i in range(len(sent)-n+1)]


Out[14]:
[['The'], ['dog'], ['gave'], ['John'], ['the'], ['newspaper']]

In [15]:
sent = ['The', 'dog', 'gave', 'John', 'the', 'newspaper']
n = len(sent)
[sent[i:i+n] for i in range(len(sent)-n+1)]


Out[15]:
[['The', 'dog', 'gave', 'John', 'the', 'newspaper']]

Exercise 7)


In [16]:
if (0):
    print 'true!'

In [17]:
if (1):
    print 'true!'


true!

In [18]:
if ('foo'):
    print 'true!'


true!

In [19]:
if (()):
    print 'true!'

In [20]:
if ((1,2)):
    print 'true!'


true!

In [21]:
if (-1):
    print 'true!'


true!

Exercise 8)


In [22]:
'Monty' < 'Python'


Out[22]:
True

In [23]:
'Z' < 'a'


Out[23]:
True

In [24]:
'z' < 'a'


Out[24]:
False

In [25]:
'Monty' < 'Montague'


Out[25]:
False

In [27]:
('Monty', 1) < ('Monty', 2)


Out[27]:
True

In [28]:
('Monty', 1) < ('Montague', 2)


Out[28]:
False

In [29]:
(1, 'Monty') < (2, 'Montague')


Out[29]:
True

Exercise 9)


In [30]:
# a
myStr = '  some    whitespaced string  '
' '.join(myStr.split())


Out[30]:
'some whitespaced string'

In [31]:
# b
import re
re.sub(r'\s+', ' ', re.sub(r'^\s+|\s+$', '', myStr))


Out[31]:
'some whitespaced string'

Exercise 10)


In [32]:
def sortWords(words):
    def cmp_len(word1, word2):
        return cmp(len(word1), len(word2))
    return sorted(words, cmp=cmp_len)

sortWords(['The', 'dog', 'gave', 'John', 'the', 'newspaper'])


Out[32]:
['The', 'dog', 'the', 'gave', 'John', 'newspaper']

Exercise 11)


In [33]:
sent1 = ['The', 'dog', 'gave', 'John', 'the', 'newspaper']
sent2 = sent1
sent1[1] = 'cat'
sent2


Out[33]:
['The', 'cat', 'gave', 'John', 'the', 'newspaper']

In [34]:
# a
sent1 = ['The', 'dog', 'gave', 'John', 'the', 'newspaper']
sent2 = sent1[:]
sent1[1] = 'cat'
sent2
# [:] -> copy list items, instead of creating reference to same list


Out[34]:
['The', 'dog', 'gave', 'John', 'the', 'newspaper']

In [35]:
# b
text1 = [['The', 'dog', 'gave', 'John', 'the', 'newspaper'], ['The', 'cat', 'miowed']]
text2 = text1[:]
text1[0][1] = 'monkey'
text2
# did not copy inner lists, but references to them


Out[35]:
[['The', 'monkey', 'gave', 'John', 'the', 'newspaper'],
 ['The', 'cat', 'miowed']]

In [36]:
# c
from copy import deepcopy
help(deepcopy)


Help on function deepcopy in module copy:

deepcopy(x, memo=None, _nil=[])
    Deep copy operation on arbitrary Python objects.
    
    See the module's __doc__ string for more info.


In [37]:
text1 = [['The', 'dog', 'gave', 'John', 'the', 'newspaper'], ['The', 'cat', 'miowed']]
text3 = deepcopy(text1)
text1[0][1] = 'monkey'
text3


Out[37]:
[['The', 'dog', 'gave', 'John', 'the', 'newspaper'], ['The', 'cat', 'miowed']]

Exercise 12)


In [38]:
word_table = [[''] * 3] * 4
word_table[1][2] = "hello"
word_table
# multiplication adds references to the same list, not copies of it


Out[38]:
[['', '', 'hello'], ['', '', 'hello'], ['', '', 'hello'], ['', '', 'hello']]

In [39]:
word_table = [['' for count1 in range(3)] for count2 in range(4)]
word_table[1][2] = "hello"
word_table


Out[39]:
[['', '', ''], ['', '', 'hello'], ['', '', ''], ['', '', '']]

Exercise 13)


In [40]:
word_vowels = [[]]
words = ['The', 'dog', 'gave', 'John', 'the', 'newspaper', 'The', 'cat', 'miowed']
for word in words:
    if (len(word) > len(word_vowels)-1):
        for index in range(len(word_vowels), len(word)+1):
            word_vowels.append([])
    num_vowels = len(re.findall(r'[aeiouAEIOU]', word))
    if (num_vowels > len(word_vowels[len(word)])-1):
        for index in range(len(word_vowels[len(word)]), num_vowels+1):
            word_vowels[len(word)].append(set())
    word_vowels[len(word)][num_vowels].add(word)
print word_vowels[3][1]
print word_vowels[9][3]


set(['the', 'The', 'dog', 'cat'])
set(['newspaper'])

Exercise 14)


In [2]:
def novel10(text):
    splitIndex = len(text) / 10
    print [w for w in text[-splitIndex:] if w not in text[:-splitIndex]]
from nltk.book import *
novel10(text3)


*** Introductory Examples for the NLTK Book ***
Loading text1, ..., text9 and sent1, ..., sent9
Type the name of the text or sentence to view it.
Type: 'texts()' or 'sents()' to list the materials.
text1: Moby Dick by Herman Melville 1851
text2: Sense and Sensibility by Jane Austen 1811
text3: The Book of Genesis
text4: Inaugural Address Corpus
text5: Chat Corpus
text6: Monty Python and the Holy Grail
text7: Wall Street Journal
text8: Personals Corpus
text9: The Man Who Was Thursday by G . K . Chesterton 1908
[u'nati', u'aga', u'His', u'Phallu', u'Hezron', u'Carmi', u'Jemuel', u'Jamin', u'Ohad', u'Jachin', u'Shaul', u'Canaanitish', u'Gershon', u'Kohath', u'Merari', u'Zar', u'Hezron', u'Hamul', u'Tola', u'Phuvah', u'Job', u'Shimron', u'Sered', u'Jahleel', u'Din', u'Ziphion', u'Haggi', u'Shuni', u'Ezbon', u'Eri', u'Arodi', u'Areli', u'Jimnah', u'Ishuah', u'Isui', u'Beriah', u'Serah', u'Beriah', u'Heber', u'Malchiel', u'sixteen', u'Belah', u'Becher', u'Ashbel', u'Gera', u'Naaman', u'Ehi', u'Rosh', u'Muppim', u'Huppim', u'Ard', u'Hushim', u'Jahzeel', u'Guni', u'Jezer', u'Shillem', u'direct', u'presented', u'shepherds', u'occupation', u'fathe', u'shepherd', u'presented', u'occupation', u'shepherds', u'morever', u'pasture', u'activity', u'rulers', u'pilgrimage', u'attained', u'pilgrimage', u'Rameses', u'nourished', u'boug', u'faileth', u'fail', u'exchange', u'horses', u'bodies', u'lan', u'desolate', u'priests', u'priests', u'assigned', u'sow', u'increase', u'parts', u'saved', u'priests', u'multiplied', u'nigh', u'bed', u'si', u'strengthened', u'bed', u'issue', u'begettest', u'Padan', u'guiding', u'wittingly', u'Angel', u'redeemed', u'lads', u'remove', u'Not', u'Manass', u'last', u'excellency', u'dignity', u'excellency', u'pow', u'Unstable', u'excel', u'wentest', u'bed', u'defiledst', u'couch', u'instruments', u'cruelty', u'secret', u'assembly', u'honour', u'unit', u'selfwill', u'wall', u'fierce', u'cru', u'lion', u'whelp', u'prey', u'stooped', u'couched', u'lion', u'lion', u'rouse', u'sceptre', u'lawgiver', u'Shiloh', u'Binding', u'foal', u'colt', u'His', u'teeth', u'haven', u'haven', u'ships', u'Zidon', u'strong', u'couching', u'burdens', u'tribute', u'tribes', u'adder', u'path', u'biteth', u'horse', u'heels', u'rider', u'waited', u'salvation', u'overcome', u'overcome', u'last', u'royal', u'dainties', u'hind', u'loose', u'giveth', u'bough', u'bough', u'run', u'wa', u'archers', u'sorely', u'arms', u'strong', u'shepherd', u'blessings', u'blessings', u'blessings', u'breasts', u'blessings', u'blessings', u'progenitors', u'utmost', u'hil', u'crown', u'ravin', u'wolf', u'devour', u'prey', u'spoil', u'tribes', u'peop', u'purchase', u'commanding', u'bed', u'yielded', u'physicians', u'embalm', u'physicians', u'embalmed', u'embalm', u'past', u'elders', u'elders', u'chariots', u'horsemen', u'threshingfloor', u'Atad', u'lamentati', u'floor', u'Atad', u'Egyptia', u'Abelmizraim', u'requite', u'messenger', u'Forgive', u'forgive', u'meant', u'Machir', u'visit', u'visit', u'embalmed', u'coffin']

Exercise 15)


In [42]:
import nltk
def countWords(sent):
    sent = sent.split()
    fdist = nltk.FreqDist(w.lower() for w in sent)
    for key in sorted(fdist.keys()):
        print '%s: %d' % (key, fdist[key])
countWords(' '.join(sent9))


,: 1
.: 1
a: 1
and: 1
as: 2
cloud: 1
lay: 1
london: 1
of: 3
on: 1
park: 1
ragged: 1
red: 1
saffron: 1
side: 1
suburb: 1
sunset: 2
the: 2

Exercise 16)


In [43]:
# a
def gematria(word):
    letter_vals = {'a':1, 'b':2, 'c':3, 'd':4, 'e':5, 'f':80, 'g':3, 'h':8, 'i':10, 'j':10, 'k':20, 'l':30, 'm':40, 'n':50, 'o':70, 'p':80, 'q':100, 'r':200, 's':300, 't':400, 'u':6, 'v':6, 'w':800, 'x':60, 'y':10, 'z':7}
    return sum(letter_vals[l] for l in word if len(re.findall(r'[a-z]', l)) > 0)
gematria('gematria')


Out[43]:
660

In [44]:
# b
for fileid in nltk.corpus.state_union.fileids():
    words666 = [w.lower() for w in nltk.corpus.state_union.words(fileid) if w.isalpha() and gematria(w.lower()) == 666]
    print '\n%s: %d' % (fileid, len(words666))
    print set(words666)


1945-Truman.txt: 2
set([u'outlook', u'eloquent'])

1946-Truman.txt: 13
set([u'retain', u'outlook', u'market'])

1947-Truman.txt: 0
set([])

1948-Truman.txt: 2
set([u'market'])

1949-Truman.txt: 2
set([u'market'])

1950-Truman.txt: 1
set([u'outlook'])

1951-Truman.txt: 0
set([])

1953-Eisenhower.txt: 1
set([u'market'])

1954-Eisenhower.txt: 6
set([u'retain', u'market'])

1955-Eisenhower.txt: 3
set([u'outlook', u'market'])

1956-Eisenhower.txt: 1
set([u'outlook'])

1957-Eisenhower.txt: 2
set([u'retain', u'market'])

1958-Eisenhower.txt: 5
set([u'retain', u'extra'])

1959-Eisenhower.txt: 1
set([u'outlook'])

1960-Eisenhower.txt: 5
set([u'outlook', u'eloquent', u'miraculous', u'philosophy'])

1961-Kennedy.txt: 0
set([])

1962-Kennedy.txt: 11
set([u'retain', u'market'])

1963-Johnson.txt: 0
set([])

1963-Kennedy.txt: 5
set([u'market', u'extra'])

1964-Johnson.txt: 1
set([u'market'])

1965-Johnson-1.txt: 0
set([])

1965-Johnson-2.txt: 0
set([])

1966-Johnson.txt: 0
set([])

1967-Johnson.txt: 2
set([u'outlook', u'extra'])

1968-Johnson.txt: 3
set([u'outlook', u'market'])

1969-Johnson.txt: 0
set([])

1970-Nixon.txt: 0
set([])

1971-Nixon.txt: 1
set([u'extra'])

1972-Nixon.txt: 0
set([])

1973-Nixon.txt: 1
set([u'philosophy'])

1974-Nixon.txt: 0
set([])

1975-Ford.txt: 0
set([])

1976-Ford.txt: 3
set([u'eloquent', u'extra'])

1977-Ford.txt: 0
set([])

1978-Carter.txt: 1
set([u'retain'])

1979-Carter.txt: 2
set([u'retain', u'extra'])

1980-Carter.txt: 0
set([])

1981-Reagan.txt: 4
set([u'market'])

1982-Reagan.txt: 0
set([])

1983-Reagan.txt: 2
set([u'market'])

1984-Reagan.txt: 1
set([u'market'])

1985-Reagan.txt: 1
set([u'market'])

1986-Reagan.txt: 1
set([u'squander'])

1987-Reagan.txt: 1
set([u'market'])

1988-Reagan.txt: 2
set([u'market', u'extra'])

1989-Bush.txt: 1
set([u'retain'])

1990-Bush.txt: 2
set([u'market', u'extra'])

1991-Bush-1.txt: 0
set([])

1991-Bush-2.txt: 0
set([])

1992-Bush.txt: 3
set([u'papers', u'market', u'extra'])

1993-Clinton.txt: 1
set([u'market'])

1994-Clinton.txt: 2
set([u'market'])

1995-Clinton.txt: 1
set([u'market'])

1996-Clinton.txt: 2
set([u'market'])

1997-Clinton.txt: 1
set([u'market'])

1998-Clinton.txt: 4
set([u'competency', u'market'])

1999-Clinton.txt: 1
set([u'extra'])

2000-Clinton.txt: 3
set([u'retina', u'miraculous', u'extra'])

2001-GWBush-1.txt: 1
set([u'philosophy'])

2001-GWBush-2.txt: 0
set([])

2002-GWBush.txt: 0
set([])

2003-GWBush.txt: 3
set([u'miraculous', u'market', u'extra'])

2004-GWBush.txt: 2
set([u'extra', u'papers'])

2005-GWBush.txt: 2
set([u'market', u'extra'])

2006-GWBush.txt: 0
set([])

In [45]:
# c
import random
def decode(text):
    num = random.randint(1, 1000)
    return num, set([w.lower() for w in text if w.isalpha() and gematria(w.lower()) == num])

result = decode(text4)
print result[0]
print result[1]


765
set([u'partaking', u'poetry', u'against', u'authorizing', u'gratefully', u'thorough', u'operated', u'frightened', u'tells', u'mentor'])

Exercise 17)


In [46]:
def shorten(text, n=20):
    most_freq = nltk.FreqDist(text).most_common(n)
    most_freq = [w for (w, num) in most_freq]
    print most_freq
    return [w for w in text if w not in most_freq]

print ' '.join(shorten(text3, 50)[:100])


[u',', u'and', u'the', u'of', u'.', u'And', u'his', u'he', u'to', u';', u'unto', u'in', u'that', u'I', u'said', u'him', u'a', u'my', u'was', u'for', u'it', u'with', u'me', u'thou', u"'", u'is', u'thy', u's', u'thee', u'be', u'shall', u'they', u'all', u':', u'God', u'them', u'not', u'father', u'which', u'will', u'land', u'Jacob', u'came', u'her', u'LORD', u'were', u'she', u'Joseph', u'from', u'their']
In beginning created heaven earth earth without form void darkness upon face deep Spirit moved upon face waters Let there light there light saw light good divided light darkness called light Day darkness called Night evening morning first day Let there firmament midst waters let divide waters waters made firmament divided waters under firmament waters above firmame so called firmament Heaven evening morning second day Let waters under heaven gathered together one place let dry appe so called dry Earth gathering together waters called Se saw good Let earth bring forth grass herb yielding seed fruit tree yielding fruit after

Exercise 18)


In [47]:
def getWords(prop, value):
    lexicon = [('fish', 'water animal', 'fish'), ('house', 'building', 'haus'), ('whale', 'water animal', 'wejl')]
    if prop == 'meaning':
        return [w for (w, m, p) in lexicon if m == value]
    if prop == 'pronunciation':
        return [w for (w, m, p) in lexicon if p == value]
    
getWords('meaning', 'water animal')


Out[47]:
['fish', 'whale']

In [48]:
getWords('pronunciation', 'haus')


Out[48]:
['house']

Exercise 19)


In [49]:
from nltk.corpus import wordnet as wn
list_syns = [wn.synset('minke_whale.n.01'), wn.synset('orca.n.01'), wn.synset('novel.n.01'), wn.synset('tortoise.n.01')]
comp = wn.synset('right_whale.n.01')
sorted(list_syns, lambda x,y: cmp(comp.shortest_path_distance(x), comp.shortest_path_distance(y)))


Out[49]:
[Synset('lesser_rorqual.n.01'),
 Synset('killer_whale.n.01'),
 Synset('tortoise.n.01'),
 Synset('novel.n.01')]

Exercise 20)


In [55]:
def sortWords(wordList):
    fdist = nltk.FreqDist(wordList)
    return fdist.keys()
sortWords(['one', 'two', 'two', 'four', 'four', 'four', 'four', 'three', 'three', 'three'])


Out[55]:
['four', 'three', 'two', 'one']

Exercise 21)


In [59]:
def unknownWords(text, vocab):
    return set(text).difference(set(vocab))
unknownWords(text3, nltk.corpus.words.words())


Out[59]:
{u'Sell',
 u'Allonbachuth',
 u'childr',
 u'gr',
 u'clusters',
 u'Togarmah',
 u'caused',
 u'badne',
 u'Matred',
 u'Gether',
 u'ceased',
 u'mules',
 u'Take',
 u'fearest',
 u'Ebal',
 u'likene',
 u'Husham',
 u'menservants',
 u'Ehi',
 u'sevens',
 u'Jimnah',
 u'Nod',
 u'Milcah',
 u'Peniel',
 u'tr',
 u'Avith',
 u'answered',
 u'preserved',
 u'Not',
 u'dukes',
 u'suffered',
 u'spee',
 u'Casluhim',
 u'Day',
 u'Mehujael',
 u'Feed',
 u'Whoso',
 u'Muppim',
 u'hadst',
 u'souls',
 u'fai',
 u'putting',
 u'breaketh',
 u'nourished',
 u'wagons',
 u'Mesopotamia',
 u'Abrah',
 u'Where',
 u'Mibsam',
 u'chesnut',
 u'knowest',
 u'Es',
 u'perceived',
 u'peop',
 u'Adbeel',
 u'And',
 u'presented',
 u'clothed',
 u'interpretations',
 u'Jobab',
 u'hous',
 u'Binding',
 u'joined',
 u'marvelled',
 u'daughers',
 u'Jetheth',
 u'Pison',
 u'guiding',
 u'Go',
 u'tru',
 u'buryingplace',
 u'Unto',
 u'Our',
 u'favoured',
 u'talked',
 u'Mamre',
 u'Hanoch',
 u'toucheth',
 u'repenteth',
 u'Midianites',
 u'Reu',
 u'compassed',
 u'honourable',
 u'creepeth',
 u'foals',
 u'Hadoram',
 u'tarried',
 u'Hebron',
 u'Tebah',
 u'sepulchre',
 u'espied',
 u'favour',
 u'Until',
 u'lights',
 u'Gaham',
 u'Jidlaph',
 u'Hear',
 u'Maachah',
 u'Put',
 u';',
 u'seemed',
 u'Have',
 u'pressed',
 u'Me',
 u'Huppim',
 u'Ephra',
 u'themselv',
 u'servants',
 u'Havilah',
 u'honour',
 u'hairs',
 u'leaped',
 u'kindled',
 u'Mizz',
 u'circumcis',
 u'youngest',
 u'Hagar',
 u'Woman',
 u'Mahanaim',
 u'dea',
 u'hast',
 u'Zeboim',
 u'bulls',
 u'My',
 u'Becher',
 u'Zeboiim',
 u'Zo',
 u'togeth',
 u'Shem',
 u'bou',
 u'women',
 u'Naphtuhim',
 u'concubi',
 u'Unstable',
 u'spices',
 u'changes',
 u'Riphath',
 u'named',
 u'Said',
 u'followed',
 u'Mehetabel',
 u'oversig',
 u'Assyr',
 u'When',
 u'marriages',
 u'erected',
 u'Shammah',
 u'Also',
 u'threshingfloor',
 u'tithes',
 u'Therefore',
 u'Asshur',
 u'Dishan',
 u'bre',
 u'Kor',
 u'isles',
 u'Zilpah',
 u'Assyria',
 u'doeth',
 u'Beor',
 u'Hul',
 u'walketh',
 u'bundles',
 u'asses',
 u'Bedad',
 u'sepulchres',
 u'longedst',
 u'wentest',
 u'reproa',
 u'angels',
 u'Ithran',
 u'Huz',
 u'Almighty',
 u'separated',
 u'sheweth',
 u'Kohath',
 u'Phallu',
 u'Asenath',
 u'waxed',
 u'healed',
 u'Mash',
 u'Bless',
 u'Leummim',
 u'Zillah',
 u'Ezbon',
 u'So',
 u'charged',
 u'Ziphion',
 u'needeth',
 u'appointed',
 u'Se',
 u'Perizzite',
 u'wrestled',
 u'shepherds',
 u'Now',
 u'Madai',
 u'refrained',
 u'Manass',
 u'meeteth',
 u'goest',
 u'Ashkenaz',
 u'Wherefore',
 u'flo',
 u'maidservants',
 u'believed',
 u'Chedorlaomer',
 u'speaketh',
 u'fle',
 u'spilled',
 u'lingered',
 u'Out',
 u'Lie',
 u'purchased',
 u'saith',
 u'Whereas',
 u'progenitors',
 u'pieces',
 u'escaped',
 u'heard',
 u'Abr',
 u'urged',
 u'Mahalath',
 u'Sheleph',
 u'stories',
 u'rods',
 u'habitations',
 u'Cause',
 u'spe',
 u'tak',
 u'!',
 u'journeyed',
 u'chariots',
 u'Thirty',
 u'inhabitants',
 u'LO',
 u'Who',
 u'nuts',
 u'Diklah',
 u',)',
 u'faults',
 u'Rebek',
 u'hou',
 u'hor',
 u'dwe',
 u'Zaavan',
 u'Isra',
 u'Why',
 u'Arvadite',
 u'merchantmen',
 u'mouths',
 u'Wilt',
 u'Nineveh',
 u'Joktan',
 u'halted',
 u'?)',
 u'fleddest',
 u'verified',
 u'Know',
 u'Jahzeel',
 u'fo',
 u'All',
 u'twins',
 u'branches',
 u'morter',
 u'speckl',
 u'drinketh',
 u'liveth',
 u'Eldaah',
 u'Moabites',
 u'rebuked',
 u'gavest',
 u'pleaseth',
 u'darkne',
 u'anoth',
 u'bakers',
 u'Er',
 u'pulled',
 u'reproved',
 u'Fill',
 u'Reumah',
 u'What',
 u'Drink',
 u'words',
 u'selfwill',
 u'dreamed',
 u'Paran',
 u'offerings',
 u'interpreted',
 u'held',
 u'sinning',
 u'committed',
 u'Hazo',
 u'rewarded',
 u'younge',
 u'birds',
 u'Kemuel',
 u'Hinder',
 u'Gerar',
 u'clo',
 u'possessions',
 u'mandrakes',
 u'lentiles',
 u'offeri',
 u'Zemarite',
 u'ships',
 u',',
 u'Here',
 u'embalmed',
 u'destroyed',
 u'Carmi',
 u'messes',
 u'womenservants',
 u'dunge',
 u'decreased',
 u'troubled',
 u'Machir',
 u'prisoners',
 u'Lest',
 u'There',
 u'Amalekites',
 u'Moreover',
 u'males',
 u'hunter',
 u'refused',
 u'Sojourn',
 u'obeyed',
 u'From',
 u'Serah',
 u'Can',
 u'sceptre',
 u'Ishuah',
 u'Arodi',
 u'killed',
 u'Tubalcain',
 u'sinners',
 u'feet',
 u'Eshban',
 u'Karnaim',
 u'Lehabim',
 u'Arphaxad',
 u'Pinon',
 u'Timna',
 u'horses',
 u'Lasha',
 u'Say',
 u'shekels',
 u'Zohar',
 u'garmen',
 u'carcases',
 u'Phara',
 u'longeth',
 u'reigned',
 u'Hazezontamar',
 u'Rosh',
 u'Zerah',
 u'Pharez',
 u'anointedst',
 u'Luz',
 u'Beerlahairoi',
 u'Except',
 u'Send',
 u'skins',
 u'hundredfo',
 u'Lud',
 u'hanged',
 u'befell',
 u'findest',
 u'giveth',
 u'ev',
 u'opened',
 u'Malchiel',
 u'kids',
 u'Ohad',
 u'ringstraked',
 u'vessels',
 u'strangers',
 u'Rephaims',
 u'Terah',
 u'Obal',
 u'Bozrah',
 u'Edomites',
 u'Eliezer',
 u'ri',
 u'Zebulun',
 u'Manasseh',
 u'Egy',
 u'sheddeth',
 u'Sheba',
 u'This',
 u'hang',
 u'deceived',
 u'spies',
 u'Swear',
 u'Stand',
 u'grapes',
 u'placed',
 u'heads',
 u'pris',
 u'Jetur',
 u'Fulfil',
 u'planted',
 u'Forasmuch',
 u'Ammon',
 u'created',
 u'Hori',
 u'deprived',
 u'Iram',
 u'Shebah',
 u'leanfleshed',
 u'seekest',
 u'seest',
 u'called',
 u'prospered',
 u'Anamim',
 u'faileth',
 u'Emins',
 u'grisl',
 u'curseth',
 u'waited',
 u'Speak',
 u'Even',
 u'Shaveh',
 u'asswaged',
 u'Edar',
 u'Guni',
 u'grisled',
 u'Amalek',
 u'Aner',
 u'knoweth',
 u'comforted',
 u'visited',
 u'Machpelah',
 u'barr',
 u'Timnah',
 u'reached',
 u'Ashbel',
 u'visions',
 u'seeth',
 u'lieth',
 u'Sered',
 u'Jebusites',
 u'citi',
 u'beguiled',
 u'aileth',
 u'entreated',
 u'Irad',
 u'households',
 u'Zaphnathpaaneah',
 u'beari',
 u'worshipped',
 u'Whose',
 u'Anah',
 u'blossoms',
 u'catt',
 u'Zidon',
 u'Do',
 u'eyes',
 u'Jehovahjireh',
 u'stars',
 u'serva',
 u'Ophir',
 u'mayest',
 u'Abimelech',
 u'Jeush',
 u'Dinhabah',
 u'conceived',
 u'herds',
 u'Sabtech',
 u'abated',
 u'appe',
 u'storehouses',
 u'ste',
 u'Set',
 u'morever',
 u'Potipherah',
 u'EleloheIsrael',
 u'Only',
 u'nations',
 u'coats',
 u'See',
 u'slimepits',
 u'husba',
 u'fruits',
 u'Lahairoi',
 u'Yet',
 u'repented',
 u'communing',
 u'proceedeth',
 u'tongues',
 u'Horites',
 u'Pau',
 u'remained',
 u'thistles',
 u'dainties',
 u'Peace',
 u'Tarshish',
 u'Beersheba',
 u'jewels',
 u'rained',
 u'shrubs',
 u'Ezer',
 u'magnified',
 u'firmame',
 u'Neither',
 u'Shaul',
 u'Then',
 u'overthrew',
 u'pursued',
 u'Kenaz',
 u'After',
 u'Egyptia',
 u'officers',
 u'Hiddekel',
 u'priests',
 u'wandered',
 u'Behold',
 u'overtook',
 u'They',
 u'laughed',
 u'Ask',
 u'Gihon',
 u'Dothan',
 u'Spirit',
 u'Kenites',
 u'Aram',
 u'Seir',
 u'vestures',
 u'comest',
 u'Hadad',
 u'Thy',
 u'bereaved',
 u'shewed',
 u'measures',
 u'troughs',
 u'wells',
 u'Up',
 u'Hadar',
 u'butlers',
 u'Abidah',
 u'seasons',
 u'Isa',
 u'Kadmonites',
 u'fetcht',
 u'Twelve',
 u'mightier',
 u'Benam',
 u'camest',
 u'garments',
 u'nostrils',
 u'consumed',
 u'archers',
 u'Some',
 u'kn',
 u'counted',
 u'looked',
 u'Shillem',
 u'Ahuzzath',
 u'Shinar',
 u'Earth',
 u'bondmen',
 u'If',
 u'How',
 u'trembled',
 u'Beno',
 u'laws',
 u'stones',
 u'Hai',
 u'Zibeon',
 u'twel',
 u'Ham',
 u'departing',
 u'arrayed',
 u'Kedemah',
 u'liest',
 u'generatio',
 u'Gilead',
 u'Nebajoth',
 u'Erech',
 u'compasseth',
 u'fatfleshed',
 u'Amorites',
 u'Tamar',
 u'wor',
 u'imagined',
 u'Jabal',
 u'weapons',
 u'We',
 u'Shobal',
 u'Shur',
 u'prevailed',
 u'giants',
 u'(',
 u'Return',
 u'Atad',
 u'Ephron',
 u'failed',
 u'Peradventure',
 u'judged',
 u'attained',
 u'Bera',
 u'Shemeber',
 u'thousands',
 u'?',
 u'lighted',
 u'Pildash',
 u'magicians',
 u'wrestlings',
 u'silv',
 u'loved',
 u'natio',
 u'Heth',
 u'Canaanites',
 u'trees',
 u'yielded',
 u'spi',
 u'While',
 u'ones',
 u'hid',
 u'bracelets',
 u'Heber',
 u'Discern',
 u'savoury',
 u'Zepho',
 u'hil',
 u'having',
 u'firstborn',
 u'princes',
 u'devoured',
 u'Because',
 u'boug',
 u'Angel',
 u'countries',
 u'grap',
 u'begettest',
 u'fainted',
 u'sacrifices',
 u'hasted',
 u'avenged',
 u'Eshcol',
 u'physicians',
 u'Padanaram',
 u'journeys',
 u'Mezahab',
 u'Calah',
 u'slayeth',
 u'bak',
 u'generations',
 u'ears',
 u'waters',
 u'strengthened',
 u'sto',
 u'seas',
 u'Hittites',
 u'signs',
 u'Moab',
 u'Abide',
 u'Pass',
 u'observed',
 u'Sabtah',
 u'crieth',
 u'hastened',
 u'gutters',
 u'Esek',
 u'Tola',
 u'knees',
 u'Jabbok',
 u'lambs',
 u'That',
 u'Sichem',
 u'Hereby',
 u'tents',
 u'Forgive',
 u'Jac',
 u'colours',
 u'Jubal',
 u'dost',
 u'strakes',
 u'Egyptians',
 u'Cush',
 u'cru',
 u'beasts',
 u'gathered',
 u'chode',
 u'multiplied',
 u'Moriah',
 u'elders',
 u'Be',
 u'Ephah',
 u'fathe',
 u'loins',
 u'comi',
 u'Remain',
 u'His',
 u'Thus',
 u'windows',
 u'Chaldees',
 u'Thahash',
 u'cities',
 u'Cainan',
 u'Merari',
 u'horsemen',
 u'Hazarmaveth',
 u'Gatam',
 u'Haggi',
 u'Eliphaz',
 u'Give',
 u'Yea',
 u'damsels',
 u'Mishma',
 u'Blessed',
 u'rams',
 u'fulfilled',
 u'Bring',
 u'became',
 u'faces',
 u'hith',
 u'Sitnah',
 u'Zar',
 u'asked',
 u'Ye',
 u'Mesha',
 u'cakes',
 u'co',
 u'Succoth',
 u'appeared',
 u'deeds',
 u'Calneh',
 u'spake',
 u'Jezer',
 u'Lamech',
 u'Gera',
 u'mercies',
 u'Asshurim',
 u'kings',
 u'biteth',
 u'famished',
 u'Hamul',
 u'fema',
 u'Buz',
 u'Ajah',
 u'Edom',
 u'fountains',
 u'Get',
 u'Ishbak',
 u'But',
 u'.',
 u'purposing',
 u'poured',
 u'Moreh',
 u'En',
 u'Hushim',
 u'entered',
 u'Zebul',
 u'kine',
 u'oth',
 u'shoulders',
 u'Elbethel',
 u'families',
 u'Areli',
 u'Galeed',
 u'digged',
 u'blessings',
 u'Shed',
 u'Penuel',
 u'mocking',
 u'Whence',
 u'Shepho',
 u'lives',
 u'Haran',
 u'Night',
 u'Thorns',
 u'Accad',
 u'towns',
 u'doth',
 u'Nahath',
 u'sheepshearers',
 u'To',
 u'possessi',
 u'Beware',
 u'lovest',
 u'Two',
 u'Euphrates',
 u'Hitti',
 u'Cursed',
 u'Philistim',
 u'Salem',
 u'Beriah',
 u'Achbor',
 u'sle',
 u'Kittim',
 u'messengers',
 u'changed',
 u'Samlah',
 u'played',
 u'riv',
 u'ir',
 u'Almodad',
 u'Esau',
 u'rulers',
 u'Gather',
 u'heels',
 u'commanded',
 u'cometh',
 u'Nay',
 u'Tell',
 u'sheaves',
 u'Birsha',
 u'feebler',
 u'flocks',
 u'Elparan',
 u'Belah',
 u')',
 u'things',
 u'began',
 u'Spake',
 u'Lotan',
 u'parts',
 u'bands',
 u'handmaids',
 u'Tidal',
 u'daughters',
 u'Appoint',
 u'booths',
 u'Padan',
 u'Escape',
 u'Aran',
 u'endued',
 u'commended',
 u'Midian',
 u'persons',
 u'fowls',
 u'firstlings',
 u'Haste',
 u'Admah',
 u'Kiriathaim',
 u'traffick',
 u'subtil',
 u'colts',
 u'thoughts',
 u'kid',
 u'asketh',
 u'Adah',
 u'Sod',
 u'loveth',
 u'Slay',
 u'Enmishpat',
 u'remaineth',
 u'Is',
 u'Cheran',
 u'It',
 u'Phichol',
 u'Iscah',
 u'Jaalam',
 u'Manahath',
 u'hearkened',
 u'thi',
 u'In',
 u'Zarah',
 u'Perizzites',
 u'builded',
 u'discerned',
 u'bodies',
 u'She',
 u'Sarai',
 u'plagues',
 u'Gomorrah',
 u'badest',
 u'womenservan',
 u'gods',
 u'hands',
 u'numbering',
 u'daughte',
 u'laded',
 u'bakemeats',
 u'Ashteroth',
 u'Bilhah',
 u'Methusael',
 u'years',
 u'plains',
 u'meanest',
 u'Happy',
 u'servan',
 u'Kirjatharba',
 u'camels',
 u'hills',
 u'tim',
 u'hated',
 u'supplanted',
 u'Masrekah',
 u'Eri',
 u'ewes',
 u'Amal',
 u'nati',
 u'Naphtali',
 u'Surely',
 u'Phut',
 u'Arbah',
 u'By',
 u'boys',
 u'Cherubims',
 u'mountains',
 u'On',
 u'Din',
 u'mocked',
 u'increased',
 u'Oh',
 u'Thou',
 u'Of',
 u'Salah',
 u'fath',
 u'stooped',
 u'wives',
 u'awaked',
 u'Sidon',
 u'findeth',
 u'lads',
 u'aprons',
 u'Perizzit',
 u'Resen',
 u'emptied',
 u'droves',
 u'dreams',
 u'ruled',
 u'Bashemath',
 u'daught',
 u'Tiras',
 u'wiv',
 u'With',
 u'Chesed',
 u'baskets',
 u'lamentati',
 u'Ard',
 u'Are',
 u'served',
 u'Fear',
 u'Shalt',
 u'stretched',
 u'canst',
 u'numbered',
 u'burdens',
 u'Keturah',
 u'wombs',
 u'Phuvah',
 u'divineth',
 u'fathers',
 u'Ishmeelites',
 u'shew',
 u'reviv',
 u'vowedst',
 u'travailed',
 u'images',
 u'pillows',
 u'enemies',
 u'saidst',
 u'delivered',
 u'bones',
 u'Hast',
 u'Hemam',
 u'gard',
 u'Hirah',
 u'Ludim',
 u'sowed',
 u'Lift',
 u'Let',
 u'denied',
 u'earrings',
 u'handfuls',
 u'Fifteen',
 u'communed',
 u'He',
 u'goats',
 u'Heaven',
 u'intreated',
 u'Look',
 u'rul',
 u'Uz',
 u'Raamah',
 u'tribes',
 u'Arise',
 u'Bethel',
 u'Bow',
 u'Euphrat',
 u'Kenizzites',
 u'pla',
 u'Potiphar',
 u'weig',
 u';)',
 u'Naamah',
 u'Hebrews',
 u'gifts',
 u'sawest',
 u'Cast',
 u'ribs',
 u'months',
 u'oa',
 u'blessi',
 u'sakes',
 u'Gomer',
 u'Tubal',
 u'standest',
 ...}

Exercise 22)


In [62]:
from operator import itemgetter
print sent3[:-1]
print sorted(sent3[:-1], key=itemgetter(1))
print sorted(sent3[:-1], key=itemgetter(-1))


['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven', 'and', 'the', 'earth']
['earth', 'beginning', 'heaven', 'the', 'the', 'the', 'In', 'and', 'God', 'created']
['God', 'created', 'and', 'the', 'the', 'the', 'beginning', 'earth', 'In', 'heaven']

In [63]:
help(itemgetter)


Help on class itemgetter in module operator:

class itemgetter(__builtin__.object)
 |  itemgetter(item, ...) --> itemgetter object
 |  
 |  Return a callable object that fetches the given item(s) from its operand.
 |  After f = itemgetter(2), the call f(r) returns r[2].
 |  After g = itemgetter(2, 5, 3), the call g(r) returns (r[2], r[5], r[3])
 |  
 |  Methods defined here:
 |  
 |  __call__(...)
 |      x.__call__(...) <==> x(...)
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


In [64]:
i = itemgetter(0)
print i('hallo')
print i(['hallo', 'welt'])


h
hallo

Exercise 23)


In [3]:
import nltk
def insert(trie, key, value):
    if key:
        first, rest = key[0], key[1:]
        if first not in trie:
            trie[first] = {}
        insert(trie[first], rest, value)
    else:
        trie['value'] = value
        
trie = nltk.defaultdict(dict)
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
trie['c']['h']['a']['t']['value']


Out[3]:
'cat'

In [4]:
import pprint
def lookup(trie, key):
    if len(key) == 0:
        if 'value' in trie:
            result = trie['value']
            return result
        elif (len(trie) == 1):
            keys = trie.keys()
            return lookup(trie[keys[0]], '')
        else:
            return 'no value found'
    else:
        if (key[0] in trie):
            return lookup(trie[key[0]], key[1:])
        else:
            return 'no value found'

print lookup(trie, 'ch')


no value found

Exercise 24)


In [1]:
# TODO

Exercise 25)


In [2]:
help(nltk.edit_distance)


Help on function edit_distance in module nltk.metrics.distance:

edit_distance(s1, s2, transpositions=False)
    Calculate the Levenshtein edit-distance between two strings.
    The edit distance is the number of characters that need to be
    substituted, inserted, or deleted, to transform s1 into s2.  For
    example, transforming "rain" to "shine" requires three steps,
    consisting of two substitutions and one insertion:
    "rain" -> "sain" -> "shin" -> "shine".  These operations could have
    been done in other orders, but at least three steps are needed.
    
    This also optionally allows transposition edits (e.g., "ab" -> "ba"),
    though this is disabled by default.
    
    :param s1, s2: The strings to be analysed
    :param transpositions: Whether to allow transposition edits
    :type s1: str
    :type s2: str
    :type transpositions: bool
    :rtype int


In [7]:
nltk.edit_distance('kitten', 'sitting', True)


Out[7]:
3

Exercise 26)


In [18]:
# a
def catalan_recursive(n):
    if (n == 0):
        return 1
    i = 0
    result = 0
    original_n = n
    while i < original_n:
        result += catalan_recursive(i) * catalan_recursive(n-1)
        n -= 1
        i += 1
    return result

catalan_recursive(6)


Out[18]:
132

In [15]:
# b
def catalan_dynamic(n, lookup={0:1}):
    result = 0
    if n == 0:
        return 1
    for i in range(n):
        if i not in lookup:
            lookup[i] = catalan_dynamic(i, lookup)
        if n-1 not in lookup:
            lookup[n-1] = catalan_dynamic(n-1, lookup)
        result += lookup[i] * lookup[n-1]    
        n -= 1
    return result

catalan_dynamic(6)


Out[15]:
132

In [35]:
# c
from timeit import Timer
t = Timer(lambda: catalan_recursive(10))
print t.timeit(number=10)
t = Timer(lambda: catalan_dynamic(10))
print t.timeit(number=10)


0.89065578542
0.000288574442266

Exercise 27)


In [2]:
# TODO

Exercise 28)


In [3]:
# TODO

Exercise 29)


In [5]:
import nltk
trie = nltk.defaultdict(dict)
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
trie['c']['h']['a']['t']['value']


Out[5]:
'cat'

In [6]:
def pprint_trie(trie, line=''):
    if 'value' in trie:
        print line + ': \'' + trie['value'] + '\''
        return
    for index, key in enumerate(sorted(trie.keys())):
        if (index == 0):
            pprint_trie(trie[key], line + key)
        else:
            pprint_trie(trie[key], ('-' * len(line)) + key)

pprint_trie(trie)


chair: 'flesh'
---t: 'cat'
--ic: 'stylish'
---en: 'dog'

Exercise 30)


In [15]:
def lookup_unique(key, trie, unique='', buffer_unique=''):
    if len(key) == 0:
        if len(buffer_unique) > 0:
            return buffer_unique
        else:  
            return unique
    if len(trie[key[0]]) == 1:
        if len(buffer_unique) > 0:
            new_buffer_unique = buffer_unique
        else:
            new_buffer_unique = unique + key[0]
        return lookup_unique(key[1:], trie[key[0]], unique + key[0], new_buffer_unique)
    return lookup_unique(key[1:], trie[key[0]], unique + key[0])
        

def compress(text):          
    trie = nltk.defaultdict(dict)
    for word in text:
        insert(trie, word, word)
    return [lookup_unique(w, trie) for w in text]

compressed = compress(text1)
from __future__ import division
print (100.0/len(''.join(text1))) * len(''.join(compressed))
print ' '.join(compressed[:200])


93.1709714487
[ Mob Dic by Herm Melv 1851 ] ETY . ( Suppl by a Late Consu Ush to a Gramm School ) The pale Ush -- threadb in coat , heart , body , and brain ; I see him now . He was ever dusti his old lexicons and gramm , with a queer handk , mockingl embellishe with all the gay flags of all the known nations of the world . He loved to dust his old gramm ; it someh mildl reminde him of his mortality . " While you take in hand to school others , and to teach them by what name a whale - fish is to be called in our tongue leaving out , through ignoranc , the letter H , which almo alone maket the significat of the word , you deliver that which is not true ." -- HAC " WHALE . ... Sw . and Dan . HVAL . This animal is named from roundn or rolling ; for in Dan . HVALT is arched or vaulte ." -- WE ' S DIC " WHALE . ... It is more immediatel from the Dut . and Ger . WALLEN ;

In [16]:
compressed = compress(sent3)
print (100.0/len(''.join(sent3))) * len(''.join(compressed))
print ' '.join(compressed)


24.4444444444
I t b G c t h a t e .

Exercise 31)


In [1]:
def load(fileName):
    f = open(fileName + '.txt')
    return f.read()
raw = load('corpus')
import textwrap
wrapped = textwrap.wrap(raw)
print wrapped[:10]


['\xef\xbb\xbfWeb-Based E-Assessment Beyond Multiple-Choice: The Application of', 'PHP- and HTML5 Technologies to Different Testing Formats', "Documentation  Master's Thesis in  Linguistics and Web Technology", 'presented to the Faculty of Foreign Languages and Cultures at the', 'Philipps-Universit\xc3\xa4t Marburg  by    Julia Neumann from Naumburg', '(Germany) Marburg, 2015 Contents         List of Abbreviations   3 1', 'Introduction    4 2       User Guide      5 3       Overall', 'Organization of the Code        7 3.1     General Design of the', 'JavaScript Components     9 4       Implementation of the Testing', 'Formats   11 4.1     Crossword       11 4.2     Dynamic Multiple-']

In [23]:
def justify(wrapped_text):
    line_length = max(len(line) for line in wrapped_text)
    for line in wrapped_text:
        words = line.split()
        num_chars = sum(len(word) for word in words)
        num_spaces = line_length - num_chars
        num_slots = len(words) - 1
        fixed_spaces = int(num_spaces / num_slots)
        spaces = 0
        for index, word in enumerate(words[:-1]):
            word += ' ' * fixed_spaces
            spaces += fixed_spaces
            words[index] = word
            
        while num_spaces - spaces > 0:
            remainder = (num_spaces - spaces) % num_slots
            chunk_size = int(len(words) / (remainder + 1))
            chunk = 0
            for index, word in enumerate(words[:-1]):
                if remainder and chunk == chunk_size:
                    word += ' '
                    spaces += 1
                    chunk = 0
                else:    
                    chunk += 1
                words[index] = word
            
        print ''.join(words)
        
justify(wrapped[:30])


Web-Based E-Assessment Beyond  Multiple-Choice: The Application  of
PHP-   and   HTML5   Technologies   to   Different   Testing   Formats
Documentation  Master's  Thesis  in  Linguistics  and  Web  Technology
presented to  the Faculty  of Foreign  Languages and  Cultures at  the
Philipps-Universität  Marburg   by  Julia    Neumann  from   Naumburg
(Germany)  Marburg,   2015  Contents   List  of   Abbreviations  3   1
Introduction     4     2      User     Guide      5      3     Overall
Organization   of   the   Code   7   3.1   General   Design   of   the
JavaScript   Components   9   4   Implementation    of   the   Testing
Formats    11    4.1    Crossword    11    4.2    Dynamic    Multiple-
Choice   13    4.3    Drag-and-Drop    15   5     Database   Structure
17   6    General   Features     19   6.1    Index   Page     19   6.2
Contact  Page   20  6.3    Color  Changer   20  6.4    Inline  Editing
and    Deletion    21    6.5    Exporting    Tests    22    References
25   Appendix    I:    Database    Structure   26     Declaration   of
Authorship       27       List       of       Abbreviations       AJAX
Asynchronous      JavaScript      and      XML      CSS      Cascading
Style     Sheets      DOM     Document      Object     Model      HTML
Hypertext     Markup     Language     JPEG      Joint     Photographic
Experts       Group       MVC       Model-View-Controller       MySQLi
MySQL    Improved    PHP     PHP:    Hypertext    Preprocessor     PNG
Portable     Network      Graphics      SQL       Structured     Query
Language      SVG       Scalable       Vector        Graphics      XML
Extensible  Markup   Language  1    Introduction  This   documentation
provides an overview of an application  developed for the creation and
management of web-based assessment  tasks in three different  formats.
The application consists  of a user-friendly  interface to a  database
structure for storing the created tests and  allows its users not only
to generate  new tests,   but also  to  edit,   delete, view,  and run
existing tests.  Thus, it  constitutes  a  tool that  can be   used by

Exercise 32)


In [7]:
import nltk
def summarize(text_sents, n):
    from operator import itemgetter
    freqDist = nltk.FreqDist([w.lower() for sent in text_sents for w in sent])
    scoresSents = [(sum(freqDist[word] for word in sent), index, sent) for (index, sent) in enumerate(text_sents)]
    sortByFreq = sorted(scoresSents, key=itemgetter(0), reverse=True)[:n]
    sortByIndex = sorted(sortByFreq, key=itemgetter(1))
    for (freq, index, sent) in sortByIndex:
        print index, ': ', sent, '\n'
    
from nltk.corpus import brown
summarize(brown.sents(categories='religion'), 10)


274 :  [u'``', u'So', u'that', u'the', u'man', u'should', u'not', u'have', u'thoughts', u'of', u'grandeur', u',', u'and', u'become', u'lifted', u'up', u',', u'as', u'if', u'he', u'had', u'no', u'lord', u',', u'because', u'of', u'the', u'dominion', u'that', u'had', u'been', u'given', u'to', u'him', u',', u'and', u'the', u'freedom', u',', u'fall', u'into', u'sin', u'against', u'God', u'his', u'Creator', u',', u'overstepping', u'his', u'bounds', u',', u'and', u'take', u'up', u'an', u'attitude', u'of', u'self-conceited', u'arrogance', u'towards', u'God', u',', u'a', u'law', u'was', u'given', u'him', u'by', u'God', u',', u'that', u'he', u'might', u'know', u'that', u'he', u'had', u'for', u'lord', u'the', u'lord', u'of', u'all', u'.'] 

304 :  [u'But', u'He', u'set', u'a', u'bound', u'to', u'his', u'(', u'state', u'of', u')', u'sin', u',', u'by', u'interposing', u'death', u',', u'and', u'thus', u'causing', u'sin', u'to', u'cease', u',', u'putting', u'an', u'end', u'to', u'it', u'by', u'the', u'dissolution', u'of', u'the', u'flesh', u',', u'which', u'should', u'take', u'place', u'in', u'the', u'earth', u',', u'so', u'that', u'man', u',', u'ceasing', u'at', u'length', u'to', u'live', u'in', u'sin', u',', u'and', u'dying', u'to', u'it', u',', u'might', u'live', u'to', u'God', u"''", u'.'] 

383 :  [u'What', u'otherwise', u'could', u'``', u'the', u'lawyer', u',', u'doctor', u',', u'minister', u',', u'the', u'men', u'of', u'science', u'and', u'letters', u"''", u'do', u'when', u'told', u'that', u'they', u'had', u'``', u'become', u'the', u'cherubim', u'and', u'seraphim', u'and', u'the', u'three', u'archangels', u'who', u'stood', u'before', u'the', u'golden', u'throne', u'of', u'the', u'merchant', u',', u'and', u'continually', u'cried', u',', u"'", u'Holy', u',', u'holy', u',', u'holy', u'is', u'the', u'Almighty', u'Dollar', u"'", u'``', u'?', u'?'] 

401 :  [u'We', u'have', u'not', u'the', u'leisure', u',', u'or', u'the', u'patience', u',', u'or', u'the', u'skill', u',', u'to', u'comprehend', u'what', u'was', u'working', u'in', u'the', u'mind', u'and', u'heart', u'of', u'a', u'then', u'recent', u'graduate', u'from', u'the', u'Harvard', u'Divinity', u'School', u'who', u'would', u'muster', u'the', u'audacity', u'to', u'contradict', u'his', u'most', u'formidable', u'instructor', u',', u'the', u'majesterial', u'Andrews', u'Norton', u',', u'by', u'saying', u'that', u',', u'while', u'he', u'believed', u'Jesus', u'``', u'like', u'other', u'religious', u'teachers', u"''", u',', u'worked', u'miracles', u',', u'``', u'I', u'see', u'not', u'how', u'a', u'miracle', u'proves', u'a', u'doctrine', u"''", u'.'] 

406 :  [u'At', u'one', u'time', u'I', u'became', u'disturbed', u'in', u'the', u'faith', u'in', u'which', u'I', u'had', u'grown', u'up', u'by', u'the', u'apparent', u'inroads', u'being', u'made', u'upon', u'both', u'Old', u'and', u'New', u'Testaments', u'by', u'a', u'``', u'Higher', u'Criticism', u"''", u'of', u'the', u'Bible', u',', u'to', u'refute', u'which', u'I', u'felt', u'the', u'need', u'of', u'a', u'better', u'knowledge', u'of', u'Hebrew', u'and', u'of', u'archaeology', u',', u'for', u'it', u'seemed', u'to', u'me', u'that', u'to', u'pull', u'out', u'some', u'of', u'the', u'props', u'of', u'our', u'faith', u'was', u'to', u'weaken', u'the', u'entire', u'structure', u'.'] 

417 :  [u'The', u'outcome', u'of', u'such', u'an', u'experiment', u'has', u'been', u'in', u'due', u'time', u'the', u'acceptance', u'of', u'the', u'Bible', u'as', u'the', u'Word', u'of', u'God', u'inspired', u'in', u'a', u'sense', u'utterly', u'different', u'from', u'any', u'merely', u'human', u'book', u',', u'and', u'with', u'it', u'the', u'acceptance', u'of', u'our', u'Lord', u'Jesus', u'Christ', u'as', u'the', u'only', u'begotten', u'Son', u'of', u'God', u',', u'Son', u'of', u'Man', u'by', u'the', u'Virgin', u'Mary', u',', u'the', u'Saviour', u'of', u'the', u'world', u'.'] 

418 :  [u'I', u'believe', u',', u'therefore', u',', u'that', u'we', u'are', u'without', u'exception', u'sinners', u',', u'by', u'nature', u'alienated', u'from', u'God', u',', u'and', u'that', u'Jesus', u'Christ', u',', u'the', u'Son', u'of', u'God', u',', u'came', u'to', u'earth', u',', u'the', u'representative', u'Head', u'of', u'a', u'new', u'race', u',', u'to', u'die', u'upon', u'the', u'cross', u'and', u'pay', u'the', u'penalty', u'of', u'the', u'sin', u'of', u'the', u'world', u',', u'and', u'that', u'he', u'who', u'thus', u'receives', u'Christ', u'as', u'his', u'personal', u'Saviour', u'is', u'``', u'born', u'again', u"''", u'spiritually', u',', u'with', u'new', u'privileges', u',', u'appetites', u',', u'and', u'affections', u',', u'destined', u'to', u'live', u'and', u'grow', u'in', u'His', u'likeness', u'forever', u'.'] 

657 :  [u'Although', u'the', u'primary', u'mathematical', u'properties', u'of', u'the', u'middle', u'number', u'at', u'the', u'center', u'of', u'the', u'Lo', u'Shu', u',', u'and', u'the', u'interrelation', u'of', u'all', u'the', u'other', u'numbers', u'to', u'it', u',', u'might', u'seem', u'enough', u'to', u'account', u'for', u'the', u'deep', u'fascination', u'which', u'the', u'Lo', u'Shu', u'held', u'for', u'the', u'Old', u'Chinese', u'philosophers', u',', u'this', u'was', u'actually', u'only', u'a', u'beginning', u'of', u'wonders', u'.'] 

964 :  [u'Presumably', u',', u'if', u'the', u'reverse', u'is', u'the', u'case', u'and', u'the', u'good', u'effect', u'is', u'more', u'certain', u'than', u'the', u'evil', u'result', u'that', u'may', u'be', u'forthcoming', u',', u'not', u'only', u'must', u'the', u'good', u'and', u'the', u'evil', u'be', u'prudentially', u'weighed', u'and', u'found', u'proportionate', u',', u'but', u'also', u'calculation', u'of', u'the', u'probabilities', u'and', u'of', u'the', u'degree', u'of', u'certainty', u'or', u'uncertainty', u'in', u'the', u'good', u'or', u'evil', u'effect', u'must', u'be', u'taken', u'into', u'account', u'.'] 

1258 :  [u'We', u'should', u'recall', u'the', u'number', u'of', u'movements', u'for', u'the', u'service', u'of', u'mankind', u'which', u'arose', u'from', u'the', u'kindred', u'Evangelicalism', u'of', u'the', u'British', u'Isles', u'and', u'the', u'Pietism', u'of', u'the', u'Continent', u'of', u'Europe', u'--', u'among', u'them', u'prison', u'reform', u',', u'anti-slavery', u'measures', u',', u'legislation', u'for', u'the', u'alleviation', u'of', u'conditions', u'of', u'labour', u',', u'the', u'Inner', u'Mission', u',', u'and', u'the', u'Red', u'Cross', u'.'] 

Exercise 33)


In [8]:
# TODO

Exercise 34)


In [9]:
# TODO

Exercise 35)


In [10]:
# TODO

Exercise 36)


In [30]:
def word_square(n):
    # works only if n < 5, with 5 exceeds maximum recursion callstack
    # TODO: Do this iteratively to avoid the callstack issue?
    from nltk.corpus import words
    myWords = [word.upper() for word in filter(lambda w: len(w) == n, words.words())] # get all words of length n
    
    square = []
    skipWords = [[] for i in range(n)] # cache for words that have already been tested at position i
    
    def check_against_square(word): # checks if current state of square would allow to add word to it
        if word in square:
            return False
        for (index, square_word) in enumerate(square):
            if (word[index] != square_word[len(square)]):
                return False
        return True
    
    def add_word(): # recursively adds / removes words from square until solution is found
        if len(square) == n:
            return True
        for word in myWords:
            if len(square) == n:
                return True
            if (word not in skipWords[len(square)]) and check_against_square(word): # add the word to square if it hasn't been tested unsuccessfully already and if it fits 
                square.append(word)
                add_word()
        if len(square) != n and len(square) != 0:   
            skipWords[len(square) - 1].append(square.pop()) # add word to cache
            for i in range(len(square) + 1, n): # reset the following parts of the cache
                skipWords[i] = []
            add_word()
        return False
            
        
    if add_word():
        for word in square:
            print word
    else:
        print 'No square found :/'
            
word_square(4)


AANI
ABAC
NACE
ICED

In [31]:
word_square(3)


AAL
ABA
LAB