In [1]:
from __future__ import print_function, division
import sys
import string
import random
In [8]:
def process_file(filename, order=2):
    """Reads a file and performs Markov analysis.

    filename: string path to the text file
    order: integer number of words in the prefix

    Returns None; populates the global suffix_map (via process_word)
    as a side effect.
    """
    # 'with' guarantees the file handle is closed even if processing
    # raises part-way through (the original leaked the open file).
    with open(filename) as fp:
        for line in fp:
            for word in line.rstrip().split():
                process_word(word, order)
In [9]:
def process_word(word, order=2):
    """Processes one word of the input stream.

    word: string
    order: integer

    For the first `order` words we only accumulate the prefix window;
    after that, each word is recorded as a possible suffix of the
    current prefix, and the window slides forward by one word.
    """
    global prefix
    if len(prefix) < order:
        # still warming up: grow the prefix window and wait for more words
        prefix = prefix + (word,)
        return

    # record this word as a suffix of the current prefix, creating the
    # entry on first sight
    suffix_map.setdefault(prefix, []).append(word)
    prefix = shift(prefix, word)
In [16]:
def random_text(n=100, start=None):
    """Generates random words from the analyzed text.

    Starts with a random prefix from the dictionary unless one is given.

    n: number of words to generate
    start: optional prefix tuple to begin from; random if None
    """
    # choose a random prefix (not weighted by frequency)
    if start is None:
        start = random.choice(list(suffix_map.keys()))

    for i in range(n):
        suffixes = suffix_map.get(start, None)
        # use 'is None' (identity) rather than '== None' for the singleton
        if suffixes is None:
            # if the start isn't in map, we got to the end of the
            # original text, so we have to start again.
            random_text(n - i)
            return

        # choose a random suffix
        word = random.choice(suffixes)
        print(word, end=' ')
        start = shift(start, word)
In [17]:
def shift(t, word):
    """Slides the window: drop the first element of t, append word.

    t: tuple of strings
    word: string

    Returns: a new tuple of strings
    """
    return (*t[1:], word)
In [18]:
suffix_map = {} # map from prefix tuple -> list of suffix words seen after it
prefix = () # sliding window of the most recent words (grows up to `order`)
In [19]:
# Build the Markov model from the corpus (fills the suffix_map global).
process_file('corpus2.txt')
In [22]:
# Each notebook cell below prints another 100-word random sample
# drawn from the model built above.
random_text()
In [15]:
random_text()
In [21]:
random_text()
In [23]:
random_text()
In [24]:
random_text()
In [25]:
random_text()
In [26]:
random_text()
In [27]:
random_text()
In [28]:
random_text()
In [29]:
random_text()
In [ ]: