In [1]:
from cfgen import GrammarModel
# Development convenience (optional): with autoreload mode 2, IPython reloads
# imported modules before each cell executes -- presumably enabled so edits to
# the cfgen sources take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2
# To turn automatic reloading off, uncomment the next line:
# %autoreload 0
Initialize model
In [2]:
# Corpus text file handed to GrammarModel (path is relative to the working directory).
CORPUS_PATH = './resources/corpora/frankenstein.txt'
# Second GrammarModel argument -- presumably the Markov order; confirm against cfgen.
MODEL_ORDER = 2
# Build the model once; the sampling cells below all reuse `my_model`.
my_model = GrammarModel(CORPUS_PATH, MODEL_ORDER)
Make a random sentence with fixed grammar
In [3]:
from random import seed

# Seed Python's global `random` generator before sampling
# (presumably what cfgen draws from -- confirm) so reruns are repeatable.
seed(5)

# Sentence passed to make_sentence as `sample_sentence`.
simple_sentence = 'The cow jumped over the moon, and the little dog laughed.'

# Print three sentences generated with fixed_grammar=True.
for ii in range(3):
    print(
        my_model.make_sentence(
            fixed_grammar=True,
            sample_sentence=simple_sentence,
        )
    )
Compare to a CFG without Markov
In [4]:
from random import seed

# Same seed as the fixed-grammar cell so the two outputs can be compared
# (assumes cfgen samples via Python's `random` module -- confirm).
seed(5)

# Sentence passed to make_sentence as `sample_sentence`.
simple_sentence = 'The cow jumped over the moon, and the little dog laughed.'

# Print three sentences generated with fixed_grammar=True and do_markov=False.
for ii in range(3):
    print(
        my_model.make_sentence(
            fixed_grammar=True,
            sample_sentence=simple_sentence,
            do_markov=False,
        )
    )
In [5]:
from random import seed

# Seed the global `random` generator before sampling
# (assumes cfgen samples via Python's `random` module -- confirm).
seed(5)

# Print two sentences with do_markov=False and no sample sentence supplied.
for ii in range(2):
    print(
        my_model.make_sentence(
            do_markov=False,
        )
    )
In [9]:
from random import seed

# Seed the global `random` generator before sampling
# (assumes cfgen samples via Python's `random` module -- confirm).
seed(5)

# Print two sentences with do_markov=True and no sample sentence supplied.
for ii in range(2):
    print(
        my_model.make_sentence(
            do_markov=True,
        )
    )
In [12]:
from random import seed

# Seed the global `random` generator, as the other sampling cells do, so this
# cell's output no longer depends on which cells happened to run before it.
# NOTE(review): assumes cfgen samples via Python's `random` module -- confirm.
seed(5)

# Print two sentences from make_sentence_markov.
# NOTE(review): the meaning of the argument 10 is not visible here; check cfgen.
for ii in range(2):
    print(my_model.make_sentence_markov(10))
TODO: move these NLTK data downloads into setup.py:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
Select a random sentence from the corpus
Issue: rare symbols sometimes appear in randomly selected sentences and cause problems when those sentences are used.
In [ ]: