In [1]:
    
from cfgen import GrammarModel
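# Optional: autoreload re-imports edited modules before each cell runs; the two magics below can be removed to turn it off.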
%load_ext autoreload
%autoreload 2
# %autoreload 0
    
    
Initialize model
In [2]:
    
# Settings for the model built in this cell.
# MODEL_ORDER is presumably the Markov order -- confirm against GrammarModel.
MODEL_ORDER = 2
CORPUS_PATH = './resources/corpora/frankenstein.txt'

# Build the model once; the cells below all sample from this instance.
my_model = GrammarModel(
    CORPUS_PATH,
    MODEL_ORDER,
)
    
Make a random sentence with fixed grammar
In [3]:
    
# Print three sentences generated with fixed_grammar=True from one sample sentence.
from random import seed

# Fix the `random` module's seed before sampling.
seed(5)

simple_sentence = 'The cow jumped over the moon, and the little dog laughed.'
for _draw in range(3):
    generated = my_model.make_sentence(
        fixed_grammar=True,
        sample_sentence=simple_sentence,
    )
    print(generated)
    
    
Compare to a CFG without the Markov model
In [4]:
    
# Same sample sentence and seed as the fixed-grammar run, but with do_markov=False.
from random import seed

# Fix the `random` module's seed before sampling.
seed(5)

simple_sentence = 'The cow jumped over the moon, and the little dog laughed.'
for _draw in range(3):
    generated = my_model.make_sentence(
        fixed_grammar=True,
        sample_sentence=simple_sentence,
        do_markov=False,
    )
    print(generated)
    
    
In [5]:
    
# Print two sentences from make_sentence with do_markov=False and no sample sentence.
from random import seed

# Fix the `random` module's seed before sampling.
seed(5)

for _draw in range(2):
    generated = my_model.make_sentence(do_markov=False)
    print(generated)
    
    
In [9]:
    
# Print two sentences from make_sentence with do_markov=True and no sample sentence.
from random import seed

# Fix the `random` module's seed before sampling.
seed(5)

for _draw in range(2):
    generated = my_model.make_sentence(do_markov=True)
    print(generated)
    
    
In [12]:
    
# Print two sentences from make_sentence_markov(10).
# NOTE(review): the argument 10 is presumably a length limit -- confirm against GrammarModel.
#
# This was the only sampling cell that did not seed the RNG, so its output
# depended on whatever had run before it. Seed here with the same value the
# other cells use; this makes the output repeatable assuming the model draws
# from the stdlib `random` module -- TODO confirm.
from random import seed  # cell-local import so this cell runs on its own

seed(5)

for _draw in range(2):
    print(my_model.make_sentence_markov(10))
    
    
TODO: move these NLTK resource downloads into setup.py:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
Select a random sentence from the corpus
Issue: rare symbols appear and cause problems when random sentences are used.
In [ ]: