Homework: Competitive Grammar Writing


In [1]:
from pcfg_parse_gen import Pcfg, PcfgGenerator, CkyParse
import nltk

def print_tree(tree_string):
    """Print a bracketed parse string as a text-art tree.

    Args:
        tree_string: A parse in bracketed notation, e.g.
            ``"(TOP (S1 (NP ...) ...))"``. Leading and trailing
            whitespace is stripped before parsing.
    """
    nltk.Tree.fromstring(tree_string.strip()).pretty_print()

def draw_tree(tree_string):
    """Draw a bracketed parse string using NLTK's graphical tree view.

    Args:
        tree_string: A parse in bracketed notation, e.g.
            ``"(TOP (S1 (NP ...) ...))"``. Leading and trailing
            whitespace is stripped before parsing.
    """
    # NOTE(review): Tree.draw() presumably needs a GUI display; confirm it
    # works in the environment where this notebook is run.
    nltk.Tree.fromstring(tree_string.strip()).draw()

Parsing sentences with your grammar

While you are developing your grammar, you should use it to parse both the example sentences and sentences sampled from your own and other grammars.


In [2]:
# Grammar used for parsing: all three rule files are loaded together.
parse_gram = Pcfg(
    [
        "S1.gr",
        "S2.gr",
        "Vocab.gr",
    ]
)


#reading grammar file: S1.gr
#reading grammar file: S2.gr
#reading grammar file: Vocab.gr

In [3]:
# Build the CKY parser once; the cells below reuse `parser`.
parser = CkyParse(
    parse_gram,
    beamsize=1e-5,
)

# parse_sentences takes a list of whitespace-tokenised sentences and returns
# the cross-entropy value together with the bracketed parse strings.
ce, trees = parser.parse_sentences(
    ["Arthur is the king ."]
)
print("-cross entropy: {}".format(ce))

# Show each returned parse as a text-art tree.
for tree_string in trees:
    print_tree(tree_string)


(TOP (S1 (NP (Proper Arthur) ) (_VP (VP (VerbT is) (NP (Det the) (Nbar (Noun king) ))) (Punc .))) )
-cross entropy: -3.7863679540999504
             TOP              
              |                
              S1              
   ___________|___             
  |              _VP          
  |            ___|________    
  |           VP           |  
  |       ____|___         |   
  |      |        NP       |  
  |      |     ___|___     |   
  NP     |    |      Nbar  |  
  |      |    |       |    |   
Proper VerbT Det     Noun Punc
  |      |    |       |    |   
Arthur   is  the     king  .  

#parsing: ['Arthur', 'is', 'the', 'king', '.']
#-cross entropy (bits/word): -3.78637

In [4]:
# Parse a second sentence with the parser built in the previous cell.
ce, trees = parser.parse_sentences(
    ["five strangers are at the Round Table ."]
)
print("-cross entropy: {}".format(ce))

# Show each returned parse as a text-art tree.
for tree_string in trees:
    print_tree(tree_string)


(TOP (S2 (_Misc (Misc five) (_Misc (Misc strangers) (_Misc (Misc are) (_Prep (Prep at) (_Det (Det the) (_Misc (Misc (_Round Round) (_Table Table)) (_Misc (Misc .) ))))))) ) )
-cross entropy: -9.807330330570931
        TOP                                             
         |                                               
         S2                                             
         |                                               
       _Misc                                            
  _______|_______                                        
 |             _Misc                                    
 |        _______|__________                             
 |       |                _Misc                         
 |       |        __________|_____                       
 |       |       |              _Prep                   
 |       |       |     ___________|_____                 
 |       |       |    |                _Det             
 |       |       |    |      ___________|_____           
 |       |       |    |     |               _Misc       
 |       |       |    |     |            _____|______    
 |       |       |    |     |          Misc        _Misc
 |       |       |    |     |      _____|_____       |   
Misc    Misc    Misc Prep  Det  _Round      _Table  Misc
 |       |       |    |     |     |           |      |   
five strangers  are   at   the  Round       Table    .  

#parsing: ['five', 'strangers', 'are', 'at', 'the', 'Round', 'Table', '.']
#-cross entropy (bits/word): -9.80733

Use parse_file to parse a file of sentences.


In [5]:
# Parse every sentence in the example file; the parser prints each bracketed
# parse itself, so only the overall cross-entropy is reported here.
ce, trees = parser.parse_file("example_sentences.txt")
print("-cross entropy: {}".format(ce))


(TOP (S1 (NP (Proper Arthur) ) (_VP (VP (VerbT is) (NP (Det the) (Nbar (Noun king) ))) (Punc .))) )
(TOP (S1 (NP (Proper Arthur) ) (_VP (VP (VerbT rides) (NP (Det the) (Nbar (Nbar (Noun horse) ) (PP (Prep near) (NP (Det the) (Nbar (Noun castle) )))))) (Punc .))) )
(TOP (S2 (_Misc (Misc riding) (_Misc (Misc to) (_Misc (Misc Camelot) (_VerbT (VerbT is) (_Misc (Misc hard) (_Misc (Misc .) )))))) ) )
(TOP (S2 (_Misc (Misc do) (_Misc (Misc coconuts) (_Misc (Misc speak) (_Misc (Misc ?) )))) ) )
(TOP (S2 (_Misc (Misc what) (_Misc (Misc does) (_Proper (Proper Arthur) (_Misc (Misc ride) (_Misc (Misc ?) ))))) ) )
(TOP (S2 (_Misc (Misc who) (_Misc (Misc does) (_Proper (Proper Arthur) (_Misc (Misc suggest) (_Misc (Misc she) (_Misc (Misc carry) (_Misc (Misc ?) ))))))) ) )
(TOP (S2 (_Misc (Misc why) (_Misc (Misc does) (_Misc (Misc England) (_Misc (Misc have) (_Det (Det a) (_Noun (Noun king) (_Misc (Misc ?) ))))))) ) )
(TOP (S2 (_Misc (Misc are) (_Misc (Misc they) (_Misc (Misc suggesting) (_Proper (Proper Arthur) (_Misc (Misc ride) (_Misc (Misc to) (_Misc (Misc Camelot) (_Misc (Misc ?) )))))))) ) )
(TOP (S2 (_Misc (Misc five) (_Misc (Misc strangers) (_Misc (Misc are) (_Prep (Prep at) (_Det (Det the) (_Misc (Misc (_Round Round) (_Table Table)) (_Misc (Misc .) ))))))) ) )
(TOP (S2 (_Proper (Proper Guinevere) (_Misc (Misc might) (_Misc (Misc have) (_Misc (Misc known) (_Misc (Misc .) ))))) ) )
(TOP (S2 (_Proper (Proper Guinevere) (_Misc (Misc should) (_Misc (Misc be) (_Misc (Misc riding) (_Prep (Prep with) (_Proper (Proper Patsy) (_Misc (Misc .) ))))))) ) )
(TOP (S2 (_Misc (Misc it) (_VerbT (VerbT is) (_Proper (Proper (_Sir Sir) (_Lancelot Lancelot)) (_Misc (Misc who) (_Misc (Misc knows) (_Proper (Proper Zoot) (_Misc (Misc !) ))))))) ) )
(TOP (S2 (_Misc (Misc either) (_Proper (Proper Arthur) (_Misc (Misc knows) (_Misc (Misc or) (_Proper (Proper Patsy) (_Misc (Misc does) (_Misc (Misc .) ))))))) ) )
(TOP (S2 (_Misc (Misc neither) (_Proper (Proper (_Sir Sir) (_Lancelot Lancelot)) (_Misc (Misc nor) (_Proper (Proper Guinevere) (_Misc (Misc will) (_Misc (Misc speak) (_Prep (Prep of) (_Misc (Misc it) (_Misc (Misc .) ))))))))) ) )
(TOP (S2 (_Det (Det the) (_Misc (Misc (_Holy Holy) (_Grail Grail)) (_Misc (Misc was) (_Misc (Misc covered) (_Prep (Prep by) (_Det (Det a) (_Misc (Misc yellow) (_Noun (Noun fruit) (_Misc (Misc .) ))))))))) ) )
(TOP (S2 (_Proper (Proper Zoot) (_Misc (Misc might) (_Misc (Misc have) (_Misc (Misc been) (_Misc (Misc carried) (_Prep (Prep by) (_Det (Det a) (_Noun (Noun swallow) (_Misc (Misc .) ))))))))) ) )
(TOP (S2 (_Proper (Proper Arthur) (_Misc (Misc rode) (_Misc (Misc to) (_Misc (Misc Camelot) (_Misc (Misc and) (_Misc (Misc drank) (_Prep (Prep from) (_Misc (Misc his) (_Noun (Noun chalice) (_Misc (Misc .) )))))))))) ) )
(TOP (S2 (_Misc (Misc they) (_Misc (Misc migrate) (_Misc (Misc precisely) (_Misc (Misc because) (_Misc (Misc they) (_Misc (Misc know) (_Misc (Misc they) (_Misc (Misc will) (_Misc (Misc grow) (_Misc (Misc .) )))))))))) ) )
(TOP (S2 (_Misc (Misc do) (_Misc (Misc not) (_Misc (Misc speak) (_Misc (Misc !) )))) ) )
(TOP (S2 (_Proper (Proper Arthur) (_Misc (Misc will) (_Misc (Misc have) (_Misc (Misc been) (_Misc (Misc riding) (_Prep (Prep for) (_Misc (Misc eight) (_Misc (Misc nights) (_Misc (Misc .) ))))))))) ) )
(TOP (S2 (_Proper (Proper Arthur) (_Misc (Misc ,) (_Misc (Misc sixty) (_Misc (Misc inches) (_Misc (Misc ,) (_VerbT (VerbT is) (_Det (Det a) (_Misc (Misc tiny) (_Noun (Noun king) (_Misc (Misc .) )))))))))) ) )
(TOP (S2 (_Proper (Proper Arthur) (_Misc (Misc knows) (_Proper (Proper Patsy) (_Misc (Misc ,) (_Det (Det the) (_Misc (Misc trusty) (_Noun (Noun servant) (_Misc (Misc .) )))))))) ) )
(TOP (S2 (_Proper (Proper Arthur) (_Misc (Misc and) (_Proper (Proper Guinevere) (_Misc (Misc migrate) (_Misc (Misc frequently) (_Misc (Misc .) )))))) ) )
(TOP (S2 (_Misc (Misc he) (_Misc (Misc knows) (_Misc (Misc what) (_Misc (Misc they) (_Misc (Misc are) (_Misc (Misc covering) (_Prep (Prep with) (_Det (Det that) (_Noun (Noun story) (_Misc (Misc .) )))))))))) ) )
(TOP (S2 (_Proper (Proper Arthur) (_Misc (Misc suggested) (_Det (Det that) (_Det (Det the) (_Noun (Noun castle) (_Misc (Misc be) (_Misc (Misc carried) (_Misc (Misc .) )))))))) ) )
(TOP (S2 (_Det (Det the) (_Noun (Noun king) (_Misc (Misc drank) (_Misc (Misc to) (_Det (Det the) (_Noun (Noun castle) (_Det (Det that) (_Misc (Misc was) (_Misc (Misc his) (_Noun (Noun home) (_Misc (Misc .) ))))))))))) ) )
(TOP (S2 (_Misc (Misc when) (_Det (Det the) (_Noun (Noun king) (_VerbT (VerbT drinks) (_Misc (Misc ,) (_Proper (Proper Patsy) (_VerbT (VerbT drinks) (_Misc (Misc .) )))))))) ) )
-cross entropy: -10.276152770660259
#parsing: ['Arthur', 'is', 'the', 'king', '.']
#parsing: ['Arthur', 'rides', 'the', 'horse', 'near', 'the', 'castle', '.']
#parsing: ['riding', 'to', 'Camelot', 'is', 'hard', '.']
#parsing: ['do', 'coconuts', 'speak', '?']
#parsing: ['what', 'does', 'Arthur', 'ride', '?']
#parsing: ['who', 'does', 'Arthur', 'suggest', 'she', 'carry', '?']
#parsing: ['why', 'does', 'England', 'have', 'a', 'king', '?']
#parsing: ['are', 'they', 'suggesting', 'Arthur', 'ride', 'to', 'Camelot', '?']
#parsing: ['five', 'strangers', 'are', 'at', 'the', 'Round', 'Table', '.']
#parsing: ['Guinevere', 'might', 'have', 'known', '.']
#parsing: ['Guinevere', 'should', 'be', 'riding', 'with', 'Patsy', '.']
#parsing: ['it', 'is', 'Sir', 'Lancelot', 'who', 'knows', 'Zoot', '!']
#parsing: ['either', 'Arthur', 'knows', 'or', 'Patsy', 'does', '.']
#parsing: ['neither', 'Sir', 'Lancelot', 'nor', 'Guinevere', 'will', 'speak', 'of', 'it', '.']
#parsing: ['the', 'Holy', 'Grail', 'was', 'covered', 'by', 'a', 'yellow', 'fruit', '.']
#parsing: ['Zoot', 'might', 'have', 'been', 'carried', 'by', 'a', 'swallow', '.']
#parsing: ['Arthur', 'rode', 'to', 'Camelot', 'and', 'drank', 'from', 'his', 'chalice', '.']
#parsing: ['they', 'migrate', 'precisely', 'because', 'they', 'know', 'they', 'will', 'grow', '.']
#parsing: ['do', 'not', 'speak', '!']
#parsing: ['Arthur', 'will', 'have', 'been', 'riding', 'for', 'eight', 'nights', '.']
#parsing: ['Arthur', ',', 'sixty', 'inches', ',', 'is', 'a', 'tiny', 'king', '.']
#parsing: ['Arthur', 'knows', 'Patsy', ',', 'the', 'trusty', 'servant', '.']
#parsing: ['Arthur', 'and', 'Guinevere', 'migrate', 'frequently', '.']
#parsing: ['he', 'knows', 'what', 'they', 'are', 'covering', 'with', 'that', 'story', '.']
#parsing: ['Arthur', 'suggested', 'that', 'the', 'castle', 'be', 'carried', '.']
#parsing: ['the', 'king', 'drank', 'to', 'the', 'castle', 'that', 'was', 'his', 'home', '.']
#parsing: ['when', 'the', 'king', 'drinks', ',', 'Patsy', 'drinks', '.']
#-cross entropy (bits/word): -10.2762

Generating sentences with your grammar

While you are developing your grammar, you should generate sentences with it to check what your grammar is doing. Try to write your grammar so that it generates hard-to-parse sentences.


In [6]:
# Grammar used for generation: S1 rules plus the vocabulary (S2.gr is not loaded).
gen_gram = Pcfg(
    [
        "S1.gr",
        "Vocab.gr",
    ]
)


#reading grammar file: S1.gr
#reading grammar file: Vocab.gr

In [7]:
# Sample 20 sentences from the generation grammar and print one per line.
# NOTE(review): no random seed is set here, so the samples presumably differ
# between runs -- confirm which RNG PcfgGenerator uses before seeding it.
gen = PcfgGenerator(gen_gram)
for _ in range(20):
    # generate() yields the sentence as a sequence of tokens; join with spaces.
    print(" ".join(gen.generate()))


every land carries no chalice .
any weight has the winter .
this husk is Sir Lancelot .
no sun covers the master
a weight above another story carries a sovereign .
another weight has this swallow
this sovereign has this land
any chalice drinks no weight
that fruit is every sun .
another master is a husk .
every horse drinks each swallow
every swallow has every story .
any servant rides another pound
the king carries any coconut through a corner .
Sir Knight drinks this sovereign
each fruit covers each coconut
this horse carries any defeater .
another home carries a sun
every swallow rides a corner
another corner carries this master near every defeater