Dependency Grammar in NLTK

(C) 2019 by Damir Cavar

Based on the NLTK HOWTO Dependency.

We import the `DependencyGrammar` class from the `nltk.grammar` module:


In [1]:
from nltk.grammar import DependencyGrammar

We also import the `DependencyGraph` class and two dependency parsers, one projective and one non-projective, from `nltk.parse`:


In [2]:
from nltk.parse import (
    DependencyGraph,
    ProjectiveDependencyParser,
    NonprojectiveDependencyParser,
)

In [3]:
# Sample sentence, one token per line, in four whitespace-separated columns:
# word, POS tag, index of the head token (1-based; 0 marks the root),
# and the dependency relation label.
treebank_data = """Pierre  NNP     2       NMOD
Vinken  NNP     8       SUB
,       ,       2       P
61      CD      5       NMOD
years   NNS     6       AMOD
old     JJ      2       NMOD
,       ,       2       P
will    MD      0       ROOT
join    VB      8       VC
the     DT      11      NMOD
board   NN      9       OBJ
as      IN      9       VMOD
a       DT      15      NMOD
nonexecutive    JJ      15      NMOD
director        NN      12      PMOD
Nov.    NNP     9       VMOD
29      CD      16      NMOD
.       .       9       VMOD
"""

In [4]:
# Build a dependency graph from the sample sentence defined above.
dg = DependencyGraph(treebank_data)

In [5]:
# Convert the graph to a tree and pretty-print it in bracketed notation.
dg.tree().pprint()


(will
  (Vinken Pierre , (old (years 61)) ,)
  (join (board the) (as (director a nonexecutive)) (Nov. 29) .))

In [6]:
# Walk over the (head, relation, dependent) triples of the graph and print
# one line per arc; head and dependent are shown as (word, tag) pairs.
for triple in dg.triples():
    head, rel, dep = triple
    line = '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'.format(h=head, r=rel, d=dep)
    print(line)


(will, MD), SUB, (Vinken, NNP)
(Vinken, NNP), NMOD, (Pierre, NNP)
(Vinken, NNP), P, (,, ,)
(Vinken, NNP), NMOD, (old, JJ)
(old, JJ), AMOD, (years, NNS)
(years, NNS), NMOD, (61, CD)
(Vinken, NNP), P, (,, ,)
(will, MD), VC, (join, VB)
(join, VB), OBJ, (board, NN)
(board, NN), NMOD, (the, DT)
(join, VB), VMOD, (as, IN)
(as, IN), PMOD, (director, NN)
(director, NN), NMOD, (a, DT)
(director, NN), NMOD, (nonexecutive, JJ)
(join, VB), VMOD, (Nov., NNP)
(Nov., NNP), NMOD, (29, CD)
(join, VB), VMOD, (., .)

Dependency Version of the Penn Treebank


In [7]:
from nltk.corpus import dependency_treebank

In [8]:
# Take the first parsed sentence of the dependency treebank corpus.
# NOTE(review): presumably requires the corpus data to be installed first
# (e.g. nltk.download('dependency_treebank')) — confirm.
t = dependency_treebank.parsed_sents()[0]

In [9]:
print(t.to_conll(3))  # three-column output: word, tag, head index


Pierre	NNP	2
Vinken	NNP	8
,	,	2
61	CD	5
years	NNS	6
old	JJ	2
,	,	2
will	MD	0
join	VB	8
the	DT	11
board	NN	9
as	IN	9
a	DT	15
nonexecutive	JJ	15
director	NN	12
Nov.	NNP	9
29	CD	16
.	.	8

"Using the output of zpar (like Malt-TAB but with zero-based indexing)":


In [10]:
# The same sentence with zero-based head indices: the first token has
# index 0 and the root token carries head -1. Columns are word, POS tag,
# head index and relation label.
zpar_data = """
Pierre  NNP     1       NMOD
Vinken  NNP     7       SUB
,       ,       1       P
61      CD      4       NMOD
years   NNS     5       AMOD
old     JJ      1       NMOD
,       ,       1       P
will    MD      -1      ROOT
join    VB      7       VC
the     DT      10      NMOD
board   NN      8       OBJ
as      IN      8       VMOD
a       DT      14      NMOD
nonexecutive    JJ      14      NMOD
director        NN      11      PMOD
Nov.    NNP     8       VMOD
29      CD      15      NMOD
.       .       7       P
"""

In [11]:
# zero_based=True is passed because the head indices in zpar_data start at 0.
zdg = DependencyGraph(zpar_data, zero_based=True)

In [12]:
# Print the zero-based graph as a bracketed tree.
print(zdg.tree())


(will
  (Vinken Pierre , (old (years 61)) ,)
  (join (board the) (as (director a nonexecutive)) (Nov. 29))
  .)

Projective Dependency Parsing


In [13]:
# Toy grammar for the projective parser. Each rule names a head word on the
# left and its permitted dependents on the right; alternatives separated by
# '|' are expanded into separate productions (see the printed grammar).
grammar = DependencyGrammar.fromstring("""
'fell' -> 'price' | 'stock'
'price' -> 'of' 'the'
'of' -> 'stock'
'stock' -> 'the'
""")

In [14]:
# Show the productions the grammar string was expanded into.
print(grammar)


Dependency grammar with 5 productions
  'fell' -> 'price'
  'fell' -> 'stock'
  'price' -> 'of' 'the'
  'of' -> 'stock'
  'stock' -> 'the'

In [15]:
# Create a projective dependency parser driven by the toy grammar.
dp = ProjectiveDependencyParser(grammar)

In [16]:
# Print every analysis the projective parser finds for the sentence, in
# sorted order. The loop variable is called `tree` rather than `t` so that it
# does not overwrite the corpus sentence `t` loaded earlier in the notebook
# (re-running the to_conll cell would otherwise fail on a Tree object).
for tree in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
    print(tree)


(fell (price the (of (stock the))))
(fell (price the of) (stock the))
(fell (price the of the) stock)

Non-Projective Dependency Parsing


In [17]:
# Toy grammar for the non-projective parser. This rebinds `grammar`, so the
# projective grammar defined earlier is no longer reachable under that name.
grammar = DependencyGrammar.fromstring("""
'taught' -> 'play' | 'man'
'man' -> 'the'
'play' -> 'golf' | 'dog' | 'to'
'dog' -> 'his'
""")

In [18]:
# Show the productions of the second grammar.
print(grammar)


Dependency grammar with 7 productions
  'taught' -> 'play'
  'taught' -> 'man'
  'man' -> 'the'
  'play' -> 'golf'
  'play' -> 'dog'
  'play' -> 'to'
  'dog' -> 'his'

In [19]:
# Create a non-projective parser for the second grammar (rebinds `dp`).
dp = NonprojectiveDependencyParser(grammar)

In [20]:
# Single-element unpacking: binds the one parse to `g` and raises ValueError
# if the parser yields no parse or more than one.
(g,) = dp.parse(
    ['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']
)

In [21]:
# The root node is a dict of token attributes; show its word form.
print(g.root['word'])


taught

In [22]:
# Printing the graph dumps its node table: address -> dict of token attributes.
print(g)


defaultdict(<function DependencyGraph.__init__.<locals>.<lambda> at 0x0000026AC1D89798>,
            {0: {'address': 0,
                 'ctag': 'TOP',
                 'deps': defaultdict(<class 'list'>, {'ROOT': [3]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': 'TOP',
                 'word': None},
             1: {'address': 1,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'the'},
             2: {'address': 2,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [1]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'man'},
             3: {'address': 3,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [2, 7]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'taught'},
             4: {'address': 4,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'his'},
             5: {'address': 5,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [4]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'dog'},
             6: {'address': 6,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'to'},
             7: {'address': 7,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [5, 6, 8]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'play'},
             8: {'address': 8,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'golf'}})

In [23]:
# Iterate over every parse the non-projective parser yields for the sentence
# and print each resulting dependency graph. Descriptive names replace the
# single letters `x` and `i` (`i` reads like an integer index, but holds a graph).
parses = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
for parse_graph in parses:
    print(parse_graph)


defaultdict(<function DependencyGraph.__init__.<locals>.<lambda> at 0x0000026AC1D89B88>,
            {0: {'address': 0,
                 'ctag': 'TOP',
                 'deps': defaultdict(<class 'list'>, {'ROOT': [3]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': 'TOP',
                 'word': None},
             1: {'address': 1,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'the'},
             2: {'address': 2,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [1]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'man'},
             3: {'address': 3,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [2, 7]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'taught'},
             4: {'address': 4,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'his'},
             5: {'address': 5,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [4]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'dog'},
             6: {'address': 6,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'to'},
             7: {'address': 7,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {'': [5, 6, 8]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'play'},
             8: {'address': 8,
                 'ctag': None,
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': None,
                 'word': 'golf'}})

In [24]:
# List every real token together with the addresses of its dependents.
# The parser stores its (unlabeled) arcs under the empty relation name ''.
for address in sorted(g.nodes):
    node = g.nodes[address]
    if node['word'] is None:
        # Address 0 is the artificial TOP node without a word form.
        continue
    # node['deps'] is a defaultdict(list): indexing it with [''] would insert
    # an empty list into every leaf node and thereby modify the graph just by
    # displaying it. .get() reads the value without that side effect.
    dependents = node['deps'].get('', [])
    print('{address} {word}: {d}'.format(d=dependents, **node))


1 the: []
2 man: [1]
3 taught: [2, 7]
4 his: []
5 dog: [4]
6 to: []
7 play: [5, 6, 8]
8 golf: []

In [25]:
# Print the non-projective parse as a bracketed tree.
print(g.tree())


(taught (man the) (play (dog his) to golf))

In [ ]: