This task works with edges. In the Hebrew data source, nodes that correspond to constituents may have labelled edges pointing to other constituents. Here we make an inventory of all such edges that are labelled `mother`.
In [1]:
import sys
import collections
from laf.fabric import LafFabric
# Create the LAF-Fabric processor object used to load data in the next cell.
# NOTE(review): verbose='DETAIL' presumably selects a detailed level of progress messages — confirm against the LAF-Fabric docs.
processor = LafFabric(verbose='DETAIL')
In [2]:
# Load the data for this task and keep the returned API dictionary.
# The "features" entry requests exactly the features the task reads later:
# `otype` and `book` (accessed through F) and `mother` (accessed through C).
# NOTE(review): the positional arguments are presumably source name, annotation
# package ('--' looks like "none") and task name — confirm against the LAF-Fabric docs.
API = processor.load(
    'etcbc4',
    '--',
    'mother',
    {
        "xmlids": {"node": False, "edge": False},
        "features": ("otype book", "mother"),
    },
)
In [3]:
# Inventory of all `mother` edges: every edge is written as a row to
# mothers.tsv, and the edges are counted per (source otype, target otype) pair.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost — confirm it against the original notebook.

# Pull the API entries into plain names.
F = API['F']
C = API['C']
NN = API['NN']
msg = API['msg']
infile = API['infile']
outfile = API['outfile']
my_file = API['my_file']

msg("Get the mothers...")
out = outfile("mothers.tsv")

row_template = "{}\t{}\t{}\t{}\n"
progress_template = "{} ({})\n"

bookname = None
found = 0  # running total of mother edges; never reset, so per-book reports are cumulative
# mother_kind[source otype][target otype] -> number of mother edges seen
mother_kind = collections.defaultdict(lambda: collections.defaultdict(int))

for node in NN():
    otype = F.otype.v(node)

    # Record every mother edge that C.mother yields for this node.
    for mother in C.mother.v(node):
        found += 1
        motype = F.otype.v(mother)
        mother_kind[otype][motype] += 1
        out.write(row_template.format(otype, node, motype, mother))

    # On reaching a book node, report the previous book (if there was one)
    # before switching to the new book's name.
    if otype == "book":
        if bookname:
            sys.stderr.write(progress_template.format(bookname, found))
        bookname = F.book.v(node)

# The last book is never followed by another book node, so report it here.
sys.stderr.write(progress_template.format(bookname, found))
sys.stderr.write("Total {}\n".format(found))

# Summary table, sorted by source type and then by target type.
for source_type, targets in sorted(mother_kind.items()):
    for target_type, count in sorted(targets.items()):
        print("{:<15} ==mother==> {:<15} : {:>10} x".format(source_type, target_type, count))

# NOTE(review): presumably this also closes files opened via outfile() (such as `out`) — confirm.
API['close']()
In [ ]: