This task uses edges. In the Hebrew data source, nodes corresponding to constituents may have labelled edges pointing to other constituents. We make an inventory of all such edges labelled `mother`.
In [1]:
import sys
import collections
from laf.fabric import LafFabric
# Create the LAF-Fabric processor that later cells use to load data.
# NOTE(review): verbose='DETAIL' presumably selects a detailed message
# level — confirm the accepted values in the LAF-Fabric documentation.
processor = LafFabric(verbose='DETAIL')
In [2]:
# Describe what to load for this task.
# NOTE(review): the "features" tuple presumably lists node features first
# ("otype book") and edge features second ("mother") — confirm against the
# LAF-Fabric load-spec documentation.
load_spec = {
    "xmlids": {"node": False, "edge": False},
    "features": ("otype book", "mother"),
}

# Load the 'etcbc4' source for the task named 'mother'; the returned API
# mapping is indexed by name ('F', 'C', 'NN', ...) in the next cell.
API = processor.load('etcbc4', '--', 'mother', load_spec)
In [3]:
# Pull the handles this task uses out of the loaded API mapping.
F = API['F']
C = API['C']
NN = API['NN']
msg = API['msg']
infile = API['infile']
outfile = API['outfile']
my_file = API['my_file']


def report_progress(name, count):
    """Write a "<name> (<count>)" progress line to stderr."""
    sys.stderr.write("{} ({})\n".format(name, count))


msg("Get the mothers...")
out = outfile("mothers.tsv")

# One TSV row per mother edge: source type, source node, target type, target node.
row_format = "{}\t{}\t{}\t{}\n"
summary_format = "{:<15} ==mother==> {:<15} : {:>10} x"

bookname = None
found = 0  # running total of mother edges seen so far (never reset per book)
# mother_kind[source_type][target_type] -> number of mother edges of that shape.
mother_kind = collections.defaultdict(lambda: collections.defaultdict(int))

for node in NN():
    node_type = F.otype.v(node)
    # NOTE(review): C.mother.v(node) is taken to yield the nodes that `node`
    # points to via a `mother` edge — confirm direction in the LAF-Fabric docs.
    for mother_node in C.mother.v(node):
        found += 1
        mother_type = F.otype.v(mother_node)
        mother_kind[node_type][mother_type] += 1
        out.write(row_format.format(node_type, node, mother_type, mother_node))
    if node_type != "book":
        continue
    # A new book node was reached: report the previous book (if any) with the
    # cumulative edge count, then remember the new book's name.
    if bookname:
        report_progress(bookname, found)
    bookname = F.book.v(node)

# Report the last book seen, then the grand total.
report_progress(bookname, found)
sys.stderr.write("Total {}\n".format(found))

# Print the inventory, sorted by source type and then by target type.
for source_type in sorted(mother_kind):
    targets = mother_kind[source_type]
    for target_type in sorted(targets):
        print(summary_format.format(source_type, target_type, targets[target_type]))

# NOTE(review): presumably finalizes the task and closes files opened via
# outfile() — confirm in the LAF-Fabric documentation.
API['close']()
In [ ]: