In [21]:
import os
import glob
import discoursegraphs as dg
In [22]:
# Directory that is searched for the '*.txt' anaphoricity annotation files.
# NOTE(review): this is an absolute, machine-specific path; expanduser() leaves
# it unchanged and only has an effect if the path is rewritten to start with '~'.
ANAPHORICITY_DIR = os.path.expanduser(
    '/home/arne/repos/pcc-annis-merged/maz176/anaphora/kobold/'
)
In [23]:
# Collect all '*.txt' files directly inside ANAPHORICITY_DIR (non-recursive).
# glob.glob() returns its matches in arbitrary order, so the list is sorted to
# make `anaphoricity_files[0]` refer to the same file on every run.
anaphoricity_files = sorted(glob.glob(os.path.join(ANAPHORICITY_DIR, '*.txt')))
In [24]:
# Display the path of the first matched file; it is the one loaded into `adg`
# in the next cell.
anaphoricity_files[0]
Out[24]:
In [25]:
# Load the first annotation file; the resulting graph object `adg` is what the
# following cells inspect.
first_anaphoricity_file = anaphoricity_files[0]
adg = dg.read_anaphoricity(first_anaphoricity_file)
In [26]:
# List the ids of all nodes that belong to the '<namespace>:annotated' layer
# of the graph.
annotated_layer = adg.ns + ':annotated'
list(dg.select_nodes_by_layer(adg, annotated_layer))
Out[26]:
In [27]:
# Show what is stored in the graph for node 60.
# NOTE(review): 60 is hard-coded -- presumably one of the annotated node ids
# listed by the previous cell for this particular file; confirm before reuse.
adg.node[60]
Out[27]:
In [27]:
In [28]:
# Disabled draft, kept for reference only (nothing in this cell is executed).
# It builds a single string of all tokens in `docgraph`, separated by spaces,
# in which every annotated token that equals `anaphora` (case-insensitively)
# is rendered as 'token/ANNOTATION' followed by an optional '?' marker.
#
# NOTE(review): as written, the certainty check compares the constant string
# docgraph.ns+':certainty' with '1.0'. These can never be equal, so every
# tagged token would get a '?'. Presumably the certainty attribute of the
# token's node was meant -- confirm before re-enabling this code.
#
# from discoursegraphs.readwrite.anaphoricity import ANNOTATIONS
#
# def gen_anaphoricity_str(docgraph, anaphora='es'):
#     assert anaphora in ('das', 'es')
#     ret_str = u''
#     # annotated_token_ids = list(dg.select_nodes_by_layer(docgraph, docgraph.ns+':annotated'))
#     annotated_token_ids = [tok_id for tok_id in dg.select_nodes_by_layer(docgraph, docgraph.ns+':annotated')
#                            if docgraph.get_token(tok_id).lower() == anaphora]
#     for token_id in docgraph.tokens:
#         if token_id in annotated_token_ids:
#             certainty_str = '' if docgraph.ns+':certainty' == '1.0' else '?'
#             ret_str += u'{}/{}{} '.format(
#                 docgraph.get_token(token_id),
#                 ANNOTATIONS[docgraph.node[token_id][docgraph.ns+':annotation']],
#                 certainty_str)
#         else:
#             ret_str += u'{} '.format(docgraph.get_token(token_id))
#     return ret_str
In [29]:
# Disabled: gen_anaphoricity_str() is itself commented out in the cell above.
# print gen_anaphoricity_str(adg)
In [31]:
# Re-export every annotation file once per anaphora type. Each output file is
# written to <ANAPHORICITY_DIR>/<anaphora type>/<original file name>.

# Create the output directories up front so the export does not depend on them
# having been created by hand. The isdir() guard keeps this Python 2 compatible
# (os.makedirs() has no `exist_ok` there) and makes the cell safe to re-run.
for anaphora in ('das', 'es'):
    output_dir = os.path.join(ANAPHORICITY_DIR, anaphora)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

for anaphoricity_file in anaphoricity_files:
    # NOTE: this rebinds `adg`; after the loop it refers to the last file read,
    # not to the file inspected in the cells above.
    adg = dg.read_anaphoricity(anaphoricity_file)
    fname = os.path.basename(anaphoricity_file)
    # Same order as before: 'das' first, then 'es'.
    for anaphora in ('das', 'es'):
        dg.readwrite.anaphoricity.write_anaphoricity(
            adg,
            os.path.join(ANAPHORICITY_DIR, anaphora, fname),
            anaphora=anaphora)
In [30]: