In [21]:
import os
import glob
import discoursegraphs as dg

In [22]:
# Directory containing the anaphoricity annotation files (*.txt) used by this notebook.
# The path is absolute and has no leading '~', so expanduser() returns it unchanged.
ANAPHORICITY_DIR = os.path.expanduser(
    '/home/arne/repos/pcc-annis-merged/maz176/anaphora/kobold/'
)

In [23]:
# Collect every *.txt file located directly in ANAPHORICITY_DIR.
# NOTE: glob.glob() does not guarantee any ordering of its matches, so
# index 0 of this list is not a stable "first" file across machines.
txt_pattern = os.path.join(ANAPHORICITY_DIR, '*.txt')
anaphoricity_files = glob.glob(txt_pattern)

In [24]:
# Display one matched path as a sanity check of the glob result.
anaphoricity_files[0]


Out[24]:
'/home/arne/repos/pcc-annis-merged/maz176/anaphora/kobold/maz-12576.txt'

In [25]:
# Read the first matched file with discoursegraphs. The resulting object
# (presumably a document graph -- it exposes `.ns` and `.node` below) is
# what the following cells inspect.
adg = dg.read_anaphoricity(anaphoricity_files[0])

In [26]:
# Show the node IDs that select_nodes_by_layer() yields for the
# '<namespace>:annotated' layer; list() materialises the result for display.
list(dg.select_nodes_by_layer(adg, adg.ns+':annotated'))


Out[26]:
[30, 52, 107, 185]

In [27]:
# Inspect the attribute dict of a single node. In the recorded outputs,
# node 60 is not among the IDs listed for the ':annotated' layer, and its
# dict holds only the token, label and layers entries.
adg.node[60]


Out[27]:
{'anaphoricity:token': u'daf\xfcr',
 'label': u'daf\xfcr',
 'layers': {'anaphoricity', 'anaphoricity:token'}}

In [27]:


In [28]:
# NOTE(review): disabled draft of a helper that renders a document as a string,
# tagging each annotated 'das'/'es' token as "<token>/<annotation><certainty>".
# Presumably superseded by the library's write_anaphoricity() used in the
# export loop further down -- confirm before deleting this cell.
# FIXME if revived: the certainty check below compares the attribute *name*
# (docgraph.ns+':certainty') with '1.0', which is always False, so every
# annotated token would get a '?' suffix. It should look up the node's
# attribute value instead.
# from discoursegraphs.readwrite.anaphoricity import ANNOTATIONS

# def gen_anaphoricity_str(docgraph, anaphora='es'):
#     assert anaphora in ('das', 'es')
#     ret_str = u''
# #     annotated_token_ids = list(dg.select_nodes_by_layer(docgraph, docgraph.ns+':annotated'))
#     annotated_token_ids = [tok_id for tok_id in dg.select_nodes_by_layer(docgraph, docgraph.ns+':annotated')
#                            if docgraph.get_token(tok_id).lower() == anaphora]
#     for token_id in docgraph.tokens:
#         if token_id in annotated_token_ids:
#             certainty_str = '' if docgraph.ns+':certainty' == '1.0' else '?'
#             ret_str += u'{}/{}{} '.format(
#                 docgraph.get_token(token_id),
#                 ANNOTATIONS[docgraph.node[token_id][docgraph.ns+':annotation']],
#                 certainty_str)
#         else:
#             ret_str += u'{} '.format(docgraph.get_token(token_id))
#     return ret_str

In [29]:
# Disabled together with the commented-out gen_anaphoricity_str draft above.
# print gen_anaphoricity_str(adg)

In [31]:
# Re-export every anaphoricity file once per anaphora type. Each output keeps
# the input's basename and goes into the 'das' or 'es' subdirectory of
# ANAPHORICITY_DIR, in that order (same order as the original two calls).
for anaphoricity_file in anaphoricity_files:
    # Use a dedicated name for the per-file graph so the `adg` inspected in
    # the cells above is not silently rebound to the last file of this loop.
    docgraph = dg.read_anaphoricity(anaphoricity_file)
    fname = os.path.basename(anaphoricity_file)
    for anaphora in ('das', 'es'):
        dg.readwrite.anaphoricity.write_anaphoricity(
            docgraph,
            os.path.join(ANAPHORICITY_DIR, anaphora, fname),
            anaphora=anaphora)

In [30]: