In [1]:
# Compare pocores' automatic coreference resolution against the MMAX2
# coreference annotation (presumably the manual gold standard -- TODO confirm)
# for a single document of the maz176 corpus.
import os

from pocores import Pocores, print_coreference_report
from discoursegraphs.readwrite.mmax2 import spanstring2text, spanstring2tokens
from discoursegraphs.readwrite import ConllDocumentGraph, MMAXDocumentGraph
from discoursegraphs import get_pointing_chains, print_dot

DOC_ID = 'maz-11299'  # ID of the document to analyse

# cdg: document graph built from the CoNLL2009 file (input for pocores below)
cdg = ConllDocumentGraph(os.path.expanduser('~/repos/pocores/src/pocores/test/maz176/{}.conll2009'.format(DOC_ID)))
# mdg: document graph built from the MMAX2 coreference annotation of the same text
mdg = MMAXDocumentGraph(os.path.expanduser('~/repos/pcc-annis-merged/maz176/coreference/{}.mmax'.format(DOC_ID)))

In [2]:
# (disabled) graphviz visualisation of both document graphs;
# uncomment if the gvmagic extension is installed
# %load_ext gvmagic
# %dotstr print_dot(cdg)
# %dotstr print_dot(mdg)

In [3]:
# Run the pocores anaphora resolver on the CoNLL document graph and write
# the resulting coreference chains back into p.document.
p = Pocores(cdg)
p.resolve_anaphora()
p.add_coreference_chains_to_docgraph()

In [4]:
get_pointing_chains(p.document)


Out[4]:
[['s13_t7', 's3_t8'],
 ['s16_t13', 's5_t5'],
 ['s11_t8', 's4_t3'],
 ['s16_t10', 's16_t9'],
 ['s8_t46', 's8_t27'],
 ['s3_t1', 's2_t7'],
 ['s10_t7', 's1_t1'],
 ['s8_t1', 's6_t13'],
 ['s15_t6',
  's15_t5',
  's14_t5',
  's11_t5',
  's10_t2',
  's9_t6',
  's9_t5',
  's8_t34',
  's6_t5',
  's3_t3',
  's2_t2'],
 ['s13_t3', 's9_t2', 's8_t13']]

In [5]:
get_pointing_chains(mdg)


Out[5]:
[['markable_100011', 'markable_3'],
 ['markable_18', 'markable_6'],
 ['markable_1000205', 'markable_1000200', 'markable_1000211'],
 ['markable_70',
  'markable_1000132',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_70', 'markable_1000132', 'markable_1000207', 'markable_1000208'],
 ['markable_70',
  'markable_1000131',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_70', 'markable_1000131', 'markable_1000207', 'markable_1000208'],
 ['markable_10', 'markable_1000206', 'markable_10004'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000132',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000132',
  'markable_1000207',
  'markable_1000208'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000131',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000131',
  'markable_1000207',
  'markable_1000208'],
 ['markable_24', 'markable_7'],
 ['markable_93', 'markable_16']]

FIXME: MMAXDocumentGraph lists each token_id twice, both in its `.tokens` attribute and in the output of `.get_tokens()`


In [6]:
# (disabled) side-by-side dump of the token streams of both graphs,
# used to debug the token duplication issue noted in the FIXME
import discoursegraphs as dg  # NOTE(review): imported but unused in this cell

# mdg_tokens = list(mdg.get_tokens())
# for i, (tid, tok) in enumerate(p.document.get_tokens()):
#     print i, tid, tok, mdg_tokens[i][1]

In [7]:
# Materialise both token streams so they can be compared below.
oldtoks = list(p.document.get_tokens())
newtoks = list(mdg.get_tokens())

In [8]:
len(oldtoks), len(newtoks)


Out[8]:
(212, 212)

In [9]:
# Sentences in the MMAX graph are markable nodes themselves
# (IDs of the form 'markable_50*', cf. the output below).
for sent_id in mdg.sentences:
    print sent_id,


markable_5000 markable_5001 markable_5002 markable_5003 markable_5004 markable_5005 markable_5006 markable_5007 markable_5008 markable_5009 markable_50010 markable_50011 markable_50012 markable_50013 markable_50014 markable_50015 markable_50016

In [10]:
len(mdg.tokens)


Out[10]:
212

In [11]:
# Dump all token node IDs ('word_*') in document order to check for the
# duplication mentioned in the FIXME above (none visible in the output).
for tok_id in mdg.tokens:
    print tok_id,


word_1 word_2 word_3 word_4 word_5 word_6 word_7 word_8 word_9 word_10 word_11 word_12 word_13 word_14 word_15 word_16 word_17 word_18 word_19 word_20 word_21 word_22 word_23 word_24 word_25 word_26 word_27 word_28 word_29 word_30 word_31 word_32 word_33 word_34 word_35 word_36 word_37 word_38 word_39 word_40 word_41 word_42 word_43 word_44 word_45 word_46 word_47 word_48 word_49 word_50 word_51 word_52 word_53 word_54 word_55 word_56 word_57 word_58 word_59 word_60 word_61 word_62 word_63 word_64 word_65 word_66 word_67 word_68 word_69 word_70 word_71 word_72 word_73 word_74 word_75 word_76 word_77 word_78 word_79 word_80 word_81 word_82 word_83 word_84 word_85 word_86 word_87 word_88 word_89 word_90 word_91 word_92 word_93 word_94 word_95 word_96 word_97 word_98 word_99 word_100 word_101 word_102 word_103 word_104 word_105 word_106 word_107 word_108 word_109 word_110 word_111 word_112 word_113 word_114 word_115 word_116 word_117 word_118 word_119 word_120 word_121 word_122 word_123 word_124 word_125 word_126 word_127 word_128 word_129 word_130 word_131 word_132 word_133 word_134 word_135 word_136 word_137 word_138 word_139 word_140 word_141 word_142 word_143 word_144 word_145 word_146 word_147 word_148 word_149 word_150 word_151 word_152 word_153 word_154 word_155 word_156 word_157 word_158 word_159 word_160 word_161 word_162 word_163 word_164 word_165 word_166 word_167 word_168 word_169 word_170 word_171 word_172 word_173 word_174 word_175 word_176 word_177 word_178 word_179 word_180 word_181 word_182 word_183 word_184 word_185 word_186 word_187 word_188 word_189 word_190 word_191 word_192 word_193 word_194 word_195 word_196 word_197 word_198 word_199 word_200 word_201 word_202 word_203 word_204 word_205 word_206 word_207 word_208 word_209 word_210 word_211 word_212

In [12]:
from discoursegraphs import create_token_mapping

# Map token node IDs between the two graphs in both directions
# (e.g. 's9_t5' <-> 'word_*'); both graphs tokenise the same text,
# cf. the identical token counts above.
pocores2mmax = create_token_mapping(p.document, mdg)
mmax2pocores = create_token_mapping(mdg, p.document)

In [13]:
mdg.merge_graphs(p.document)

In [14]:
get_pointing_chains(mdg)


Out[14]:
[['word_166', 'word_129', 'word_90'],
 ['word_143', 'word_1'],
 ['word_10', 'word_8'],
 ['word_201', 'word_200'],
 ['word_78', 'word_60'],
 ['markable_18', 'markable_6'],
 ['markable_10', 'markable_1000206', 'markable_10004'],
 ['markable_70',
  'markable_1000132',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_70', 'markable_1000132', 'markable_1000207', 'markable_1000208'],
 ['markable_70',
  'markable_1000131',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_70', 'markable_1000131', 'markable_1000207', 'markable_1000208'],
 ['markable_93', 'markable_16'],
 ['markable_100011', 'markable_3'],
 ['word_170', 'word_17'],
 ['markable_1000205', 'markable_1000200', 'markable_1000211'],
 ['word_153', 'word_23'],
 ['word_186',
  'word_185',
  'word_178',
  'word_150',
  'word_138',
  'word_133',
  'word_132',
  'word_111',
  'word_52',
  'word_12',
  'word_3'],
 ['word_204', 'word_37'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000132',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000132',
  'markable_1000207',
  'markable_1000208'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000131',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000131',
  'markable_1000207',
  'markable_1000208'],
 ['markable_24', 'markable_7'],
 ['word_123', 'word_104']]

In [15]:
get_pointing_chains(mdg, layer='mmax')


Out[15]:
[['markable_100011', 'markable_3'],
 ['markable_18', 'markable_6'],
 ['markable_1000205', 'markable_1000200', 'markable_1000211'],
 ['markable_70',
  'markable_1000132',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_70', 'markable_1000132', 'markable_1000207', 'markable_1000208'],
 ['markable_70',
  'markable_1000131',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_70', 'markable_1000131', 'markable_1000207', 'markable_1000208'],
 ['markable_10', 'markable_1000206', 'markable_10004'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000132',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000132',
  'markable_1000207',
  'markable_1000208'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000131',
  'markable_1000122',
  'markable_23',
  'markable_3'],
 ['markable_1000184',
  'markable_1000149',
  'markable_63',
  'markable_1000131',
  'markable_1000207',
  'markable_1000208'],
 ['markable_24', 'markable_7'],
 ['markable_93', 'markable_16']]

In [16]:
get_pointing_chains(mdg, layer='pocores')


Out[16]:
[['word_170', 'word_17'],
 ['word_143', 'word_1'],
 ['word_78', 'word_60'],
 ['word_153', 'word_23'],
 ['word_186',
  'word_185',
  'word_178',
  'word_150',
  'word_138',
  'word_133',
  'word_132',
  'word_111',
  'word_52',
  'word_12',
  'word_3'],
 ['word_201', 'word_200'],
 ['word_10', 'word_8'],
 ['word_123', 'word_104'],
 ['word_166', 'word_129', 'word_90'],
 ['word_204', 'word_37']]

In [17]:
# Show the surface words of each pocores chain; verbose=True also yields
# the token node IDs. NOTE(review): _get_wordlist is a private pocores API.
for chain in get_pointing_chains(mdg, layer='pocores'):
    print list(p._get_wordlist(chain, verbose=True))


[(u'Runde', 'word_170'), (u'Runde', 'word_17')]
[(u'Feigenblatt', 'word_143'), (u'Feigenblatt', 'word_1')]
[(u'Es', 'word_78'), (u'Wort', 'word_60')]
[(u'Politiker', 'word_153'), (u'Politiker', 'word_23')]
[(u'sich', 'word_186'), (u'sie', 'word_185'), (u'Jugendliche', 'word_178'), (u'ihnen', 'word_150'), (u'Jugendlichen', 'word_138'), (u'ihnen', 'word_133'), (u'sie', 'word_132'), (u'Jugendliche', 'word_111'), (u'Jugendlichen', 'word_52'), (u'sie', 'word_12'), (u'Jugendlichen', 'word_3')]
[(u'ihren', 'word_201'), (u'Gewerbeverein', 'word_200')]
[(u'Das', 'word_10'), (u'Musikcaf\xe9', 'word_8')]
[(u'sie', 'word_123'), (u'SPD', 'word_104')]
[(u'es', 'word_166'), (u'das', 'word_129'), (u'Andrae', 'word_90')]
[(u'Rathaus', 'word_204'), (u'Rathaus', 'word_37')]

In [18]:
# Show the surface text of each markable in every gold MMAX chain by
# resolving the markable's 'mmax:span' attribute to token text.
for chain in get_pointing_chains(mdg, layer='mmax'):
    print [(spanstring2text(mdg,mdg.node[node_id]['mmax:span']), node_id) for node_id in chain]


[(u'sie', 'markable_100011'), (u'Die Jugendlichen in Zossen', 'markable_3')]
[(u'Mit dem Treffen im Rathaus', 'markable_18'), (u'bei der ersten Zossener Runde', 'markable_6')]
[(u'Die', 'markable_1000205'), (u'ihren', 'markable_1000200'), (u'Vertreter von PDS und Gewerbeverein', 'markable_1000211')]
[(u'die Politiker', 'markable_70'), (u'ihnen', 'markable_1000132'), (u'sie', 'markable_1000122'), (u'die beiden geladenen Jugendlichen', 'markable_23'), (u'Die Jugendlichen in Zossen', 'markable_3')]
[(u'die Politiker', 'markable_70'), (u'ihnen', 'markable_1000132'), (u'Karola Andrae ( B\xfcrgerb\xfcndnis/FDP ) , Susanne Michler ( CDU ) und Joachim Zanow ( SPD', 'markable_1000207'), (u'drei Erwachsene', 'markable_1000208')]
[(u'die Politiker', 'markable_70'), (u'sie', 'markable_1000131'), (u'sie', 'markable_1000122'), (u'die beiden geladenen Jugendlichen', 'markable_23'), (u'Die Jugendlichen in Zossen', 'markable_3')]
[(u'die Politiker', 'markable_70'), (u'sie', 'markable_1000131'), (u'Karola Andrae ( B\xfcrgerb\xfcndnis/FDP ) , Susanne Michler ( CDU ) und Joachim Zanow ( SPD', 'markable_1000207'), (u'drei Erwachsene', 'markable_1000208')]
[(u'der Stadt', 'markable_10'), (u'Zossener', 'markable_1000206'), (u'in Zossen', 'markable_10004')]
[(u'sie', 'markable_1000184'), (u'mit ihnen', 'markable_1000149'), (u'Die Jugendlichen', 'markable_63'), (u'ihnen', 'markable_1000132'), (u'sie', 'markable_1000122'), (u'die beiden geladenen Jugendlichen', 'markable_23'), (u'Die Jugendlichen in Zossen', 'markable_3')]
[(u'sie', 'markable_1000184'), (u'mit ihnen', 'markable_1000149'), (u'Die Jugendlichen', 'markable_63'), (u'ihnen', 'markable_1000132'), (u'Karola Andrae ( B\xfcrgerb\xfcndnis/FDP ) , Susanne Michler ( CDU ) und Joachim Zanow ( SPD', 'markable_1000207'), (u'drei Erwachsene', 'markable_1000208')]
[(u'sie', 'markable_1000184'), (u'mit ihnen', 'markable_1000149'), (u'Die Jugendlichen', 'markable_63'), (u'sie', 'markable_1000131'), (u'sie', 'markable_1000122'), (u'die beiden geladenen Jugendlichen', 'markable_23'), (u'Die Jugendlichen in Zossen', 'markable_3')]
[(u'sie', 'markable_1000184'), (u'mit ihnen', 'markable_1000149'), (u'Die Jugendlichen', 'markable_63'), (u'sie', 'markable_1000131'), (u'Karola Andrae ( B\xfcrgerb\xfcndnis/FDP ) , Susanne Michler ( CDU ) und Joachim Zanow ( SPD', 'markable_1000207'), (u'drei Erwachsene', 'markable_1000208')]
[(u'des Abends', 'markable_24'), (u'am Dienstagabend', 'markable_7')]
[(u'ins Rathaus', 'markable_93'), (u'im Rathaus', 'markable_16')]

In [19]:
pocores_chains = get_pointing_chains(mdg, layer='pocores')
mmax_chains = get_pointing_chains(mdg, layer='mmax')

for chain in mmax_chains:
    for node_id in chain:
        span_str = mdg.node[node_id]['mmax:span']
        markable_was_found = any([token_id in chain
                               for token_id in spanstring2tokens(span_str)
                               for chain in pocores_chains])
        
        # at least one word in this markable was found by pocores
        markable_str = spanstring2text(mdg, span_str)
#         print markable_str, markable_was_found
        if not markable_was_found:
            markable_was_considered = any(mmax2pocores[mmax_token_id] in p.candidate_report
                                          for mmax_token_id in spanstring2tokens(span_str))
            if not markable_was_considered:
                print ("No word from the markable '{}' ({} - {}) "
                       "was even considered as an anaphora".format(markable_str, node_id, span_str))
            else:
                for mmax_token_id in spanstring2tokens(span_str):
                    if mmax2pocores[mmax_token_id] in p.candidate_report:
                        report = p.candidate_report[mmax2pocores[mmax_token_id]]
                        if report['anaphora_type'] == 'pronominal':
                            print "Candidate was falsely rejected."
                            print p.candidate_report[mmax2pocores[mmax_token_id]]
        
    print



No word from the markable 'Die' (markable_1000205 - word_206) was even considered as an anaphora





No word from the markable 'Zossener' (markable_1000206 - word_16) was even considered as an anaphora








In [20]:
# Dump pocores' internal report for every pronominal anaphora: the initial
# candidate set and which candidates survived each filter stage
# (distance, non_reflexive, agreement, binding -- cf. the output below).
for ana in p.candidate_report:
    if p.candidate_report[ana]['anaphora_type'] == 'pronominal':
        print ana, p.candidate_report[ana]


s9_t5 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s5_t13', 's6_t5', 's8_t10', 's8_t34'], 'filter_results': OrderedDict([('distance', (['s5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s5_t13', 's6_t5', 's8_t10', 's8_t34'], ', which are in morphological agreement with the anaphora')), ('binding', (['s5_t13', 's6_t5', 's8_t10', 's8_t34'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2']})
s13_t3 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s9_t2'], 'filter_results': OrderedDict([('distance', (['s9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7', 's11_t5', 's11_t8', 's12_t5'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7', 's11_t5', 's11_t8', 's12_t5'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s9_t2'], ', which are in morphological agreement with the anaphora')), ('binding', (['s9_t2'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7', 's11_t5', 's11_t8', 's12_t5']})
s9_t2 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s5_t3', 's5_t5', 's6_t13', 's8_t1', 's8_t13'], 'filter_results': OrderedDict([('distance', (['s5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s5_t3', 's5_t5', 's6_t13', 's8_t1', 's8_t13'], ', which are in morphological agreement with the anaphora')), ('binding', (['s5_t3', 's5_t5', 's6_t13', 's8_t1', 's8_t13'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46']})
s11_t5 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s8_t10', 's8_t34', 's9_t5', 's9_t6', 's10_t2'], 'filter_results': OrderedDict([('distance', (['s8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s8_t1', 's8_t5', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s8_t10', 's8_t34', 's9_t5', 's9_t6', 's10_t2'], ', which are in morphological agreement with the anaphora')), ('binding', (['s8_t10', 's8_t34', 's9_t5', 's9_t6', 's10_t2'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7']})
s16_t10 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s13_t8', 's16_t9'], 'filter_results': OrderedDict([('distance', (['s12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3', 's15_t5', 's15_t6', 's15_t9', 's16_t5', 's16_t7', 's16_t9'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3', 's15_t5', 's15_t9', 's16_t5', 's16_t7', 's16_t9'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s13_t8', 's16_t9'], ', which are in morphological agreement with the anaphora')), ('binding', (['s13_t8', 's16_t9'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7', 's11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3', 's15_t5', 's15_t6', 's15_t9', 's16_t5', 's16_t7', 's16_t9']})
s8_t46 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s4_t5', 's6_t20', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t27'], 'filter_results': OrderedDict([('distance', (['s4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s4_t5', 's6_t20', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t27', 's8_t42'], ', which are in morphological agreement with the anaphora')), ('binding', (['s4_t5', 's6_t20', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t27'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42']})
s8_t1 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s4_t7', 's5_t3', 's5_t5', 's6_t13'], 'filter_results': OrderedDict([('distance', (['s4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s4_t7', 's5_t3', 's5_t5', 's6_t13'], ', which are in morphological agreement with the anaphora')), ('binding', (['s4_t7', 's5_t3', 's5_t5', 's6_t13'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20']})
s15_t6 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s15_t5'], 'filter_results': OrderedDict([('distance', (['s11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3', 's15_t5'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3', 's15_t5'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s11_t5', 's11_t8', 's14_t5', 's15_t5'], ', which are in morphological agreement with the anaphora')), ('binding', (['s15_t5'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7', 's11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3', 's15_t5']})
s8_t8 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': [], 'filter_results': OrderedDict([('distance', (['s4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s4_t3', 's5_t13', 's6_t5'], ', which are in morphological agreement with the anaphora')), ('binding', ([], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5']})
s15_t5 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s11_t5', 's11_t8', 's14_t5'], 'filter_results': OrderedDict([('distance', (['s11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s11_t5', 's11_t8', 's14_t5'], ', which are in morphological agreement with the anaphora')), ('binding', (['s11_t5', 's11_t8', 's14_t5'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5', 's9_t6', 's10_t2', 's10_t7', 's11_t5', 's11_t8', 's12_t5', 's13_t3', 's13_t7', 's13_t8', 's14_t5', 's15_t3']})
s9_t6 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s8_t10', 's8_t34', 's9_t5'], 'filter_results': OrderedDict([('distance', (['s5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s8_t10', 's8_t34', 's9_t5'], ', which are in morphological agreement with the anaphora')), ('binding', (['s8_t10', 's8_t34', 's9_t5'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1', 's3_t3', 's3_t8', 's3_t10', 's4_t3', 's4_t5', 's4_t7', 's5_t3', 's5_t5', 's5_t10', 's5_t13', 's6_t5', 's6_t7', 's6_t9', 's6_t13', 's6_t20', 's8_t1', 's8_t5', 's8_t8', 's8_t10', 's8_t13', 's8_t15', 's8_t19', 's8_t21', 's8_t25', 's8_t27', 's8_t34', 's8_t42', 's8_t46', 's9_t2', 's9_t5']})
s3_t1 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s1_t1', 's2_t4', 's2_t7'], 'filter_results': OrderedDict([('distance', (['s1_t1', 's2_t2', 's2_t4', 's2_t7'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s1_t1', 's2_t2', 's2_t4', 's2_t7'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s1_t1', 's2_t4', 's2_t7'], ', which are in morphological agreement with the anaphora')), ('binding', (['s1_t1', 's2_t4', 's2_t7'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7']})
s3_t3 defaultdict(<type 'str'>, {'anaphora_type': 'pronominal', 'filtered_candidates': ['s2_t2'], 'filter_results': OrderedDict([('distance', (['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1'], 'Candidates mentioned no more than 4 sentences ago')), ('non_reflexive', (['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1'], ", which also don't represent reflexive personal pronouns, e.g. sich, einander, dich, mir")), ('agreement', (['s2_t2'], ', which are in morphological agreement with the anaphora')), ('binding', (['s2_t2'], 'and which can be bound by the anaphora'))]), 'candidates': ['s1_t1', 's2_t2', 's2_t4', 's2_t7', 's3_t1']})

In [21]:
for chain in get_pointing_chains(mdg):
    for node_id in chain:
        print node_id, spanstring2text(mdg, mdg.node[node_id]['mmax:span'])
        print '\t', mdg.node[node_id]['mmax:type']
    print '\n'
    

for chain in get_pointing_chains(p.document):
    for node_id in chain:
        print node_id,
        print '\t', p.document.node[node_id]['pocores:type']
    print '\n'


word_166
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-21-6aa46bf95fcb> in <module>()
      1 for chain in get_pointing_chains(mdg):
      2     for node_id in chain:
----> 3         print node_id, spanstring2text(mdg, mdg.node[node_id]['mmax:span'])
      4         print '\t', mdg.node[node_id]['mmax:type']
      5     print '\n'

KeyError: 'mmax:span'