In [1]:
import os
import glob
import tempfile
import sh

import discoursegraphs as dg
from discoursegraphs.readwrite import MMAXDocumentGraph, write_conll
from discoursekernels.util import draw_multiple_graphs


Couldn't import dot_parser, loading of dot files will not be possible.

In [2]:
%load_ext gvmagic
from discoursegraphs import print_dot

In [3]:
# wget http://www.ling.uni-potsdam.de/acl-lab/Forsch/pcc/potsdam-commentary-corpus-2.0.0.zip
# unzip potsdam-commentary-corpus-2.0.0.zip -d ~/corpora

In [4]:
# directory with the MMAX2 coreference annotations of the MAZ176 corpus
# NOTE(review): home-relative path -- adjust for your machine
MMAX_DIR = os.path.expanduser('~/repos/pcc-annis-merged/maz176/coreference')

In [5]:
# grab a copy of the official CoNLL scorer using git-svn instead of svn
# git svn clone http://reference-coreference-scorers.googlecode.com/svn/trunk/ ~/repos/reference-coreference-scorers

In [6]:
# path to the official CoNLL coreference scorer (scorer.pl, v8.01)
# NOTE(review): absolute local path -- adjust for your machine
SCORER_PATH = '/home/arne/repos/reference-coreference-scorers/scorer.pl'

In [7]:
# wrap scorer.pl in a callable; invoking it without arguments prints its usage
scorer = sh.Command(SCORER_PATH)
scorer()


Out[7]:
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm

use: scorer.pl <metric> <keys_file> <response_file> [name]

  metric: the metric desired to score the results:
    muc: MUCScorer (Vilain et al, 1995)
    bcub: B-Cubed (Bagga and Baldwin, 1998)
    ceafm: CEAF (Luo et al, 2005) using mention-based similarity
    ceafe: CEAF (Luo et al, 2005) using entity-based similarity
    blanc: BLANC
    all: uses all the metrics to score

  keys_file: file with expected coreference chains in SemEval format

  response_file: file with output of coreference system (SemEval format)

  name: [optional] the name of the document to score. If name is not
    given, all the documents in the dataset will be scored. If given
    name is "none" then all the documents are scored but only total
    results are shown.

Compare CoNLL files against themselves

We're now using the official CoNLL scorer to compare each
MAZ176 coreference annotated document against itself.
All comparisons should result in an F1 of 100%


In [8]:
import sys

def has_valid_annotation(mmax_file, scorer_path, metric, verbose=False):
    """
    Return True iff scoring an MMAX2 coreference file against itself
    with scorer.pl yields a perfect (100% F1) result.

    Parameters
    ----------
    mmax_file : str
        path to an MMAX2 coreference annotation file
    scorer_path : str
        path to the official CoNLL scorer (scorer.pl)
    metric : str
        muc, bcub, ceafm, ceafe, blanc
    verbose : bool or str
        True, False or 'very' ('very' also writes the scorer's complete
        output to stderr)

    Returns
    -------
    bool
        False if the self-comparison is imperfect or the scorer crashes.
    """
    scorer = sh.Command(scorer_path)
    mdg = MMAXDocumentGraph(mmax_file)
    # write the document to a temporary CoNLL file the scorer can read;
    # use the platform's temp directory instead of a hardcoded '/tmp'
    # (the top-level `tempfile` import was previously unused)
    conll_fname = os.path.join(
        tempfile.gettempdir(),
        '{}.conll'.format(os.path.basename(mmax_file)))
    write_conll(mdg, conll_fname)
    try:
        # compare the file against itself; a valid annotation must score 100%
        results = scorer(metric, conll_fname, conll_fname)
        # the second-to-last stdout line carries the final score
        scores_str = results.stdout.splitlines()[-2]
        if not scores_str.endswith('100%'):
            if verbose == 'very':
                sys.stderr.write("{}\n{}\n".format(conll_fname, results))
            elif verbose:
                sys.stderr.write("{}\n{}\n".format(conll_fname, scores_str))
            return False
    except sh.ErrorReturnCode as e:
        # the scorer crashed on this file (e.g. malformed chains)
        if verbose:
            sys.stderr.write("Error in '{}'\n{}".format(conll_fname, e))
        return False
    return True


def get_bad_scoring_files(mmax_dir, scorer_path, metric, verbose=False):
    """
    returns filepaths of MMAX2 coreference files which don't produce perfect
    results when testing them against themselves with scorer.pl
    """
    mmax_files = glob.glob(os.path.join(mmax_dir, '*.mmax'))
    return [mmax_file for mmax_file in mmax_files
            if not has_valid_annotation(mmax_file, scorer_path, metric,
                                        verbose=verbose)]

In [9]:
# files whose BLANC self-comparison is not a perfect 100% F1
blanc_errors = get_bad_scoring_files(MMAX_DIR, SCORER_PATH, 'blanc', verbose=True)


/tmp/maz-9884.mmax.conll
BLANC: Recall: (0.965336804537853 / 1) 96.53%	Precision: (1 / 1) 100%	F1: 98.21%
/tmp/maz-11735.mmax.conll
BLANC: Recall: (0.998168498168498 / 1) 99.81%	Precision: (1 / 1) 100%	F1: 99.9%
/tmp/maz-15347.mmax.conll
BLANC: Recall: (0.9989816700611 / 1) 99.89%	Precision: (1 / 1) 100%	F1: 99.94%
/tmp/maz-5012.mmax.conll
BLANC: Recall: (0.997727272727273 / 1) 99.77%	Precision: (1 / 1) 100%	F1: 99.88%
/tmp/maz-17953.mmax.conll
BLANC: Recall: (0.991241039426523 / 1) 99.12%	Precision: (1 / 1) 100%	F1: 99.55%
/tmp/maz-4472.mmax.conll
BLANC: Recall: (0.999315068493151 / 1) 99.93%	Precision: (1 / 1) 100%	F1: 99.96%
/tmp/maz-5297.mmax.conll
BLANC: Recall: (0.998806682577566 / 1) 99.88%	Precision: (1 / 1) 100%	F1: 99.94%
/tmp/maz-4282.mmax.conll
BLANC: Recall: (0.957070707070707 / 1) 95.7%	Precision: (1 / 1) 100%	F1: 97.76%
/tmp/maz-12383.mmax.conll
BLANC: Recall: (0.998820754716981 / 1) 99.88%	Precision: (1 / 1) 100%	F1: 99.94%
/tmp/maz-8134.mmax.conll
BLANC: Recall: (0.999399759903962 / 1) 99.93%	Precision: (1 / 1) 100%	F1: 99.96%
/tmp/maz-8361.mmax.conll
BLANC: Recall: (0.999300699300699 / 1) 99.93%	Precision: (1 / 1) 100%	F1: 99.96%
/tmp/maz-3367.mmax.conll
BLANC: Recall: (0.999040307101727 / 1) 99.9%	Precision: (1 / 1) 100%	F1: 99.95%
/tmp/maz-3415.mmax.conll
BLANC: Recall: (0.99966078697422 / 1) 99.96%	Precision: (1 / 1) 100%	F1: 99.98%
/tmp/maz-11299.mmax.conll
BLANC: Recall: (0.999518768046198 / 1) 99.95%	Precision: (1 / 1) 100%	F1: 99.97%
/tmp/maz-14590.mmax.conll
BLANC: Recall: (0.99803536345776 / 1) 99.8%	Precision: (1 / 1) 100%	F1: 99.9%
/tmp/maz-15734.mmax.conll
BLANC: Recall: (0.998756218905473 / 1) 99.87%	Precision: (1 / 1) 100%	F1: 99.93%
/tmp/maz-12510.mmax.conll
BLANC: Recall: (0.998296422487223 / 1) 99.82%	Precision: (1 / 1) 100%	F1: 99.91%
/tmp/maz-14047.mmax.conll
BLANC: Recall: (0.999248120300752 / 1) 99.92%	Precision: (1 / 1) 100%	F1: 99.96%
/tmp/maz-17254.mmax.conll
BLANC: Recall: (0.979511867442902 / 1) 97.95%	Precision: (1 / 1) 100%	F1: 98.95%
/tmp/maz-6993.mmax.conll
BLANC: Recall: (0.996495327102804 / 1) 99.64%	Precision: (1 / 1) 100%	F1: 99.82%

Do all metrics choke on the same files?


In [10]:
# map each scoring metric to the files it flags as erroneous
bad_scoring_files = {
    metric: get_bad_scoring_files(MMAX_DIR, SCORER_PATH, metric, verbose=False)
    for metric in ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc')}

In [11]:
for metric in bad_scoring_files:
    print "number of erroneous files found by '{}': {}".format(metric, len(bad_scoring_files[metric]))

all_bad_files = set()
for metric in bad_scoring_files:
    all_bad_files.update(bad_scoring_files[metric])

print "total number of erroneous files:", len(all_bad_files)


number of erroneous files found by 'bcub': 20
number of erroneous files found by 'ceafe': 20
number of erroneous files found by 'ceafm': 20
number of erroneous files found by 'muc': 6
number of erroneous files found by 'blanc': 20
total number of erroneous files: 20

In [12]:
from discoursegraphs import get_pointing_chains

In [13]:
from discoursegraphs.readwrite.mmax2 import spanstring2text

def print_all_chains(docgraph):
    """
    print a list of all pointing chains (i.e coreference chains)
    contained in a document graph
    """
    for chain in get_pointing_chains(docgraph):
        for node_id in chain:
            print node_id, spanstring2text(docgraph, docgraph.node[node_id][docgraph.ns+':span'])
        print '\n'

In [14]:
# example document: inspect the coreference chains of maz-3377
mdg = MMAXDocumentGraph(os.path.join(MMAX_DIR, 'maz-3377.mmax'))
print_all_chains(mdg)


markable_16 dieser
markable_15 den Bürgermeister


markable_14 sie
markable_13 sie
markable_9 die SPD-Fraktion
markable_3 alle
markable_1 unter den Dallgower Kommunalpolitikern


markable_38 sie
markable_36 Die anderen


markable_29 der Freien Wählergemeinschaft
markable_30 eine Fraktion


markable_28 ihrer
markable_26 sie
markable_20 die Christdemokraten
markable_19 die CDU


Hypothesis 1: all markables occurring in more than one coreference chain produce scoring errors


In [15]:
from itertools import combinations

def get_ambiguous_markables(mmax_docgraph):
    """returns a list of markables that occur in more than one coreference chain

    Each ambiguous markable is returned only once, even if it is shared
    by more than two chains (the previous version appended one copy per
    pair of chains sharing it).
    """
    ambiguous_markables = set()
    # materialize the chains as sets for pairwise intersection
    chain_sets = [set(chain) for chain in get_pointing_chains(mmax_docgraph)]
    for chain1, chain2 in combinations(chain_sets, 2):
        # any markable shared by two distinct chains is ambiguous
        ambiguous_markables.update(chain1.intersection(chain2))
    return list(ambiguous_markables)

In [16]:
# collect all files containing at least one ambiguous coreference chain
files_with_ambigious_chains = [
    mmax_file
    for mmax_file in glob.glob(os.path.join(MMAX_DIR, '*.mmax'))
    if get_ambiguous_markables(MMAXDocumentGraph(mmax_file))]

In [17]:
print "# of files with ambiguous coreference chains: ", len(files_with_ambigious_chains)
print "# of files scorer.pl doesn't like: ", len(bad_scoring_files)
if len(files_with_ambigious_chains) > 0:
    print "percent of files w/ ambiguous chains that scorer.pl doesn't like:", \
        len( set(files_with_ambigious_chains).intersection(set(bad_scoring_files)) ) / len(files_with_ambigious_chains) * 100


# of files with ambiguous coreference chains:  0
# of files scorer.pl doesn't like:  5

Initially, this hypothesis held. After Markus fixed a number of annotations,
Hypothesis 1 could no longer be confirmed.

Hypothesis 2: non-contiguous markables cause trouble


In [18]:
# test this with
# markable_32 auf beiden Seiten
# markable_56 Arafat Scharon


# document known to contain non-contiguous markables
mdg = MMAXDocumentGraph(os.path.join(MMAX_DIR, 'maz-19074.mmax'))

In [19]:
# inspect the attribute dict of a non-contiguous markable
mdg.node['markable_56']


Out[19]:
{'label': 'markable_56:groups',
 'layers': {'mmax',
  'mmax:groups',
  'mmax:markable',
  'mmax:primmark',
  'mmax:secmark'},
 'mmax:id': 'markable_56',
 'mmax:mmax_level': 'groups',
 'mmax:span': 'word_42,word_45'}

potential error

  • markable is both primmark and secmark

In [20]:
from discoursegraphs import get_span, select_nodes_by_layer

def get_noncontiguous_markables(docgraph):
    """return all markables that don't represent adjacent tokens

    Each non-contiguous markable is returned only once, even if its span
    contains several gaps (the previous version appended one copy per gap).
    """
    noncontiguous_markables = []
    # map each token ID to its linear position in the document
    id2index = {tok_id: i for i, tok_id in enumerate(docgraph.tokens)}
    for markable in select_nodes_by_layer(docgraph, docgraph.ns+':markable'):
        span_indices = [id2index[tok_id]
                        for tok_id in get_span(docgraph, markable)]
        # a markable is non-contiguous iff any two neighbouring tokens of
        # its span are more than one position apart
        if any(right - left != 1
               for left, right in zip(span_indices, span_indices[1:])):
            noncontiguous_markables.append(markable)
    return noncontiguous_markables

In [21]:
# collect all files that contain at least one non-contiguous markable
files_with_noncontiguous_markables = [
    mmax_file
    for mmax_file in glob.glob(os.path.join(MMAX_DIR, '*.mmax'))
    if get_noncontiguous_markables(MMAXDocumentGraph(mmax_file))]

In [22]:
print "# of files with non-continuous markables: ", len(files_with_noncontiguous_markables)
print "# of files scorer.pl doesn't like: ", len(bad_scoring_files)
print "percent of files w/ non-continuous markables that scorer.pl doesn't like:", \
    len( set(files_with_noncontiguous_markables).intersection(set(bad_scoring_files)) ) / len(files_with_noncontiguous_markables) * 100


# of files with non-continuous markables:  133
# of files scorer.pl doesn't like:  5
percent of files w/ non-continuous markables that scorer.pl doesn't like: 0

Hypothesis 2 doesn't hold.

Let's check files w/out ambiguous coreference chains that scorer.pl doesn't like


In [23]:
# bad-scoring files that are NOT explained by ambiguous chains
mysterious_files = [os.path.basename(fname)
                    for fname in set(all_bad_files).difference(set(files_with_ambigious_chains))]

In [24]:
len(mysterious_files)


Out[24]:
20

In [25]:
# for fname in mysterious_files:
#     mdg = MMAXDocumentGraph(os.path.join(MMAX_DIR, fname))
#     print fname, '\n==============\n\n'
#     try:
#         print_all_chains(mdg)
#     except Exception as e:
#         print "\n{} FAILED: {}".format(fname, e)

Visualizing ambiguous coreference annotations with discoursegraphs

fortunately, the current version doesn't have any


In [26]:
from collections import defaultdict
import networkx as nx

from discoursegraphs import get_text

def get_ambiguous_chains(mmax_docgraph, token_labels=False):
    """
    Returns a list of networkx graphs that represent ambiguous
    coreference chains. An ambiguous chain represents two or more
    coreference chains that share at least one markable.

    There should be no ambiguous coreference chains, but the
    current version of our annotation guidelines allow them. // SRSLY?

    Parameters
    ----------
    mmax_docgraph : MMAXDocumentGraph
        the document graph to extract ambiguous chains from
    token_labels : bool
        if True, label each graph node with the markable's token string

    Returns
    -------
    chain_graphs : list of nx.MultiDiGraph
        one graph per ambiguous markable, containing all the chains
        that share it
    """
    ambiguous_markables = get_ambiguous_markables(mmax_docgraph)
    coreference_chains = get_pointing_chains(mmax_docgraph)
    # map each ambiguous markable to the indices of the chains it occurs in
    markable2chain = defaultdict(list)
    for i, chain in enumerate(coreference_chains):
        for markable in chain:
            if markable in ambiguous_markables:
                markable2chain[markable].append(i)

    chain_graphs = []
    for ambig_markable in markable2chain:
        ambig_chain_ids = markable2chain[ambig_markable]
        chain_graph = nx.MultiDiGraph()
        chain_graph.name = mmax_docgraph.name
        for chain_id in ambig_chain_ids:
            ambig_chain = coreference_chains[chain_id]
            # BUGFIX(readability): the inner loop variable used to shadow
            # the outer `markable`; rename it to make the data flow clear
            for i, source in enumerate(ambig_chain[:-1]):
                # add an edge from each markable to its successor in the chain
                chain_graph.add_edge(source, ambig_chain[i+1])

        if token_labels:
            for node in chain_graph.nodes_iter():
                node_text = get_text(mmax_docgraph, node)
                chain_graph.node[node]['label'] = node_text

        chain_graphs.append(chain_graph)
    return chain_graphs

In [27]:
def merge_ambiguous_chains(ambiguous_chains):
    """
    Merge several ambiguous chain graphs into one DiGraph and highlight
    the ambiguous markables.

    Parameters
    ----------
    ambiguous_chains : list of MultiDiGraph
        a list of graphs, each representing an ambiguous coreference chain

    Returns
    -------
    merged_chain : nx.DiGraph
        composition of all input graphs; a blue 'name' node carries the
        document name, and nodes with more than one incoming or outgoing
        edge are colored red
    """
    merged_chain = nx.DiGraph(nx.compose_all(ambiguous_chains))
    merged_chain.add_node('name', shape='tab', color='blue',
                          label=ambiguous_chains[0].name)
    # markables occurring in several chains show up as branching nodes
    branching_nodes = [node for node in merged_chain
                       if merged_chain.in_degree(node) > 1
                       or merged_chain.out_degree(node) > 1]
    for node in branching_nodes:
        merged_chain.node[node]['color'] = 'red'
    return merged_chain

In [28]:
len(files_with_ambigious_chains) # nothing to see here, move on!


Out[28]:
0

Are there any files without chains? no!


In [29]:
from discoursegraphs import info

files_without_chains = []
for mmax_file in glob.glob(os.path.join(MMAX_DIR, '*.mmax')):
    mdg = MMAXDocumentGraph(mmax_file)
    if not get_pointing_chains(mdg):
        files_without_chains.append(os.path.basename(mmax_file))
#         info(mdg)
#         print '\n\n'
        
print files_without_chains


[]

What's wrong with the remaining files?

  • files that produce an F1 of less than 100%
  • most of them are just off by one allegedly 'invented' entity

In [30]:
# show the scorer's complete output for each remaining problematic file
for fname in mysterious_files:
    has_valid_annotation(os.path.join(MMAX_DIR, fname), SCORER_PATH, 'muc', verbose='very')


/tmp/maz-6993.mmax.conll
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm
====> (__); __:
File (__); __:
Entity 0: (3,3) (73,76) (154,154)
Entity 1: (12,12)
Entity 2: (15,24)
Entity 3: (16,22) (139,147)
Entity 4: (20,22)
Entity 5: (31,32) (49,50) (109,110)
Entity 6: (39,39) (151,154)
Entity 7: (45,50) (106,110)
Entity 8: (54,59)
Entity 9: (63,64)
Entity 10: (69,76)
Entity 11: (79,80) (92,92)
Entity 12: (82,94)
Entity 13: (96,96)
Entity 14: (104,110)
Entity 15: (115,116)
Entity 16: (127,134)
Entity 17: (130,130)
Entity 18: (132,132)
Entity 19: (134,134)
Entity 20: (136,154)
Entity 21: (136,154)
Entity 22: (139,147)
Entity 23: (139,147)
Entity 24: (145,147)
====> (__); __:
File (__); __:
Entity 0: (3,3) (73,76) (154,154)
Entity 1: (12,12)
Entity 2: (15,24)
Entity 3: (16,22) (139,147)
Entity 4: (20,22)
Entity 5: (31,32) (49,50) (109,110)
Entity 6: (39,39) (151,154)
Entity 7: (45,50) (106,110)
Entity 8: (54,59)
Entity 9: (63,64)
Entity 10: (69,76)
Entity 11: (79,80) (92,92)
Entity 12: (82,94)
Entity 13: (96,96)
Entity 14: (104,110)
Entity 15: (115,116)
Entity 16: (127,134)
Entity 17: (130,130)
Entity 18: (132,132)
Entity 19: (134,134)
Entity 20: (136,154)
Entity 21: (136,154)
Entity 22: (139,147)
Entity 23: (139,147)
Entity 24: (145,147)
(__); __:
Repeated mention in the key: 136, 154 2829
Repeated mention in the key: 139, 147 630
Repeated mention in the key: 139, 147 3031
Repeated mention in the response: 136, 154 2929
Repeated mention in the response: 139, 147 3131
Repeated mention in the response: 139, 147 3131
Total key mentions: 30
Total response mentions: 30
Strictly correct identified mentions: 30
Partially correct identified mentions: 0
No identified: 0
Invented: 3
Recall: (7 / 8) 87.5%	Precision: (7 / 8) 87.5%	F1: 87.5%
--------------------------------------------------------------------------

====== TOTALS =======
Identification of Mentions: Recall: (30 / 30) 100%	Precision: (30 / 30) 100%	F1: 100%
--------------------------------------------------------------------------
Coreference: Recall: (7 / 8) 87.5%	Precision: (7 / 8) 87.5%	F1: 87.5%
--------------------------------------------------------------------------

/tmp/maz-17254.mmax.conll
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm
====> (__); __:
File (__); __:
Entity 0: (1,8)
Entity 1: (4,6) (41,42) (121,121) (179,179)
Entity 2: (4,8) (64,64) (92,92) (126,126) (137,137) (161,161)
Entity 3: (4,8) (46,47)
Entity 4: (11,20)
Entity 5: (16,18) (32,33) (48,48)
Entity 6: (29,33)
Entity 7: (37,47)
Entity 8: (39,42)
Entity 9: (43,47)
Entity 10: (53,55)
Entity 11: (62,65)
Entity 12: (66,68)
Entity 13: (74,76) (78,78)
Entity 14: (77,79) (108,110)
Entity 15: (84,84)
Entity 16: (86,90)
Entity 17: (95,96)
Entity 18: (101,105)
Entity 19: (121,123) (130,130)
Entity 20: (127,128)
Entity 21: (129,131)
Entity 22: (141,142)
Entity 23: (150,156) (158,158)
Entity 24: (154,154)
Entity 25: (156,156)
Entity 26: (169,175)
Entity 27: (173,173)
Entity 28: (175,175)
Entity 29: (178,180)
====> (__); __:
File (__); __:
Entity 0: (1,8)
Entity 1: (4,6) (41,42) (121,121) (179,179)
Entity 2: (4,8) (64,64) (92,92) (126,126) (137,137) (161,161)
Entity 3: (4,8) (46,47)
Entity 4: (11,20)
Entity 5: (16,18) (32,33) (48,48)
Entity 6: (29,33)
Entity 7: (37,47)
Entity 8: (39,42)
Entity 9: (43,47)
Entity 10: (53,55)
Entity 11: (62,65)
Entity 12: (66,68)
Entity 13: (74,76) (78,78)
Entity 14: (77,79) (108,110)
Entity 15: (84,84)
Entity 16: (86,90)
Entity 17: (95,96)
Entity 18: (101,105)
Entity 19: (121,123) (130,130)
Entity 20: (127,128)
Entity 21: (129,131)
Entity 22: (141,142)
Entity 23: (150,156) (158,158)
Entity 24: (154,154)
Entity 25: (156,156)
Entity 26: (169,175)
Entity 27: (173,173)
Entity 28: (175,175)
Entity 29: (178,180)
(__); __:
Repeated mention in the key: 4, 8 511
Repeated mention in the response: 4, 8 1111
Total key mentions: 44
Total response mentions: 44
Strictly correct identified mentions: 44
Partially correct identified mentions: 0
No identified: 0
Invented: 1
Recall: (13 / 15) 86.66%	Precision: (13 / 14) 92.85%	F1: 89.65%
--------------------------------------------------------------------------

====== TOTALS =======
Identification of Mentions: Recall: (44 / 44) 100%	Precision: (44 / 44) 100%	F1: 100%
--------------------------------------------------------------------------
Coreference: Recall: (13 / 15) 86.66%	Precision: (13 / 14) 92.85%	F1: 89.65%
--------------------------------------------------------------------------

/tmp/maz-4282.mmax.conll
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm
====> (__); __:
File (__); __:
Entity 0: (8,10)
Entity 1: (9,9) (61,61)
Entity 2: (9,43) (127,130)
Entity 3: (15,20)
Entity 4: (19,20) (114,117)
Entity 5: (25,27)
Entity 6: (28,31) (44,44)
Entity 7: (28,46) (90,91)
Entity 8: (39,43)
Entity 9: (39,43) (65,66)
Entity 10: (39,43)
Entity 11: (60,62)
Entity 12: (65,69) (85,85) (89,89)
Entity 13: (71,72)
Entity 14: (99,100)
Entity 15: (105,108)
Entity 16: (125,126) (134,134) (141,141)
Entity 17: (147,148)
Entity 18: (153,153)
Entity 19: (161,163)
Entity 20: (171,173)
====> (__); __:
File (__); __:
Entity 0: (8,10)
Entity 1: (9,9) (61,61)
Entity 2: (9,43) (127,130)
Entity 3: (15,20)
Entity 4: (19,20) (114,117)
Entity 5: (25,27)
Entity 6: (28,31) (44,44)
Entity 7: (28,46) (90,91)
Entity 8: (39,43)
Entity 9: (39,43) (65,66)
Entity 10: (39,43)
Entity 11: (60,62)
Entity 12: (65,69) (85,85) (89,89)
Entity 13: (71,72)
Entity 14: (99,100)
Entity 15: (105,108)
Entity 16: (125,126) (134,134) (141,141)
Entity 17: (147,148)
Entity 18: (153,153)
Entity 19: (161,163)
Entity 20: (171,173)
(__); __:
Repeated mention in the key: 39, 43 1314
Repeated mention in the key: 39, 43 1416
Repeated mention in the response: 39, 43 1616
Repeated mention in the response: 39, 43 1616
Total key mentions: 29
Total response mentions: 29
Strictly correct identified mentions: 29
Partially correct identified mentions: 0
No identified: 0
Invented: 2
Recall: (9 / 10) 90%	Precision: (9 / 9) 100%	F1: 94.73%
--------------------------------------------------------------------------

====== TOTALS =======
Identification of Mentions: Recall: (29 / 29) 100%	Precision: (29 / 29) 100%	F1: 100%
--------------------------------------------------------------------------
Coreference: Recall: (9 / 10) 90%	Precision: (9 / 9) 100%	F1: 94.73%
--------------------------------------------------------------------------

/tmp/maz-14590.mmax.conll
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm
====> (__); __:
File (__); __:
Entity 0: (4,5)
Entity 1: (6,8) (11,16) (106,107)
Entity 2: (18,19)
Entity 3: (26,29)
Entity 4: (34,35) (77,80) (135,136)
Entity 5: (37,46) (53,53)
Entity 6: (37,103) (116,116) (121,121)
Entity 7: (43,46)
Entity 8: (44,46)
Entity 9: (46,46) (59,59) (73,73) (90,91) (109,110)
Entity 10: (53,73)
Entity 11: (55,60)
Entity 12: (55,65)
Entity 13: (55,73) (85,87)
Entity 14: (55,73)
Entity 15: (55,69)
Entity 16: (58,60)
Entity 17: (79,80)
Entity 18: (94,103)
Entity 19: (100,103)
Entity 20: (101,103)
Entity 21: (130,131)
====> (__); __:
File (__); __:
Entity 0: (4,5)
Entity 1: (6,8) (11,16) (106,107)
Entity 2: (18,19)
Entity 3: (26,29)
Entity 4: (34,35) (77,80) (135,136)
Entity 5: (37,46) (53,53)
Entity 6: (37,103) (116,116) (121,121)
Entity 7: (43,46)
Entity 8: (44,46)
Entity 9: (46,46) (59,59) (73,73) (90,91) (109,110)
Entity 10: (53,73)
Entity 11: (55,60)
Entity 12: (55,65)
Entity 13: (55,73) (85,87)
Entity 14: (55,73)
Entity 15: (55,69)
Entity 16: (58,60)
Entity 17: (79,80)
Entity 18: (94,103)
Entity 19: (100,103)
Entity 20: (101,103)
Entity 21: (130,131)
(__); __:
Repeated mention in the key: 55, 73 2426
Repeated mention in the response: 55, 73 2626
Total key mentions: 33
Total response mentions: 33
Strictly correct identified mentions: 33
Partially correct identified mentions: 0
No identified: 0
Invented: 1
Recall: (11 / 12) 91.66%	Precision: (11 / 12) 91.66%	F1: 91.66%
--------------------------------------------------------------------------

====== TOTALS =======
Identification of Mentions: Recall: (33 / 33) 100%	Precision: (33 / 33) 100%	F1: 100%
--------------------------------------------------------------------------
Coreference: Recall: (11 / 12) 91.66%	Precision: (11 / 12) 91.66%	F1: 91.66%
--------------------------------------------------------------------------

/tmp/maz-17953.mmax.conll
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm
====> (__); __:
File (__); __:
Entity 0: (4,18)
Entity 1: (4,7)
Entity 2: (6,7)
Entity 3: (6,18) (178,181)
Entity 4: (9,12)
Entity 5: (11,12)
Entity 6: (14,18)
Entity 7: (16,18)
Entity 8: (20,23) (28,28) (61,61) (66,66) (113,115) (122,122) (164,164) (169,169) (177,177) (179,179) (196,196)
Entity 9: (22,23)
Entity 10: (24,25)
Entity 11: (30,52) (159,161)
Entity 12: (35,37)
Entity 13: (44,48)
Entity 14: (46,48)
Entity 15: (49,52)
Entity 16: (73,73)
Entity 17: (80,89)
Entity 18: (84,86) (97,98) (107,107)
Entity 19: (84,89)
Entity 20: (84,89) (102,105)
Entity 21: (92,93)
Entity 22: (155,162)
Entity 23: (190,192) (198,200)
Entity 24: (192,192)
====> (__); __:
File (__); __:
Entity 0: (4,18)
Entity 1: (4,7)
Entity 2: (6,7)
Entity 3: (6,18) (178,181)
Entity 4: (9,12)
Entity 5: (11,12)
Entity 6: (14,18)
Entity 7: (16,18)
Entity 8: (20,23) (28,28) (61,61) (66,66) (113,115) (122,122) (164,164) (169,169) (177,177) (179,179) (196,196)
Entity 9: (22,23)
Entity 10: (24,25)
Entity 11: (30,52) (159,161)
Entity 12: (35,37)
Entity 13: (44,48)
Entity 14: (46,48)
Entity 15: (49,52)
Entity 16: (73,73)
Entity 17: (80,89)
Entity 18: (84,86) (97,98) (107,107)
Entity 19: (84,89)
Entity 20: (84,89) (102,105)
Entity 21: (92,93)
Entity 22: (155,162)
Entity 23: (190,192) (198,200)
Entity 24: (192,192)
(__); __:
Repeated mention in the key: 84, 89 3334
Repeated mention in the response: 84, 89 3434
Total key mentions: 40
Total response mentions: 40
Strictly correct identified mentions: 40
Partially correct identified mentions: 0
No identified: 0
Invented: 1
Recall: (15 / 16) 93.75%	Precision: (15 / 15) 100%	F1: 96.77%
--------------------------------------------------------------------------

====== TOTALS =======
Identification of Mentions: Recall: (40 / 40) 100%	Precision: (40 / 40) 100%	F1: 100%
--------------------------------------------------------------------------
Coreference: Recall: (15 / 16) 93.75%	Precision: (15 / 15) 100%	F1: 96.77%
--------------------------------------------------------------------------

/tmp/maz-9884.mmax.conll
version: 8.01 /home/arne/repos/reference-coreference-scorers/lib/CorScorer.pm
====> (__); __:
File (__); __:
Entity 0: (4,4)
Entity 1: (13,14)
Entity 2: (17,18)
Entity 3: (24,27) (33,33) (37,37) (151,154) (163,163)
Entity 4: (24,25) (151,152) (180,180)
Entity 5: (24,27) (151,154) (188,188)
Entity 6: (36,38)
Entity 7: (42,47)
Entity 8: (44,47)
Entity 9: (50,75)
Entity 10: (55,56) (63,63) (142,143) (144,144)
Entity 11: (63,64)
Entity 12: (65,65)
Entity 13: (68,72)
Entity 14: (71,72)
Entity 15: (79,79) (86,86)
Entity 16: (80,82)
Entity 17: (93,94)
Entity 18: (98,98)
Entity 19: (100,103)
Entity 20: (102,103)
Entity 21: (105,105) (111,111)
Entity 22: (106,116)
Entity 23: (117,130)
Entity 24: (124,127)
Entity 25: (128,129)
Entity 26: (131,131) (202,202)
Entity 27: (131,135) (168,194)
Entity 28: (133,133) (204,204)
Entity 29: (135,135)
Entity 30: (144,145)
Entity 31: (148,154)
Entity 32: (166,194)
Entity 33: (172,173)
Entity 34: (182,182) (184,184) (200,200)
Entity 35: (183,185)
Entity 36: (189,190)
Entity 37: (191,193)
Entity 38: (200,204) (219,219)
Entity 39: (206,207)
Entity 40: (221,224) (235,236)
Entity 41: (230,230)
Entity 42: (231,231)
Entity 43: (241,243)
====> (__); __:
File (__); __:
Entity 0: (4,4)
Entity 1: (13,14)
Entity 2: (17,18)
Entity 3: (24,27) (33,33) (37,37) (151,154) (163,163)
Entity 4: (24,25) (151,152) (180,180)
Entity 5: (24,27) (151,154) (188,188)
Entity 6: (36,38)
Entity 7: (42,47)
Entity 8: (44,47)
Entity 9: (50,75)
Entity 10: (55,56) (63,63) (142,143) (144,144)
Entity 11: (63,64)
Entity 12: (65,65)
Entity 13: (68,72)
Entity 14: (71,72)
Entity 15: (79,79) (86,86)
Entity 16: (80,82)
Entity 17: (93,94)
Entity 18: (98,98)
Entity 19: (100,103)
Entity 20: (102,103)
Entity 21: (105,105) (111,111)
Entity 22: (106,116)
Entity 23: (117,130)
Entity 24: (124,127)
Entity 25: (128,129)
Entity 26: (131,131) (202,202)
Entity 27: (131,135) (168,194)
Entity 28: (133,133) (204,204)
Entity 29: (135,135)
Entity 30: (144,145)
Entity 31: (148,154)
Entity 32: (166,194)
Entity 33: (172,173)
Entity 34: (182,182) (184,184) (200,200)
Entity 35: (183,185)
Entity 36: (189,190)
Entity 37: (191,193)
Entity 38: (200,204) (219,219)
Entity 39: (206,207)
Entity 40: (221,224) (235,236)
Entity 41: (230,230)
Entity 42: (231,231)
Entity 43: (241,243)
(__); __:
Repeated mention in the key: 24, 27 311
Repeated mention in the key: 151, 154 612
Repeated mention in the response: 24, 27 1111
Repeated mention in the response: 151, 154 1212
Total key mentions: 62
Total response mentions: 62
Strictly correct identified mentions: 62
Partially correct identified mentions: 0
No identified: 0
Invented: 2
Recall: (17 / 20) 85%	Precision: (17 / 18) 94.44%	F1: 89.47%
--------------------------------------------------------------------------

====== TOTALS =======
Identification of Mentions: Recall: (62 / 62) 100%	Precision: (62 / 62) 100%	F1: 100%
--------------------------------------------------------------------------
Coreference: Recall: (17 / 20) 85%	Precision: (17 / 18) 94.44%	F1: 89.47%
--------------------------------------------------------------------------

Are the coreferences of the remaining files okay?

  • they seem okay, but contain numerous near-identity relations

In [31]:
def get_all_good_scoring_files(mmax_dir, scorer_path, verbose=False):
    """
    returns filepaths of MMAX2 coreference files which produce perfect
    (100% F1) results under every metric when tested against themselves
    with scorer.pl
    """
    all_mmax_files = glob.glob(os.path.join(mmax_dir, '*.mmax'))
    all_bad_files = set()
    # every metric supported by scorer.pl
    metrics = ['muc', 'bcub', 'ceafm', 'ceafe', 'blanc']
    
    for mmax_file in all_mmax_files:
        for metric in metrics:
            # a single imperfect metric disqualifies the whole file
            if not has_valid_annotation(mmax_file, scorer_path, metric, verbose=verbose):
                all_bad_files.add(mmax_file)
                break # continue with next mmax file
    
    return set(all_mmax_files).difference(all_bad_files)

In [32]:
# files that score 100% under every metric
all_good_scoring_files = get_all_good_scoring_files(MMAX_DIR, SCORER_PATH)

In [33]:
len(all_good_scoring_files)


Out[33]:
156

In [34]:
# for fname in all_good_scoring_files:
#     mdg = dg.read_mmax2(fname)
#     bname = os.path.basename(fname)
#     print bname, '\n==============\n\n'
#     try:
#         # [the dog]_{markable_23} means that [the dog] is part of a
#         # coreference chain whose first element is markable_23
#         print dg.readwrite.brackets.gen_bracketed_output(mdg), '\n\n'
#     except KeyError as e:
#         print "Error in {}: {}".format(bname, e)
#         print dg.get_text(mdg)
#     try:
#         print_all_chains(mdg)
#     except Exception as e:
#         print "\n{} FAILED: {}".format(fname, e)

TODO: check this rare key error


In [40]:
MMAX_DIR


Out[40]:
'/home/arne/repos/pcc-annis-merged/maz176/coreference'


In [36]:
# file whose chains trigger a KeyError ('mmax:span') -- see traceback below
mmax_14172 = os.path.join(MMAX_DIR, 'maz-14172.mmax')

In [37]:
mdg = dg.read_mmax2(mmax_14172)

In [39]:
print_all_chains(mdg)


markable_100068 Zigan
markable_100038 er
markable_100025 seine
markable_100024 SPD-Mann Zigan
markable_100011 seinem
markable_1 Dietmar Zigan


markable_1000190 Petrys
markable_78 der richtige Mann für einen Aufschwung Wittenberges
markable_1000168 er
markable_1000150 Petrys
markable_1000118 Petry
markable_100081 Petry
markable_5 zu seinem Wittenberger Amts- und Parteikollegen Klaus Petry


markable_1000177 Wittenberges
markable_1000194 Wittenberger


markable_1000195 Petrys Kontrahenten
markable_1000196
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-39-c87d94e4bbb5> in <module>()
----> 1 print_all_chains(mdg)

<ipython-input-13-77c7307d3dc8> in print_all_chains(docgraph)
      8     for chain in get_pointing_chains(docgraph):
      9         for node_id in chain:
---> 10             print node_id, spanstring2text(docgraph, docgraph.node[node_id][docgraph.ns+':span'])
     11         print '\n'

KeyError: 'mmax:span'

In [ ]: