Comparison of anaphoricity annotations (Dittrich vs. Tosik)


In [1]:
import os
from glob import glob
from collections import defaultdict
from discoursegraphs.readwrite import AnaphoraDocumentGraph
from discoursegraphs import print_dot, info, select_nodes_by_layer

In [2]:
# Root directories of the anaphoricity annotation files, one per annotator.
# anaphoricity_agreement() globs '<dir>/<anaphora>/*.txt' below each of them.
# NOTE(review): the paths assume a checkout under ~/repos -- adjust on other machines.
TOSIK_DIR = os.path.expanduser('~/repos/pcc-annis-merged/maz176/anaphora/tosik/')
DITTRICH_DIR = os.path.expanduser('~/repos/pcc-annis-merged/maz176/anaphora/dittrich/')
KOBOLD_DIR = os.path.expanduser('~/repos/pcc-annis-merged/maz176/anaphora/kobold/')

In [3]:
def have_same_annotation(graph1, graph2, node_id):
    """
    Tell whether two graphs carry an equal annotation for the same node.

    Each graph is queried under its own namespace, i.e. the attribute
    '<graph.ns>:annotation' of node ``node_id`` is compared.

    Returns
    -------
    bool
        True iff both annotation values compare equal
    """
    annotation1 = graph1.node[node_id][graph1.ns + ':annotation']
    annotation2 = graph2.node[node_id][graph2.ns + ':annotation']
    return annotation1 == annotation2

In [4]:
def get_anaphoricity(graph, token_id):
    """
    Look up the anaphoricity annotation of a single token.

    Returns
    -------
    tuple
        the values stored under '<graph.ns>:annotation' and
        '<graph.ns>:certainty' for the node ``token_id``, in that order
    """
    token_attrs = graph.node[token_id]
    annotation = token_attrs[graph.ns + ':annotation']
    certainty = token_attrs[graph.ns + ':certainty']
    return annotation, certainty

In [10]:
def anaphoricity_agreement(annotator1_dir, annotator2_dir, anaphora):
    """
    calculates the inter-annotator agreement between two annotators for
    'das'/'es' anaphoricity annotation and prints a short summary.

    Only files found for the first annotator are considered. If the
    corresponding file of the second annotator cannot be loaded, the file
    is skipped and its name is listed in the printed summary.

    Parameters
    ----------
    annotator1_dir : str
        root directory of the annotation files of the first annotator
    annotator2_dir : str
        root directory of the annotation files of the second annotator
    anaphora : str
        'das' or 'es'

    Returns
    -------
    annotations : defaultdict
        maps from a (file basename, node ID) tuple to a dict-like object
        that is keyed by the namespace of each graph (the graphs are
        created with the namespaces 'annotator1' and 'annotator2') and
        holds the (annotation, certainty) tuple of get_anaphoricity()

    Raises
    ------
    AssertionError
        if the two annotators did not annotate the same set of nodes
        in a file
    """
    annotator1_files = glob(os.path.join(annotator1_dir, anaphora, '*.txt'))

    num_of_annotated_tokens = 0
    num_of_identical_annotations = 0
    skipped_files = []

    annotations = defaultdict(lambda: defaultdict(dict))

    for annotator1_file in annotator1_files:
        annotator1_graph = AnaphoraDocumentGraph(annotator1_file, namespace='annotator1')
        fname = os.path.basename(annotator1_file)

        try:
            annotator2_graph = AnaphoraDocumentGraph(
                os.path.join(annotator2_dir, anaphora, fname), namespace='annotator2')
        except Exception:
            # Best effort: a file the second annotator's graph cannot be built
            # from is left out of the comparison. Unlike a bare `except:`,
            # KeyboardInterrupt/SystemExit still propagate, and the skipped
            # file is reported instead of vanishing silently.
            skipped_files.append(fname)
            continue

        annotator1_anaphora = set(select_nodes_by_layer(annotator1_graph, 'annotator1:annotated'))
        annotator2_anaphora = set(select_nodes_by_layer(annotator2_graph, 'annotator2:annotated'))
        # agreement is only defined if both annotators labelled the same tokens
        assert annotator1_anaphora == annotator2_anaphora, \
            "{0}: annotators annotated different tokens (annotated by only one of them: {1})".format(
                fname, sorted(annotator1_anaphora ^ annotator2_anaphora, key=str))

        num_of_annotated_tokens += len(annotator1_anaphora)
        ns1, ns2 = annotator1_graph.ns, annotator2_graph.ns
        for node_id in annotator1_anaphora:
            if have_same_annotation(annotator1_graph, annotator2_graph, node_id):
                num_of_identical_annotations += 1

            annotations[(fname, node_id)][ns1] = get_anaphoricity(annotator1_graph, node_id)
            annotations[(fname, node_id)][ns2] = get_anaphoricity(annotator2_graph, node_id)

    # single-argument print(...) produces the same text under Python 2 and 3
    title = "'{}' ANNOTATION".format(anaphora.upper())
    print("{0}\n{1}\n".format(title, len(title)*'='))
    print("annotated tokens:  {0}".format(num_of_annotated_tokens))
    print("identically annotatated tokens:  {0}".format(num_of_identical_annotations))
    if num_of_annotated_tokens:
        agreement = num_of_identical_annotations / float(num_of_annotated_tokens) * 100
        print("agreement:  {0}".format(agreement))
    else:
        # no files or no annotated tokens: avoid dividing by zero
        print("agreement:  n/a (no annotated tokens found)")
    if skipped_files:
        print("skipped files (second annotator's file could not be loaded):  {0}".format(
            ', '.join(skipped_files)))
    return annotations

Comparison of 'es' annotations

  • agreement is much worse than for 'das'

In [11]:
# 'es' comparison: Tosik is passed as the first annotator, Dittrich as the second.
es_annotations = anaphoricity_agreement(TOSIK_DIR, DITTRICH_DIR, 'es')


'ES' ANNOTATION
===============

annotated tokens:  284
identically annotatated tokens:  219
agreement:  77.1126760563

In [12]:
# Kobold vs. Tosik on 'das'. The result gets its own name: the cells below
# analyse `es_annotations` as the Tosik/Dittrich 'es' comparison and must not
# be fed 'das' data from a different annotator pair.
kobold_tosik_das_annotations = anaphoricity_agreement(KOBOLD_DIR, TOSIK_DIR, 'das')


---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-12-5fb2468e735b> in <module>()
----> 1 es_annotations = anaphoricity_agreement(KOBOLD_DIR, TOSIK_DIR, 'das')

<ipython-input-10-1e881227443b> in anaphoricity_agreement(annotator1_dir, annotator2_dir, anaphora)
     32         annotator1_anaphora = set(select_nodes_by_layer(annotator1_graph, 'annotator1:annotated'))
     33         annotator2_anaphora = set(select_nodes_by_layer(annotator2_graph, 'annotator2:annotated'))
---> 34         assert annotator1_anaphora == annotator2_anaphora
     35 
     36         num_of_annotated_tokens += len(annotator1_anaphora)

AssertionError: 
  • distinguishing between pleonastic and abstract es seems hard

In [ ]:
# Tally the 'es' disagreements:
# label of annotator 1 -> label of annotator 2 -> number of tokens
diverging_annotations = defaultdict(lambda: defaultdict(dict))

for anno, labels in es_annotations.items():
    # element 0 of each (annotation, certainty) tuple is the label
    anno1 = labels['annotator1'][0]
    anno2 = labels['annotator2'][0]
    if anno1 == anno2:
        continue  # the annotators agree on this token
    diverging_annotations[anno1][anno2] = diverging_annotations[anno1].get(anno2, 0) + 1

In [ ]:
for anno1 in diverging_annotations:
    print anno1, diverging_annotations[anno1]

In [ ]:
diverging_annotations = defaultdict(lambda: defaultdict(dict))

for anno in es_annotations:
    # if annotations are different
    anno1 = es_annotations[anno]['annotator1'][0]
    anno2 = es_annotations[anno]['annotator2'][0]
    if anno1 != anno2:
        if anno1 in diverging_annotations[anno2]:
            diverging_annotations[anno2][anno1] += 1
        else:
            diverging_annotations[anno2][anno1] = 1
            
for anno2 in diverging_annotations:
    print anno2, diverging_annotations[anno2]

Comparison of 'das' annotations


In [ ]:
# 'das' comparison: Tosik is passed as the first annotator, Dittrich as the second.
das_annotations = anaphoricity_agreement(TOSIK_DIR, DITTRICH_DIR, 'das')

In [ ]:
diverging_annotations = defaultdict(lambda: defaultdict(dict))

for anno in das_annotations:
    # if annotations are different
    anno1 = das_annotations[anno]['annotator1'][0]
    anno2 = das_annotations[anno]['annotator2'][0]
    if anno1 != anno2:
        if anno2 in diverging_annotations[anno1]:
            diverging_annotations[anno1][anno2] += 1
        else:
            diverging_annotations[anno1][anno2] = 1

for anno1 in diverging_annotations:
    print anno1, '-->', diverging_annotations[anno1]

In [ ]:
diverging_annotations = defaultdict(lambda: defaultdict(dict))

for anno in das_annotations:
    # if annotations are different
    anno1 = das_annotations[anno]['annotator1'][0]
    anno2 = das_annotations[anno]['annotator2'][0]
    if anno1 != anno2:
        if anno1 in diverging_annotations[anno2]:
            diverging_annotations[anno2][anno1] += 1
        else:
            diverging_annotations[anno2][anno1] = 1

for anno2 in diverging_annotations:
    print anno2, '-->', diverging_annotations[anno2]

In [ ]:
%matplotlib inline

import numpy as np
import pandas as pd

In [ ]:
def generate_disagreement_matrices(annotations):
    """
    Counts, for each pair of differing labels, how often the two
    annotators disagreed in exactly that way. Tokens on which both
    annotators chose the same label are not counted.

    Parameters
    ----------
    annotations : defaultdict(dict) or dict of dicts
        maps from a (filename str, token_id int) tuple to a
        dict with the keys 'annotator1' and 'annotator2' and
        (annotation str, certainty float) tuples as values.

    Returns
    -------
    one2two : defaultdict of defaultdict(int)
        one2two[label1][label2] is the number of tokens that annotator 1
        labelled ``label1`` while annotator 2 labelled them ``label2``
    two2one : defaultdict of defaultdict(int)
        the same counts with the roles swapped, i.e.
        two2one[label2][label1]

    Note that both results are defaultdicts: reading a label pair that was
    never counted yields 0 and adds that key.
    """
    # int as the innermost factory makes a missing count read as 0, so the
    # counters can simply be incremented
    one2two = defaultdict(lambda: defaultdict(int))
    two2one = defaultdict(lambda: defaultdict(int))

    for anno in annotations:
        # element 0 of each (annotation, certainty) tuple is the label
        anno1 = annotations[anno]['annotator1'][0]
        anno2 = annotations[anno]['annotator2'][0]
        if anno1 != anno2:
            one2two[anno1][anno2] += 1
            two2one[anno2][anno1] += 1

    return one2two, two2one

In [ ]:
# Recompute the Tosik/Dittrich 'das' comparison and tally the disagreements
# in both directions (annotator 1 -> 2 and annotator 2 -> 1).
das_annotations = anaphoricity_agreement(TOSIK_DIR, DITTRICH_DIR, 'das')
one2two, two2one = generate_disagreement_matrices(das_annotations)

In [ ]:
# Row labels: every label annotator 1 used in a disagreement.
# Sorted lists give the table and plot a reproducible order.
rows = sorted(one2two)
print(rows)

# Column labels: every label annotator 2 chose instead. A list rather than a
# set, because a set has no defined order and is not a valid `columns`
# argument for DataFrame in newer pandas.
cols = sorted(set(k for v in one2two.values() for k in v.keys()))
print(cols)

In [ ]:
# Zero-filled count table: one row per annotator-1 label, one column per
# annotator-2 label. `int` replaces `np.int`, which was only an alias of the
# builtin and has been removed from NumPy; rows/cols are turned into lists
# because DataFrame needs ordered index/column labels.
one2two_df = pd.DataFrame(
    np.zeros((len(rows), len(cols)), dtype=int),
    columns=list(cols), index=list(rows))
one2two_df

In [ ]:
# Copy the disagreement counts into the table. A single .loc[row, column]
# assignment writes into one2two_df itself; chained indexing
# (df[column][row] = value) may write to a temporary copy instead.
for anno in one2two:
    for disagree_anno, disagree_count in one2two[anno].items():
        one2two_df.loc[anno, disagree_anno] = disagree_count

In [ ]:
# Stacked bar chart of one2two_df: its rows are the labels chosen by
# annotator 1, its columns the diverging labels chosen by annotator 2.
one2two_plot = one2two_df.plot(kind='bar', stacked=True)
one2two_plot.set_xlabel("Annotator 1 annotated ...")
one2two_plot.set_ylabel("but Annotator 2 disagreed")

In [ ]:
# Same table and plot as for one2two, but seen from annotator 2:
# rows are the labels of annotator 2, columns the diverging labels of annotator 1.
# Sorted lists give a reproducible order (a set is not a valid `columns` argument).
rows = sorted(two2one)
cols = sorted(set(k for v in two2one.values() for k in v.keys()))
# `int` replaces the removed NumPy alias `np.int`
two2one_df = pd.DataFrame(np.zeros((len(rows), len(cols)), dtype=int), columns=cols, index=rows)
for anno in two2one:
    for disagree_anno, disagree_count in two2one[anno].items():
        # .loc writes into two2one_df itself; chained indexing may hit a copy
        two2one_df.loc[anno, disagree_anno] = disagree_count

two2one_plot = two2one_df.plot(kind='bar', stacked=True)
two2one_plot.set_xlabel("Annotator 2 annotated ...")
two2one_plot.set_ylabel("but Annotator 1 disagreed")

Possible improvements


In [ ]: