In [1]:
from collections import defaultdict, OrderedDict
from lxml import etree
from discoursegraphs import t
from discoursegraphs.readwrite.rst.rs3 import RSTTree, dt, extract_relationtypes, get_rs3_data
from discoursegraphs.readwrite.tree import get_position
from rstviewer import embed_rs3_image, embed_rs3str_image

In [2]:
RS3_EXAMPLE_FILE = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-1818.rs3'

Testing


In [3]:
embed_rs3_image(RS3_EXAMPLE_FILE)



In [4]:
RSTTree(RS3_EXAMPLE_FILE)


Out[4]:

In [5]:
RSTTree(RS3_EXAMPLE_FILE, word_wrap=10)


Out[5]:

In [6]:
RSTTree(RS3_EXAMPLE_FILE, word_wrap=15)[1][0]


Out[6]:

In [7]:
cdict, edict, ordered_edus = get_rs3_data(RS3_EXAMPLE_FILE)
edu_set = set(ordered_edus)

In [8]:
dt(cdict, edict, ordered_edus, start_node=None)


Out[8]:

In [9]:
dt(cdict, edict, ordered_edus, start_node='1') # OK


Out[9]:

In [10]:
dt(cdict, edict, ordered_edus, start_node='11') # OK


Out[10]:

In [11]:
dt(cdict, edict, ordered_edus, start_node='2') # OK


Out[11]:

In [12]:
dt(cdict, edict, ordered_edus, start_node='3') # OK


Out[12]:

In [13]:
dt(cdict, edict, ordered_edus, start_node='4') # OK


Out[13]:

In [14]:
dt(cdict, edict, ordered_edus, start_node='5') # OK


Out[14]:

In [15]:
dt(cdict, edict, ordered_edus, start_node='6') # OK


Out[15]:

In [16]:
dt(cdict, edict, ordered_edus, start_node='7') # OK


Out[16]:

In [17]:
dt(cdict, edict, ordered_edus, start_node='8') # OK


Out[17]:

In [18]:
dt(cdict, edict, ordered_edus, start_node='9') # OK


Out[18]:

In [19]:
dt(cdict, edict, ordered_edus, start_node='10') # OK


Out[19]:

In [20]:
dt(cdict, edict, ordered_edus, start_node='12') # OK


Out[20]:

In [21]:
dt(cdict, edict, ordered_edus, start_node='13') # OK


Out[21]:

In [22]:
dt(cdict, edict, ordered_edus, start_node='14') # OK


Out[22]:

In [23]:
dt(cdict, edict, ordered_edus, start_node='15') # OK


Out[23]:

In [24]:
dt(cdict, edict, ordered_edus, start_node='16') # OK


Out[24]:

In [25]:
dt(cdict, edict, ordered_edus, start_node='17') # OK


Out[25]:

In [26]:
dt(cdict, edict, ordered_edus, start_node='18') # OK


Out[26]:

In [27]:
dt(cdict, edict, ordered_edus, start_node='19') # OK


Out[27]:

In [28]:
dt(cdict, edict, ordered_edus, start_node='20') # OK


Out[28]:

TODOs

implement word-wrap

only show node number in debug mode (otherwise: 'N' or 'S')


In [29]:
RSTTree(RS3_EXAMPLE_FILE)


Out[29]:

TODO: illegal trees for Manfred


In [30]:
maz_4031 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-4031.rs3'

In [31]:
# TODO: show Manfred. Should be illegal: concession(6-9 -> 5) and concession(10-11 -> 5)!
embed_rs3_image(maz_4031)



In [32]:
# maz_6728 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6728.rs3'

In [33]:
# TODO: show Manfred. Should be illegal: reason(6-8 -> 2-5) AND evidence(9-11 -> 2-5)
# embed_rs3_image(maz_6728)

In [34]:
maz_18377 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-18377.rs3'

In [35]:
# embed_rs3_image(maz_18377)

In [36]:
RSTTree(maz_18377)


Out[36]:

In [37]:
maz_4031 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-4031.rs3'

In [38]:
# embed_rs3_image(maz_4031)

In [39]:
# AssertionError: A multinuc group (33) should not have > 1 non-multinuc children:
# ['26', '30', '32', '22', '36']
maz_2669 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-2669.rs3'

In [40]:
# embed_rs3_image(maz_2669)

In [41]:
# AssertionError: A multinuc group (19) should not have > 1 non-multinuc children: []
maz_14813 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14813.rs3'

In [42]:
cdict, edict, ordered_edus = get_rs3_data(maz_14813, word_wrap=20)
edu_set = set(ordered_edus)

In [43]:
# embed_rs3_image(maz_14813)

In [44]:
# AssertionError: A multinuc group (19) should not have > 1 non-multinuc children: []
maz_7220 = '/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-7220.rs3'

In [45]:
cdict, edict, ordered_edus = get_rs3_data(maz_7220, word_wrap=10)
edu_set = set(ordered_edus)

In [46]:
# embed_rs3_image(maz_7220)

In [47]:
dt(cdict, edict, ordered_edus, start_node='19')


Out[47]:

TODO: maybe implement simple tree unification for URML first

TODO: check all routines, that create t(foo, subtrees)

assert that all subtrees are in the same relname/reltype with the tree

  • if so --> t(foo, [(relname, subtrees)])

TODO: add code for node with more than one non-multinuc children

implement "relation stacking"


In [48]:
# FAIL: name of relation missing: conjunction(8, 11)
dt(cdict, edict, ordered_edus, start_node='12')


Out[48]:

In [49]:
cdict['12']


Out[49]:
['8', '11']

In [50]:
edict['12']


Out[50]:
defaultdict(str,
            {'element_type': 'group',
             'group_type': 'multinuc',
             'nuclearity': 'nucleus',
             'parent': '19',
             'relname': 'joint',
             'reltype': 'multinuc'})

In [51]:
edict['8']


Out[51]:
defaultdict(str,
            {'element_type': 'segment',
             'nuclearity': 'nucleus',
             'parent': '12',
             'relname': 'conjunction',
             'reltype': 'multinuc',
             'text': 'Weder\nmotivieren\nsolche Ein\nlassungen\ndie\neigenen\nSoldaten ,'})

In [52]:
import os
import traceback
from collections import Counter

from discoursegraphs.corpora import pcc

counter = Counter()
exceptions = Counter()
for rs3_file in pcc.rst:
    try:
        rt = RSTTree(rs3_file)
        counter["wins"] += 1
    except Exception as e:
        print rs3_file
        print("FAIL")
#         print traceback.format_exc()
        print e
        counter["fails"] += 1
#         exceptions[e.__class__.__name__] += 1
        exceptions[e] += 1
        print


/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-4031.rs3
FAIL
A span segment (5) should have one child: ['17', '20']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6728.rs3
FAIL
A span group ('24') should not have > 2 children: ['10', '19', '23']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-16987.rs3
FAIL
A multinuc group (22) should not have > 1 non-multinuc children: ['6', '7']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-5873.rs3
FAIL
A span group ('25') should not have > 2 children: ['3', '19', '23']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-12666.rs3
FAIL
A multinuc group (25) should not have > 1 non-multinuc children: ['24', '26']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11280.rs3
FAIL
A span segment (10) should have one child: ['9', '29']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-5297.rs3
FAIL
A span segment (3) should have one child: ['2', '17']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-5007.rs3
FAIL
A span group ('24') should not have > 2 children: ['13', '14', '25']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-10207.rs3
FAIL
A span segment (7) should have one child: ['6', '26']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-7690.rs3
FAIL
A span group ('23') should not have > 2 children: ['12', '24', '22']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-13125.rs3
FAIL
A span segment (4) should have one child: ['24', '25']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8509.rs3
FAIL
A span group ('32') should not have > 2 children: ['26', '28', '31']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-15702.rs3
FAIL
A span segment (7) should have one child: ['22', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-3367.rs3
FAIL
A span group ('32') should not have > 2 children: ['19', '30', '33', '36']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11299.rs3
FAIL
A multinuc group (32) should not have > 1 non-multinuc children: ['25', '35']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14590.rs3
FAIL
A span group ('19') should not have > 2 children: ['17', '18', '24']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-2669.rs3
FAIL
A multinuc group (33) should not have > 1 non-multinuc children: ['32', '36']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-12576.rs3
FAIL
A span segment (11) should have one child: ['32', '33']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-10902.rs3
FAIL
A multinuc group (21) should not have > 1 non-multinuc children: ['23', '27']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-5012.rs3
FAIL
A span group ('22') should not have > 2 children: ['13', '16', '17', '26']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-15609.rs3
FAIL
A multinuc group (23) should not have > 1 non-multinuc children: ['22', '24']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14047.rs3
FAIL
A span group ('24') should not have > 2 children: ['22', '23', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-00001.rs3
FAIL
A multinuc segment (18) should not have children: ['40']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8361.rs3
FAIL
A span group ('21') should not have > 2 children: ['4', '19', '20']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-15734.rs3
FAIL
A multinuc group (23) should not have > 1 non-multinuc children: ['22', '30']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-12188.rs3
FAIL
A span group ('28') should not have > 2 children: ['23', '24', '26', '29']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11735.rs3
FAIL
A span segment (2) should have one child: ['12', '16']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-16177.rs3
FAIL
A span group ('27') should not have > 2 children: ['17', '20', '24']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11766.rs3
FAIL
A span group ('26') should not have > 2 children: ['2', '14', '21', '23', '25']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14654.rs3
FAIL
A multinuc group (28) should not have > 1 non-multinuc children: ['25', '30', '31']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-15347.rs3
FAIL
A span segment (10) should have one child: ['9', '11']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-4403.rs3
FAIL
A span segment (6) should have one child: ['7', '18']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6918.rs3
FAIL
A span segment (5) should have one child: ['6', '13', '16', '17']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-10205.rs3
FAIL
A span segment (5) should have one child: ['6', '23']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-2609.rs3
FAIL
A span segment (5) should have one child: ['22', '23']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-7967.rs3
FAIL
A span group ('32') should not have > 2 children: ['9', '29', '35']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-17539.rs3
FAIL
A span group ('17') should not have > 2 children: ['7', '16', '21']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11916.rs3
FAIL
A multinuc group (16) should not have > 1 non-multinuc children: ['11', '19']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11177.rs3
FAIL
A span group ('20') should not have > 2 children: ['6', '21', '25']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6993.rs3
FAIL
A span segment (11) should have one child: ['10', '12']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-15970.rs3
FAIL
A span segment (6) should have one child: ['20', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-9335.rs3
FAIL
A span group ('25') should not have > 2 children: ['8', '10', '11']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-13915.rs3
FAIL
A span group ('25') should not have > 2 children: ['5', '19', '23', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-10110.rs3
FAIL
A span group ('19') should not have > 2 children: ['7', '18', '23']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11670.rs3
FAIL
A span group ('28') should not have > 2 children: ['10', '17', '27']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6159.rs3
FAIL
A span group ('37') should not have > 2 children: ['3', '5', '38']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-5144.rs3
FAIL
A span group ('26') should not have > 2 children: ['22', '25', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-19436.rs3
FAIL
A span group ('28') should not have > 2 children: ['12', '27', '30']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8665.rs3
FAIL
A multinuc group (26) should not have > 1 non-multinuc children: ['18', '29']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14881.rs3
FAIL
A multinuc group (29) should not have > 1 non-multinuc children: ['25', '26']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8838.rs3
FAIL
A span group ('19') should not have > 2 children: ['11', '25', '30']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-12473.rs3
FAIL
A span segment (10) should have one child: ['18', '20']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-9207.rs3
FAIL
A span segment (9) should have one child: ['8', '18']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6165.rs3
FAIL
A span segment (7) should have one child: ['27', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-1757.rs3
FAIL
A span segment (8) should have one child: ['18', '24']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-4472.rs3
FAIL
A multinuc group (15) should not have > 1 non-multinuc children: ['13', '19', '21']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14853.rs3
FAIL
A span group ('23') should not have > 2 children: ['4', '28', '30']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-19372.rs3
FAIL
A span group ('39') should not have > 2 children: ['36', '38', '42']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-6046.rs3
FAIL
A span group ('35') should not have > 2 children: ['33', '34', '37']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-18354.rs3
FAIL
A multinuc group (27) should not have > 1 non-multinuc children: ['12', '20']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-9891.rs3
FAIL
A span group ('27') should not have > 2 children: ['19', '23', '26', '30']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8981.rs3
FAIL
A multinuc group (25) should not have > 1 non-multinuc children: ['12', '20']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-9884.rs3
FAIL
A multinuc group (29) should not have > 1 non-multinuc children: ['18', '24']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-12383.rs3
FAIL
A span segment (5) should have one child: ['22', '28']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8563.rs3
FAIL
A span segment (3) should have one child: ['24', '35']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-18333.rs3
FAIL
A span group ('22') should not have > 2 children: ['8', '9', '11']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-10575.rs3
FAIL
A multinuc group (29) should not have > 1 non-multinuc children: ['8', '10']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-9538.rs3
FAIL
A span segment (27) should have one child: ['10', '17']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-19012.rs3
FAIL
A span segment (13) should have one child: ['27', '34']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-16153.rs3
FAIL
A span group ('24') should not have > 2 children: ['6', '8', '22']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-8288.rs3
FAIL
A span group ('28') should not have > 2 children: ['8', '26', '27']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-17698.rs3
FAIL
A span segment (12) should have one child: ['21', '24']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-14055.rs3
FAIL
A span segment (11) should have one child: ['2', '25']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-15155.rs3
FAIL
A multinuc group (27) should not have > 1 non-multinuc children: ['10', '29']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-11279.rs3
FAIL
A span segment (7) should have one child: []

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-13758.rs3
FAIL
A span group ('32') should not have > 2 children: ['25', '27', '11']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-00002.rs3
FAIL
A multinuc group (20) should not have > 1 non-multinuc children: ['19', '5']

/usr/local/lib/python2.7/dist-packages/discoursegraphs-0.3.2-py2.7.egg/discoursegraphs/data/potsdam-commentary-corpus-2.0.0/rst/maz-18750.rs3
FAIL
A span group ('24') should not have > 2 children: ['10', '17', '21', '27']


In [53]:
print counter, "\n"
for e, count in exceptions.most_common():
    print e, count


Counter({'wins': 98, 'fails': 78}) 

A span segment (5) should have one child: ['22', '23'] 1
A span group ('35') should not have > 2 children: ['33', '34', '37'] 1
A span group ('28') should not have > 2 children: ['23', '24', '26', '29'] 1
A span segment (7) should have one child: ['6', '26'] 1
A span segment (10) should have one child: ['9', '29'] 1
A multinuc group (21) should not have > 1 non-multinuc children: ['23', '27'] 1
A span group ('32') should not have > 2 children: ['25', '27', '11'] 1
A span group ('22') should not have > 2 children: ['13', '16', '17', '26'] 1
A span segment (6) should have one child: ['7', '18'] 1
A multinuc group (23) should not have > 1 non-multinuc children: ['22', '30'] 1
A span segment (12) should have one child: ['21', '24'] 1
A span group ('22') should not have > 2 children: ['8', '9', '11'] 1
A span group ('26') should not have > 2 children: ['2', '14', '21', '23', '25'] 1
A multinuc segment (18) should not have children: ['40'] 1
A span segment (8) should have one child: ['18', '24'] 1
A span group ('27') should not have > 2 children: ['17', '20', '24'] 1
A span group ('39') should not have > 2 children: ['36', '38', '42'] 1
A multinuc group (20) should not have > 1 non-multinuc children: ['19', '5'] 1
A multinuc group (25) should not have > 1 non-multinuc children: ['24', '26'] 1
A span group ('27') should not have > 2 children: ['19', '23', '26', '30'] 1
A multinuc group (28) should not have > 1 non-multinuc children: ['25', '30', '31'] 1
A span group ('23') should not have > 2 children: ['4', '28', '30'] 1
A span group ('25') should not have > 2 children: ['3', '19', '23'] 1
A span segment (10) should have one child: ['9', '11'] 1
A span group ('24') should not have > 2 children: ['6', '8', '22'] 1
A span segment (7) should have one child: ['27', '28'] 1
A span group ('32') should not have > 2 children: ['26', '28', '31'] 1
A span segment (3) should have one child: ['24', '35'] 1
A span group ('17') should not have > 2 children: ['7', '16', '21'] 1
A multinuc group (23) should not have > 1 non-multinuc children: ['22', '24'] 1
A span group ('32') should not have > 2 children: ['19', '30', '33', '36'] 1
A span group ('19') should not have > 2 children: ['7', '18', '23'] 1
A multinuc group (32) should not have > 1 non-multinuc children: ['25', '35'] 1
A span segment (27) should have one child: ['10', '17'] 1
A span group ('28') should not have > 2 children: ['12', '27', '30'] 1
A span segment (11) should have one child: ['2', '25'] 1
A span group ('19') should not have > 2 children: ['17', '18', '24'] 1
A multinuc group (16) should not have > 1 non-multinuc children: ['11', '19'] 1
A span segment (10) should have one child: ['18', '20'] 1
A multinuc group (27) should not have > 1 non-multinuc children: ['12', '20'] 1
A span segment (5) should have one child: ['6', '23'] 1
A span group ('28') should not have > 2 children: ['10', '17', '27'] 1
A span segment (6) should have one child: ['20', '28'] 1
A multinuc group (27) should not have > 1 non-multinuc children: ['10', '29'] 1
A span group ('25') should not have > 2 children: ['8', '10', '11'] 1
A span segment (11) should have one child: ['32', '33'] 1
A multinuc group (15) should not have > 1 non-multinuc children: ['13', '19', '21'] 1
A span group ('23') should not have > 2 children: ['12', '24', '22'] 1
A span segment (4) should have one child: ['24', '25'] 1
A multinuc group (22) should not have > 1 non-multinuc children: ['6', '7'] 1
A span group ('24') should not have > 2 children: ['10', '17', '21', '27'] 1
A multinuc group (25) should not have > 1 non-multinuc children: ['12', '20'] 1
A span group ('24') should not have > 2 children: ['10', '19', '23'] 1
A span segment (5) should have one child: ['6', '13', '16', '17'] 1
A multinuc group (29) should not have > 1 non-multinuc children: ['18', '24'] 1
A multinuc group (29) should not have > 1 non-multinuc children: ['8', '10'] 1
A span segment (7) should have one child: ['22', '28'] 1
A span group ('37') should not have > 2 children: ['3', '5', '38'] 1
A span group ('24') should not have > 2 children: ['13', '14', '25'] 1
A span segment (2) should have one child: ['12', '16'] 1
A span group ('21') should not have > 2 children: ['4', '19', '20'] 1
A span group ('25') should not have > 2 children: ['5', '19', '23', '28'] 1
A span group ('20') should not have > 2 children: ['6', '21', '25'] 1
A span segment (3) should have one child: ['2', '17'] 1
A multinuc group (29) should not have > 1 non-multinuc children: ['25', '26'] 1
A span segment (11) should have one child: ['10', '12'] 1
A span segment (5) should have one child: ['22', '28'] 1
A span group ('24') should not have > 2 children: ['22', '23', '28'] 1
A span group ('26') should not have > 2 children: ['22', '25', '28'] 1
A span segment (5) should have one child: ['17', '20'] 1
A multinuc group (33) should not have > 1 non-multinuc children: ['32', '36'] 1
A span group ('19') should not have > 2 children: ['11', '25', '30'] 1
A span group ('32') should not have > 2 children: ['9', '29', '35'] 1
A span segment (13) should have one child: ['27', '34'] 1
A multinuc group (26) should not have > 1 non-multinuc children: ['18', '29'] 1
A span segment (9) should have one child: ['8', '18'] 1
A span segment (7) should have one child: [] 1
A span group ('28') should not have > 2 children: ['8', '26', '27'] 1

TODO: errors to look at

AssertionError("A span segment (5) should have one child: ['17', '20']",) AssertionError("A multinuc group (27) should not have > 1 non-multinuc children: ['5', '6', '7', '10', '28', '29']",)

bad annotation; show Manfred

ValueError("A span group ('21') should not have > 2 children: ['4', '19', '20']",)


In [ ]: