In [1]:
import os
from lxml import etree
# Path of the URML file to load; relative, so it is resolved against the
# notebook's working directory.
urml_filepath = 'urml-pq-041104.xml'
# Alternative input (disabled). NOTE(review): it references `dg`, which is only
# imported in a later cell, so enabling it here would raise a NameError.
# urml_filepath = os.path.join(dg.DATA_ROOT_DIR, 'urml-example.xml')
In [2]:
# Load the gvmagic IPython extension (presumably the provider of the %dotstr
# magic used further down — confirm against the installed gvmagic version).
%load_ext gvmagic
In [3]:
import discoursegraphs as dg
In [16]:
# Read the URML file at `urml_filepath` with discoursegraphs.
# tokenize=False is passed through to read_urml (presumably keeps segment text
# untokenized — confirm against the discoursegraphs documentation).
urml_corpus = dg.read_urml(urml_filepath, tokenize=False)
In [17]:
# Size of the object returned by read_urml (presumably the number of documents
# in the URML file — confirm).
len(urml_corpus)
Out[17]:
In [6]:
udg = urml_corpus.next()
%dotstr dg.print_dot(udg)
In [7]:
# Show the entry stored in `udg.node` under the graph's root key.
udg.node[udg.root]
Out[7]:
In [8]:
# Show the graph's `root` attribute (the key used to index `udg.node`).
udg.root
Out[8]:
In [10]:
# Minimal URML document used as an inline fixture: one <document> with an empty
# <info>, two text segments, and a single "sequential" parRelation whose two
# nuclei reference those segments.
urml_minimal_doc = """
<document id="maz3377">
<info>
</info>
<text>
<segment id="maz3377.1">Erst rührt sich niemand unter den Dallgower Kommunalpolitikern , </segment>
<segment id="maz3377.2">nun überschlagen sich alle mit Anträgen zur Gemeindereform und den vorausgehenden Verhandlungen mit den südlichen Nachbarn . </segment>
</text>
<analysis status="interpretation">
<parRelation id="maz3377.1000" type="sequential">
<nucleus id="maz3377.1"/>
<nucleus id="maz3377.2"/>
</parRelation>
</analysis>
</document>
"""
In [11]:
# `etree` is already imported in the notebook's first cell, so the duplicate
# mid-notebook import is dropped here.
# Parse the inline URML string into an lxml element ...
urml_min_doc_etree = etree.fromstring(urml_minimal_doc)
# ... and build a document graph directly from that element, bypassing the
# file-based dg.read_urml() used for the corpus above.
udg_min = dg.readwrite.URMLDocumentGraph(urml_min_doc_etree, tokenize=False)
In [12]:
# Hand the dot output for the minimal document graph to the %dotstr magic
# (presumably renders it inline via gvmagic — confirm).
%dotstr dg.print_dot(udg_min)
In [ ]: