In [7]:
import os
import sys
import glob
import discoursegraphs as dg

In [9]:
# Read a single MMAX2 coreference annotation (document maz-1423) into a
# document graph; '~' is resolved to the current user's home directory.
MMAX_TESTFILE = os.path.expanduser(
    '~/corpora/potsdam-commentary-corpus-2.0.0/coreference/maz-1423.mmax'
)
mdg = dg.read_mmax2(MMAX_TESTFILE)

In [13]:
# Input directories, both given relative to the user's home directory so the
# notebook is not tied to one account. os.path.expanduser() only rewrites a
# leading '~', so it has no effect on a path that is already absolute.
# MMAX_DIR holds the MMAX2 coreference files, CONLL_DIR the '*.expected'
# CoNLL files.
MMAX_DIR = os.path.expanduser('~/corpora/potsdam-commentary-corpus-2.0.0/coreference/')
CONLL_DIR = os.path.expanduser('~/repos/pcc-annis-merged/maz176/pocores/expected-coreference-conll/')

In [12]:
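# Disabled: convert every MMAX2 file in MMAX_DIR to brat.
# (Each file has to be read inside the loop; otherwise the same graph is
# written once per file.)
# for mmax_file in glob.glob(os.path.join(MMAX_DIR, '*.mmax')):
#     mdg = dg.read_mmax2(mmax_file)
#     dg.write_brat(mdg, '/tmp/brat/mmax2brat', show_relations=False)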

In [18]:
# for conll_file in glob.glob(os.path.join(CONLL_DIR, '*.expected')):
#     cdg = dg.read_conll(conll_file)
#     dg.write_brat(cdg, '/tmp/brat/conll2brat', show_relations=False)

In [19]:
# Path to the CoNLL '.expected' file for document maz-1423 (same document id
# as the MMAX2 test file), located inside CONLL_DIR.
CONLL_TESTFILE = os.path.join(CONLL_DIR, 'maz-1423.expected')

In [20]:
# Parse the CoNLL test file into a document graph (dg.info() reports its type
# as ConllDocumentGraph).
cdg = dg.read_conll(CONLL_TESTFILE)

In [21]:
# Print summary statistics for the CoNLL graph: node/edge counts, and how many
# nodes and edges carry each layer and attribute.
dg.info(cdg)


Name: maz-1423.expected
Type: ConllDocumentGraph
Number of nodes: 218
Number of edges: 201
Average in degree:   0.9220
Average out degree:   0.9220 

Node statistics
===============

number of nodes with layers
	conll - 217
	conll:token - 185
	conll:sentence - 16
	discoursegraph - 1

number of nodes with attributes
	layers - 218
	plemma - 185
	word_pos - 185
	conll:token - 185
	head - 185
	pred - 185
	pos - 185
	fillpred - 185
	pfeat - 185
	lemma - 185
	token - 185
	phead - 185
	label - 185
	sent_pos - 185
	ppos - 185
	pdeprel - 185
	feat - 185
	deprel - 185
	word_id - 185
	tokens - 16
	metadata - 1

Edge statistics
===============

number of edges with layers
	conll - 201
	conll:dependency - 185
	conll:sentence - 16

number of edges with attributes
	layers - 201
	edge_type - 201
	relation_type - 185
	label - 185

most common source edges
	s1_t_ - 24
	s13_t_ - 20
	s14_t_ - 20
	conll:root_node - 16
	s6_t_ - 15

most common target edges
	s7_t12 - 1
	s7_t10 - 1
	s7_t11 - 1
	s7_t4 - 1
	s7_t5 - 1

In [22]:
# Ask for the pointing chains of the CoNLL graph. The recorded result is an
# empty list.
# NOTE(review): the edge statistics printed by dg.info() list only
# conll:dependency and conll:sentence edges, so presumably read_conll() did not
# create any coreference/pointing edges for this file -- confirm.
dg.get_pointing_chains(cdg)


Out[22]:
[]

In [ ]: