In [3]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
In [12]:
import lxml.etree as et
from treedlib_util import load_sentences, tag_candidate
from tree_structs_ipynb import sentence_to_xmltree, XMLTree
# Convert every loaded sentence into a tree. A list is built explicitly (rather
# than a bare map()) so that the indexing below also works on Python 3, where
# map() returns a lazy iterator that cannot be subscripted.
dts = [sentence_to_xmltree(sentence)
       for sentence in load_sentences('test/test1.parsed.tsv')]
dt = dts[1]  # the second sentence in the file is the working example

# Tag three word sequences with the candidate ids P1, G1 and G2. A later cell
# locates the P1 node through its `cid` attribute.
# NOTE(review): presumably P* is a phenotype mention and G* are gene mentions - confirm.
tag_candidate(dt.root, ['Autosomal', 'dominant', 'polycystic', 'kidney', 'disease'], 'P1')
tag_candidate(dt.root, ['PKD1'], 'G1')
tag_candidate(dt.root, ['PKD2'], 'G2')

# The return value of to_str() is not the cell's last expression, so it is not
# displayed; the call is kept in case it has side effects.
dt.to_str()
dt.render_tree()

# Keep a handle on the tree's root element for the re-rooting step further down.
root = dt.root
In [13]:
def root_tree_at(new_root):
    """
    Re-root the tree in place so that `new_root` ends up at the top.

    Every parent link above `new_root` is reversed: each former parent is
    removed from above its child and appended beneath it instead.

    Returns `new_root`, which has no parent once the function finishes.
    """
    # Walk upwards first, recording the chain new_root -> ... -> current top
    # before any node is moved.
    lineage = [new_root]
    above = new_root.xpath("..")
    while len(above) > 0:
        lineage.append(above[0])
        above = above[0].xpath("..")

    # Flip the links one at a time, starting at the top of the chain and
    # working down towards new_root.
    for idx in range(len(lineage) - 1, 0, -1):
        upper, lower = lineage[idx], lineage[idx - 1]
        upper.remove(lower)
        lower.append(upper)
    return new_root
In [14]:
# Look up the elements whose cid attribute is 'P1' and re-root the tree at the
# first one returned.
p1_nodes = root.xpath("//*[@cid='P1'][1]")
root = root_tree_at(p1_nodes[0])

# Wrap the re-rooted element in an XMLTree and draw it.
t = XMLTree(root)
t.render_tree()
In [15]:
def flat_tree(root):
    """
    Render the subtree under `root` as a single-line string.

    A node appears as its `word` attribute, prefixed with `--<dep_label>--> `
    when it has a `dep_label` attribute. Children follow in ` ( ... )`,
    comma-separated, each rendered recursively.

    Returns None for a childless node that has neither `word` nor `dep_label`;
    such children are left out of their parent's list. A node that has a
    `dep_label` but no `word` is rendered with the text 'None', because the
    missing value goes through `%s` formatting.

    A node with children but no `word`/`dep_label` used to raise TypeError
    (None += str); it is now rendered as just the parenthesised child list.
    """
    label = root.get('dep_label')
    word = root.get('word')
    if label is not None:
        s = '--%s--> %s' % (label, word)
    else:
        s = word  # may be None when the node carries no word

    if len(root) > 0:
        rendered = [flat_tree(child) for child in root]
        group = '( %s )' % ', '.join(r for r in rendered if r is not None)
        # Only prepend the head text when there is one, so a wordless node
        # with children no longer crashes on None + str.
        s = group if s is None else s + ' ' + group
    return s
In [16]:
# Flatten the tree held by `t` into a one-line string and show it.
ft = flat_tree(t.root)
print(ft)  # call form prints the same text on Python 2 and runs on Python 3
In [81]:
# `re` is not imported by any earlier cell, so import it here to keep the
# notebook runnable from a fresh kernel.
import re

# Search the flattened tree for a literal 'G', then zero to five
# '--label-->' arrows (optional whitespace around each, nothing else in
# between), then a literal 'P'. Yields a match object or None.
re.search(r'[G](\s*--\w+-->\s*){0,5}[P]', ft)
In [ ]: