In [8]:
from collections import defaultdict, namedtuple

class Node:
    def __init__(self, index, word):
        self.is_root = True
        self.has_child = False
        self.index = index
        self.word = word
        self.parent = 0
        self.children = []
        
    def set_parent(self, parent):
        assert isinstance(parent, Node)
        self.parent = parent
        self.is_root = False
        
    def add_children(self, children):
        assert isinstance(children, Node)
        children.set_parent(self)
        self.children.append(children)
        self.has_child = True
     
    @property
    def show_parent(self):
        if self.is_root:
            print('This is the root node!')
        else:
            print('Parent of Node %s : %s' % (self.word, str(self.parent.word)))
            
    @property
    def show_children(self):
        if self.has_child:
            for node in self.children:
                print('Children of Node %s : %s' %(self.word, str(node.word)))
        else:
            print('This node have no children')
    
    @property
    def show(self):
        print('------------------------------------------')
        self.show_parent
        print('------------------------------------------')
        print('Current Word : ', self.word)
        print('------------------------------------------')
        self.show_children
        print('------------------------------------------')

    def get_parent(self):
        assert isinstance(parent, Node)
        return self.parent
    
    def get_children(self):
        return self.children

In [9]:
IndexWord = namedtuple('IndexWord', 'index word')

class ParseTree:
    def __init__(self):
        self.num_layer = 1
        self.root_node = None
        self.node_list = []
        self.rela_dict = {}
        
    def zip_dict(self, index_list, word_list):
        word_list = zip(range(len(word_list)), word_list)
        word_list = [IndexWord._make(x) for x in word_list]
        word_dict = defaultdict(list)
        for index,word in zip(index_list, word_list):
            word_dict[index].append(word)
        return word_dict
    def szip_dict(self, word_list, index_list):
        word_dict = defaultdict(list)
        for index,word in zip(index_list, word_list):
            word_dict[index].append(word)
        return word_dict
    
    def trans_index(self, index_list):
        return [x-1 for x in index_list]

    def parse(self, index_list, word_list):
        self.node_list = [Node(i, word_list[i]) for i in range(len(word_list))]
        index_list = self.trans_index(index_list) 
        self.rela_dict = self.szip_dict(range(len(index_list)), index_list)
        for i in self.rela_dict.keys():
            if i!=-1:
                children_index = self.rela_dict[i]
                for children in children_index:
                    self.node_list[i].add_children(self.node_list[children])
        self.root_node = [x for x in self.node_list if x.is_root][0]

In [10]:
a = Node(1, "我们")
b = Node(2, '你们')
c = Node(3, '他们')
d = Node(4, '出去玩')

a.add_children(b)
a.add_children(c)
b.add_children(d)
a.show


------------------------------------------
This is the root node!
------------------------------------------
Current Word :  我们
------------------------------------------
Children of Node 我们 : 你们
Children of Node 我们 : 他们
------------------------------------------

In [11]:
def show_children_tree(node):
    assert isinstance(node, Node)
    for child in node.children:
        if child.has_child:
            return show_children_tree(child)
        else:
            child.show
            
show_children_tree(a)


------------------------------------------
Parent of Node 出去玩 : 你们
------------------------------------------
Current Word :  出去玩
------------------------------------------
This node have no children
------------------------------------------

In [12]:
word_list = ['蛮', '好', '一直', '信赖', '宝宝', '很', '喜欢']
index_list = [4, 4, 4, 0, 4, 7, 5]

a = ParseTree()
a.parse(index_list, word_list)

In [13]:
a.rela_dict


Out[13]:
defaultdict(list, {-1: [3], 3: [0, 1, 2, 4], 4: [6], 6: [5]})

In [14]:
a.root_node.show


------------------------------------------
This is the root node!
------------------------------------------
Current Word :  信赖
------------------------------------------
Children of Node 信赖 : 蛮
Children of Node 信赖 : 好
Children of Node 信赖 : 一直
Children of Node 信赖 : 宝宝
------------------------------------------

In [15]:
for node in a.node_list:
    node.show


------------------------------------------
Parent of Node 蛮 : 信赖
------------------------------------------
Current Word :  蛮
------------------------------------------
This node have no children
------------------------------------------
------------------------------------------
Parent of Node 好 : 信赖
------------------------------------------
Current Word :  好
------------------------------------------
This node have no children
------------------------------------------
------------------------------------------
Parent of Node 一直 : 信赖
------------------------------------------
Current Word :  一直
------------------------------------------
This node have no children
------------------------------------------
------------------------------------------
This is the root node!
------------------------------------------
Current Word :  信赖
------------------------------------------
Children of Node 信赖 : 蛮
Children of Node 信赖 : 好
Children of Node 信赖 : 一直
Children of Node 信赖 : 宝宝
------------------------------------------
------------------------------------------
Parent of Node 宝宝 : 信赖
------------------------------------------
Current Word :  宝宝
------------------------------------------
Children of Node 宝宝 : 喜欢
------------------------------------------
------------------------------------------
Parent of Node 很 : 喜欢
------------------------------------------
Current Word :  很
------------------------------------------
This node have no children
------------------------------------------
------------------------------------------
Parent of Node 喜欢 : 宝宝
------------------------------------------
Current Word :  喜欢
------------------------------------------
Children of Node 喜欢 : 很
------------------------------------------

In [9]:
isinstance(a.node_list[6].children[0], Node)
# a.root_node.children[0].show_children()
a.root_node.children[3].children[0].show()


Parent of Node 喜欢 : 宝宝
Word :  喜欢
children of Node 喜欢 : 很

In [183]:
import pandas as pd

data = pd.read_table('/home/jeffmxh/models/syntaxnet/parse_output.conll', header = None)

In [189]:
data = data.iloc[:,[0,1,3,4,5,6,7]]
# data.columns = ['index', 'word', 'pos', '']
data.head(10)


Out[189]:
0 1 3 4 5 6 7
0 1 PROPN NNP fPOS=PROPN++NNP 4 nsubj
1 2 ADV RB fPOS=ADV++RB 4 advmod
2 3 一直 ADV RB fPOS=ADV++RB 4 advmod
3 4 信赖 VERB VV fPOS=VERB++VV 0 ROOT
4 5 宝宝 VERB VV fPOS=VERB++VV 4 xcomp
5 6 ADV RB fPOS=ADV++RB 7 advmod
6 7 喜欢 VERB VV fPOS=ADJ++JJ 5 xcomp
7 1 最初 NOUN NN fPOS=NOUN++NN 2 nmod:tmod
8 2 VERB VC fPOS=VERB++VC 0 ROOT
9 3 VERB VV fPOS=VERB++VV 7 acl

In [191]:
a = list(data.loc[:,7])
set(a)


Out[191]:
{'ROOT',
 'acl',
 'acl:relcl',
 'advcl',
 'advmod',
 'amod',
 'appos',
 'aux',
 'aux:caus',
 'auxpass',
 'case',
 'case:aspect',
 'case:dec',
 'case:pref',
 'case:suff',
 'cc',
 'ccomp',
 'conj',
 'cop',
 'csubj',
 'dep',
 'det',
 'discourse',
 'dislocated',
 'dobj',
 'foreign',
 'iobj',
 'mark',
 'neg',
 'nmod',
 'nmod:tmod',
 'nsubj',
 'nsubjpass',
 'nummod',
 'punct',
 'root',
 'vocative',
 'xcomp'}