In [8]:
from collections import defaultdict, namedtuple
class Node:
    """One word of a tree, linked to its parent and to its children.

    Attributes:
        index: position of the word, as supplied by the caller.
        word: the token text.
        parent: the parent ``Node``; holds the placeholder ``0`` while the
            node has no parent.
        children: attached child nodes, in attachment order.
        is_root: ``True`` until ``set_parent`` is called on this node.
        has_child: ``True`` once ``add_children`` has been called on it.
    """

    def __init__(self, index, word):
        """Create a detached node (no parent, no children)."""
        self.is_root = True
        self.has_child = False
        self.index = index
        self.word = word
        # 0 is the "no parent" placeholder; set_parent() replaces it.
        self.parent = 0
        self.children = []

    def set_parent(self, parent):
        """Record ``parent`` as this node's parent and clear ``is_root``.

        Only this node is updated; ``parent.children`` is not touched. Use
        ``add_children`` on the parent to link both directions.
        """
        assert isinstance(parent, Node)
        self.parent = parent
        self.is_root = False

    def add_children(self, children):
        """Attach a single node as a child of this node.

        Despite the plural name, ``children`` must be one ``Node``. The child
        is appended to ``self.children`` and its parent is set to ``self``.
        A child that already had a parent is not removed from the old
        parent's ``children`` list.
        """
        assert isinstance(children, Node)
        children.set_parent(self)
        self.children.append(children)
        self.has_child = True

    @property
    def show_parent(self):
        """Print the parent's word, or a notice when this node is the root.

        This is a property used for its printing side effect: access it
        without call parentheses. Its value is ``None``.
        """
        if self.is_root:
            print('This is the root node!')
        else:
            print('Parent of Node %s : %s' % (self.word, str(self.parent.word)))

    @property
    def show_children(self):
        """Print one line per child, or a notice when there are none.

        Property used for its printing side effect; its value is ``None``.
        """
        if self.has_child:
            for node in self.children:
                print('Children of Node %s : %s' %(self.word, str(node.word)))
        else:
            print('This node have no children')

    @property
    def show(self):
        """Print a framed summary: parent, current word, then children.

        Property used for its printing side effect; its value is ``None``,
        so ``node.show()`` would fail -- write ``node.show``.
        """
        print('------------------------------------------')
        self.show_parent
        print('------------------------------------------')
        print('Current Word : ', self.word)
        print('------------------------------------------')
        self.show_children
        print('------------------------------------------')

    def get_parent(self):
        """Return the parent node, or the placeholder ``0`` for a root node."""
        # No assertion here: an assert on the bare name `parent` (which is not
        # defined in this scope) would raise NameError on every call.
        return self.parent

    def get_children(self):
        """Return the list of child nodes (the live list, not a copy)."""
        return self.children
In [9]:
# (position, token) pair used by ParseTree.zip_dict.
IndexWord = namedtuple('IndexWord', 'index word')


class ParseTree:
    """Build a tree of ``Node`` objects from per-word head indices.

    Attributes:
        num_layer: initialised to 1; not updated by any method of this class.
        root_node: first node left without a parent after ``parse``, or
            ``None`` before parsing / when no such node exists.
        node_list: one ``Node`` per input word, in input order.
        rela_dict: maps a 0-based head position (``-1`` for "no head") to the
            list of 0-based positions of the words attached to it.
    """

    def __init__(self):
        """Create an empty tree; call ``parse`` to populate it."""
        self.num_layer = 1
        self.root_node = None
        self.node_list = []
        self.rela_dict = {}

    def zip_dict(self, index_list, word_list):
        """Group words by the key found at the same position in ``index_list``.

        Each word is wrapped as ``IndexWord(position, word)`` first. Returns
        a ``defaultdict(list)``. Extra items in the longer input are ignored
        (``zip`` semantics).
        """
        word_dict = defaultdict(list)
        indexed_words = (IndexWord(pos, word) for pos, word in enumerate(word_list))
        for index, word in zip(index_list, indexed_words):
            word_dict[index].append(word)
        return word_dict

    def szip_dict(self, word_list, index_list):
        """Group the items of ``word_list`` by the paired key in ``index_list``.

        Note the argument order (values first, keys second), which is the
        reverse of ``zip_dict``. Returns a ``defaultdict(list)``.
        """
        word_dict = defaultdict(list)
        for index, word in zip(index_list, word_list):
            word_dict[index].append(word)
        return word_dict

    def trans_index(self, index_list):
        """Shift every index down by one (1-based -> 0-based; 0 becomes -1)."""
        return [x - 1 for x in index_list]

    def parse(self, index_list, word_list):
        """Create the nodes and link every word to its head.

        Args:
            index_list: for each word, the 1-based position of its head;
                ``0`` means the word has no head.
            word_list: the words, in order.

        After the call ``node_list``, ``rela_dict`` and ``root_node`` are
        replaced. ``root_node`` is the first node without a parent, or
        ``None`` when there is none (empty input, or no word has head 0).
        """
        self.node_list = [Node(i, word) for i, word in enumerate(word_list)]
        heads = self.trans_index(index_list)
        self.rela_dict = self.szip_dict(range(len(heads)), heads)
        for head, dependents in self.rela_dict.items():
            if head == -1:
                # -1 is the shifted "no head" marker: these words stay roots.
                continue
            for dependent in dependents:
                self.node_list[head].add_children(self.node_list[dependent])
        # next(..., None) instead of [...][0]: no IndexError when nothing
        # qualifies as a root.
        self.root_node = next((n for n in self.node_list if n.is_root), None)
In [10]:
# Hand-built example: `a` is the top node with children `b` and `c`;
# `d` hangs below `b`.
a, b, c, d = (
    Node(position, token)
    for position, token in enumerate(("我们", '你们', '他们', '出去玩'), start=1)
)
b.add_children(d)
a.add_children(b)
a.add_children(c)
# `show` is a property: accessing it prints the summary of `a`.
a.show
In [11]:
def show_children_tree(node):
    """Print the summary of every leaf below ``node``, depth-first.

    Children that have children of their own are descended into rather than
    printed; only childless descendants are displayed (via ``Node.show``).
    ``node`` itself is never printed. Returns ``None``.
    """
    assert isinstance(node, Node)
    for child in node.children:
        if child.has_child:
            # Recurse without `return`, so the remaining siblings of `child`
            # are still visited after its subtree has been printed.
            show_children_tree(child)
        else:
            child.show
# Walk the tree starting from node `a`.
show_children_tree(a)
In [12]:
# index_list[i] is the 1-based position of the head of word_list[i];
# 0 means the word has no head (it becomes the root).
a = ParseTree()
index_list = [4, 4, 4, 0, 4, 7, 5]
word_list = ['蛮', '好', '一直', '信赖', '宝宝', '很', '喜欢']
a.parse(index_list=index_list, word_list=word_list)
In [13]:
# Inspect the mapping built by parse(): 0-based head position -> positions of
# the words attached to it (-1 collects the words without a head).
a.rela_dict
Out[13]:
In [14]:
# Print the summary of the root node (`show` is a property, so no parentheses).
a.root_node.show
In [15]:
# Print the summary of every node, in the order the words were given.
for node in a.node_list:
    node.show
In [9]:
# Sanity check: children are stored as Node objects.
assert isinstance(a.node_list[6].children[0], Node)
# `show` is a property that prints and evaluates to None, so it must be
# accessed without call parentheses (`.show()` raises TypeError).
a.root_node.children[3].children[0].show
In [183]:
import pandas as pd
# Load the tab-separated parser output. header=None makes pandas treat the
# first line as data and label the columns 0, 1, 2, ...
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory.
data = pd.read_csv(
    '/home/jeffmxh/models/syntaxnet/parse_output.conll',
    sep='\t',
    header=None,
)
In [189]:
# Keep the columns labelled 0, 1 and 3-7 (column 2 is dropped). The frame was
# read with header=None, so labels equal the original positions. Selecting by
# label rather than by position keeps this cell re-runnable: `data` is
# overwritten here, and a second positional selection of column 7 would be
# out of bounds on the already-reduced frame.
# TODO: give the columns descriptive names.
data = data.loc[:, [0, 1, 3, 4, 5, 6, 7]]
data.head(10)
Out[189]:
In [191]:
# Collect the values of the column labelled 7, then display the distinct ones.
a = [value for value in data.loc[:, 7]]
set(a)
Out[191]: