In [1]:
from pyknp import Juman,Jumanpp,KNP
In [2]:
# Create the Juman analyzer instance; later cells call juman.analysis() on it
juman = Juman()
In [3]:
# Sample sentence to analyze.
# NOTE(review): the name `str` shadows the built-in type; it is kept because
# a later cell passes `str` to knp.parse().
str = "巨大なマーライオンがセントーサ島にいた"
# Run Juman on the sentence; the following cells iterate result.mrph_list()
result = juman.analysis(str)
In [4]:
# Print one line per morpheme, listing every attribute with its Japanese label
row_format = "見出し:{}, 読み:{}, 原形:{}, 品詞:{}, 品詞細分類:{}, 活用型:{}, 活用形:{}, 意味情報:{}, 代表表記:{}"
for mp in result.mrph_list():
    fields = (
        mp.midasi, mp.yomi, mp.genkei, mp.hinsi, mp.bunrui,
        mp.katuyou1, mp.katuyou2, mp.imis, mp.repname,
    )
    print(row_format.format(*fields))
In [5]:
import pandas as pd
# Column headers, in the same order as the attributes collected for each morpheme
clmns = (
    '見出し', '読み', '原形', '品詞', '品詞細分類',
    '活用型', '活用形', '意味情報', '代表表記',
)
# One tuple of attributes per morpheme
mps = [
    (
        mp.midasi, mp.yomi, mp.genkei, mp.hinsi, mp.bunrui,
        mp.katuyou1, mp.katuyou2, mp.imis, mp.repname,
    )
    for mp in result.mrph_list()
]
# The bare last expression lets the notebook render the table
pd.DataFrame(mps, columns=clmns)
Out[5]:
In [6]:
# Instantiate KNP, passing '-tab -anaphora' as its option string
knp = KNP(option='-tab -anaphora')
In [7]:
# Parse the sentence with KNP and collect its phrase objects.
# NOTE(review): this rebinds `result`, which previously held the Juman
# analysis; re-running the earlier Juman cells after this one would see
# the KNP result instead.
result = knp.parse(str)
ph_objs = result.bnst_list()
In [8]:
# Build a dictionary keyed by phrase id (bnst_id). Each entry holds:
#   "phrase_list": surface forms (midasi) of the morphemes in the phrase
#   "phrase":      those surface forms joined into a single string
#   "parent_id":   id of the phrase this one depends on
ph_dic = {}
for ph_obj in ph_objs:
    # Use the public mrph_list() accessor instead of the private
    # _mrph_list._mrph attributes, which are pyknp implementation details.
    phrase_list = [mrph.midasi for mrph in ph_obj.mrph_list()]
    ph_dic[ph_obj.bnst_id] = {
        "phrase_list": phrase_list,
        "phrase": ''.join(phrase_list),
        "parent_id": ph_obj.parent_id,
    }
In [9]:
# Show dependency of phrases
for p in ph_dic.items():
if p[1]['parent_id'] > 0:
print(p[1]['phrase'], '=>', ph_dic[p[1]['parent_id']]['phrase'])
In [10]:
# Show dependency of phrases (list representation)
for p in ph_dic.items():
if p[1]['parent_id'] > 0:
print(p[1]['phrase_list'], '=>', ph_dic[p[1]['parent_id']]['phrase_list'])