"""Janome tokenizer walkthrough (reconstructed from an IPython session).

Demonstrates basic morphological analysis of Japanese text with janome:
tokenizing a sentence, inspecting Token attributes (surface, part_of_speech,
infl_type, infl_form, base_form, reading, phonetic), streaming and
wakati (surface-only) modes, and filtering tokens by part of speech.

NOTE(review): this transcript assumes janome 0.3.x, where
``Tokenizer.tokenize()`` returns a list by default and accepts the
``stream=`` and ``wakati=`` keyword arguments. In janome 0.4+
``tokenize()`` returns a generator and ``stream=`` was removed —
confirm the installed version before running.
"""

# In [1]:
from janome.tokenizer import Tokenizer

# In [2]:
t = Tokenizer()

# In [3]:
s = 'すもももももももものうち'

# In [4]: tokenize() returns a list here (janome 0.3.x — see module note)
print(type(t.tokenize(s)))

# In [5]: elements are janome Token objects
print(type(t.tokenize(s)[0]))

# In [6]:
for token in t.tokenize(s):
    print(token)

# In [7]: stream=True yields tokens lazily instead of building a list
print(type(t.tokenize(s, stream=True)))

# In [8]:
for token in t.tokenize(s, stream=True):
    print(token)

# In [9]:
token = t.tokenize('走れ')[0]

# In [10]:
print(type(token))

# In [11]:
print(token)

# In [12]: surface form as it appeared in the input
print(token.surface)

# In [13]: part of speech, a comma-separated string
print(token.part_of_speech)

# In [14]:
print(token.part_of_speech.split(','))

# In [15]: first field is the coarse POS category
print(token.part_of_speech.split(',')[0])

# In [16]: inflection type
print(token.infl_type)

# In [17]: inflection form
print(token.infl_form)

# In [18]: dictionary (base) form
print(token.base_form)

# In [19]:
print(token.reading)

# In [20]:
print(token.phonetic)

# In [21]:
s = '走れと言われたので走ると言った'

# In [22]:
for token in t.tokenize(s):
    print(token)

# In [23]: wakati=True returns surface strings only, no POS information
print(t.tokenize(s, wakati=True))

# In [24]: a tokenizer constructed in wakati mode does the same by default
t_wakati = Tokenizer(wakati=True)

# In [25]:
print(t_wakati.tokenize(s))

# In [26]:
print([token.surface for token in t.tokenize(s)])

# In [27]:
print([token.base_form for token in t.tokenize(s)])

# In [28]:
print([token.part_of_speech.split(',')[0] for token in t.tokenize(s)])

# In [29]: verbs only
print([token.surface for token in t.tokenize(s)
       if token.part_of_speech.startswith('動詞')])

# In [30]: everything except verbs
print([token.surface for token in t.tokenize(s)
       if not token.part_of_speech.startswith('動詞')])

# In [31]: independent verbs only
print([token.surface for token in t.tokenize(s)
       if token.part_of_speech.startswith('動詞,自立')])

# In [32]: verbs and auxiliary verbs
print([token.surface for token in t.tokenize(s)
       if token.part_of_speech.split(',')[0] in ['動詞', '助動詞']])