# Word-frequency analysis of Japanese text with the Janome morphological
# analyzer and collections.Counter.
#
# This file is a Jupyter notebook session exported to plain text; the
# original "In [n]:" cell markers are preserved below as comments so the
# file is valid, runnable Python.

# In [1]:
from janome.tokenizer import Tokenizer
import collections

# In [2]:
t = Tokenizer()

# In [3]:
s = '人民の人民による人民のための政治'

# In [4]:
# Print the full morphological analysis (surface, POS, readings, ...) per token.
for token in t.tokenize(s):
    print(token)

# In [5]:
# wakati=True makes tokenize() yield surface strings only, which Counter
# tallies into word frequencies.
c = collections.Counter(t.tokenize(s, wakati=True))

# In [6]:
print(type(c))

# In [7]:
print(c)

# In [8]:
print(c['人民'])

# In [9]:
# Counter returns 0 for a missing key instead of raising KeyError.
print(c['国民'])

# In [10]:
# most_common() lists (word, count) pairs in descending count order.
mc = c.most_common()
print(mc)

# In [11]:
print(mc[0][0])

# In [12]:
print(mc[0][1])

# In [13]:
# Unzip the (word, count) pairs into two parallel tuples.
words, counts = zip(*c.most_common())

# In [14]:
print(words)

# In [15]:
print(counts)

# In [16]:
s = '走れと言われたので走ると言った'

# In [17]:
# Counting surface forms: inflected variants (走れ / 走る) are tallied separately.
print(collections.Counter(t.tokenize(s, wakati=True)))

# In [18]:
# Counting base (dictionary) forms merges inflected variants of the same word.
print(collections.Counter(token.base_form for token in t.tokenize(s)))

# In [19]:
# This is a generator expression, not a list — so the type is <class 'generator'>.
print(type(token.base_form for token in t.tokenize(s)))

# In [20]:
# Restrict the count to independent verbs (POS prefix '動詞,自立').
print(collections.Counter(token.base_form for token in t.tokenize(s)
                          if token.part_of_speech.startswith('動詞,自立')))

# In [21]:
# Tally tokens by coarse part-of-speech category (the first comma-separated field).
print(collections.Counter(token.part_of_speech.split(',')[0] for token in t.tokenize(s)))