In [3]:
from konlpy.tag import Twitter
# Sanity check: tag one sample sentence and print every token next to its POS label.
result = Twitter().pos("나무위키 말뭉치를 만들어보자")
for word, tag in result:
    print(' '.join((word, tag)))


나무 Noun
위키 Noun
말뭉치 Noun
를 Josa
만들어 Verb
보자 Verb

In [9]:
def flat(content, pos_tagger=None):
    """Tag ``content`` and return its tokens as ``"word/TAG"`` strings.

    Args:
        content: Text handed to the tagger's ``pos`` method.
        pos_tagger: Optional object exposing ``pos(text)`` that yields
            ``(word, tag)`` pairs. When omitted, the notebook-level
            ``tagger`` is used, which matches the original behaviour.

    Returns:
        A list with one ``"word/TAG"`` string per pair, in the order the
        tagger produced them.
    """
    # The global is only looked up when no tagger is injected, so callers
    # (and tests) can supply their own without relying on notebook state.
    active_tagger = tagger if pos_tagger is None else pos_tagger
    return ["{}/{}".format(word, tag) for word, tag in active_tagger.pos(content)]

In [13]:
# Try flat() on a sample sentence (the input deliberately keeps its leading space).
sample_text = " 나무위키 말뭉치를 만들어보자"
tagged = flat(sample_text)

In [14]:
# Join the tagged tokens into one space-separated string; as the last
# expression in the cell, its value is what the notebook displays.
' '.join(tagged)


Out[14]:
'나무/Noun 위키/Noun 말뭉치/Noun 를/Josa 만들어/Verb 보자/Verb'

In [4]:
# Corpus locations. The directory can be overridden through the
# NAMUWIKI_DATA_DIR environment variable so the notebook is not tied to one
# machine; the default reproduces the original paths.
import os

data_dir = os.environ.get('NAMUWIKI_DATA_DIR', '/Users/swkim/Data/namuwiki180326')
# Text file that is read line by line.
input_filename = os.path.join(data_dir, '_namuwiki_20180326_mini.txt')
# File that receives the POS-tagged lines.
output_filename = os.path.join(data_dir, '_namuwiki_20180326_mini_pos_tagged_corpus.txt')

In [5]:
# Create the tagger once at notebook level; flat() looks up this `tagger`
# name each time it is called.
# NOTE(review): newer KoNLPy releases expose this tagger as `Okt` and
# deprecate `Twitter` — confirm against the installed version.
tagger = Twitter()

In [ ]:
# Read the input line by line, split each line on '.', tag every fragment
# with flat(), and write each fragment that has more than one token to the
# output file, one fragment per line.
# Both files are managed by the same `with`, so the input handle is closed
# deterministically as well, even if tagging raises part-way through.
with open(output_filename, 'w', encoding='utf-8') as output_file, \
        open(input_filename, 'r', encoding='utf-8') as input_file:
    for line in input_file:
        for sentence in line.split('.'):
            tagged = flat(sentence)
            # Skip fragments that produced zero or one token.
            if len(tagged) > 1:
                a_line = ' '.join(tagged)
                output_file.write(a_line + '\n')

In [ ]: