In [1]:
from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import AffixTagger
import pickle

In [2]:
# Training data: read the tagged corpus file from the current working
# directory and expose it as a sequence of tagged sentences.
reader = TaggedCorpusReader(root='.', fileids='greek_training_set.pos')
train_sents = reader.tagged_sents()

2-char prefix tagger


In [3]:
# Train an affix tagger keyed on the first 2 characters of each word
# (a positive affix_length selects a prefix; NLTK uses negative values for suffixes).
two_prefix_tagger = AffixTagger(train=train_sents, affix_length=2)

In [4]:
# Score the 2-char prefix tagger against train_sents -- the same sentences it
# was trained on -- so this measures fit to the training data, not a held-out
# estimate.
# NOTE(review): no backoff tagger is configured, so words the affix model
# cannot tag presumably count as errors -- confirm against the NLTK docs.
two_prefix_tagger.evaluate(train_sents)


Out[4]:
0.1399196687464037

3-char prefix tagger


In [5]:
# Train an affix tagger keyed on the first 3 characters of each word
# (a positive affix_length selects a prefix; NLTK uses negative values for suffixes).
three_prefix_tagger = AffixTagger(train=train_sents, affix_length=3)

In [6]:
# Score the 3-char prefix tagger against train_sents -- the same sentences it
# was trained on -- so this measures fit to the training data, not a held-out
# estimate.
three_prefix_tagger.evaluate(train_sents)


Out[6]:
0.166630938815114

4-char prefix tagger


In [7]:
# Train an affix tagger keyed on the first 4 characters of each word
# (a positive affix_length selects a prefix; NLTK uses negative values for suffixes).
four_prefix_tagger = AffixTagger(train=train_sents, affix_length=4)

In [8]:
# Score the 4-char prefix tagger against train_sents -- the same sentences it
# was trained on -- so this measures fit to the training data, not a held-out
# estimate.
four_prefix_tagger.evaluate(train_sents)


Out[8]:
0.16541243103584444