In [26]:
from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import BigramTagger
from nltk.tag import TrigramTagger
from nltk.tag import UnigramTagger
from nltk.tokenize import wordpunct_tokenize

In [28]:
# Training data: open the tagged corpus file 'latin_training_set.pos' found in
# the current working directory and pull out its tagged sentences, which the
# tagger cells below train on.
reader = TaggedCorpusReader(root='.', fileids='latin_training_set.pos')
train_sents = reader.tagged_sents()

In [31]:
# First link of the backoff chain: a unigram tagger trained on the corpus
# sentences. It is given no backoff tagger of its own.
tagger1 = UnigramTagger(train=train_sents)

In [32]:
# Second link: a bigram tagger trained on the same sentences, deferring to the
# unigram tagger (tagger1) as its backoff.
tagger2 = BigramTagger(
    train=train_sents,
    backoff=tagger1,
)

In [33]:
# Final link: a trigram tagger trained on the same sentences, deferring to the
# bigram tagger (tagger2), which in turn defers to the unigram tagger.
tagger3 = TrigramTagger(
    train=train_sents,
    backoff=tagger2,
)

In [35]:
# Score the complete trigram -> bigram -> unigram backoff chain.
#
# NOTE: the gold data passed here is `train_sents`, the very sentences the
# taggers were trained on, so the number displayed is training-set accuracy.
# It says how well the model reproduces its own training data, not how it
# will perform on unseen text; use a held-out split for that.
#
# Newer NLTK releases deprecate TaggerI.evaluate() in favour of the
# equivalent TaggerI.accuracy(); prefer accuracy() when the installed NLTK
# provides it and fall back to evaluate() on older installs. Both return the
# same value for the same gold data.
(
    tagger3.accuracy(train_sents)
    if hasattr(tagger3, 'accuracy')
    else tagger3.evaluate(train_sents)
)


Out[35]:
0.9796586568315676