Polyglot is a natural language pipeline that supports massive multilingual applications.
In [9]:
import polyglot
from polyglot.text import Text, Word
In [10]:
text = Text("Bonjour, Mesdames.")
print("Language Detected: Code={}, Name={}\n".format(text.language.code, text.language.name))
In [11]:
zen = Text("Beautiful is better than ugly. "
"Explicit is better than implicit. "
"Simple is better than complex.")
print(zen.words)
In [12]:
print(zen.sentences)
In [13]:
text = Text(u"O primeiro uso de desobediência civil em massa ocorreu em setembro de 1906.")
print("{:<16}{}".format("Word", "POS Tag")+"\n"+"-"*30)
for word, tag in text.pos_tags:
print(u"{:<16}{:>2}".format(word, tag))
In [14]:
text = Text(u"In Großbritannien war Gandhi mit dem westlichen Lebensstil vertraut geworden")
print(text.entities)
In [15]:
print("{:<16}{}".format("Word", "Polarity")+"\n"+"-"*30)
for w in zen.words[:6]:
print("{:<16}{:>2}".format(w, w.polarity))
In [19]:
word = Word("Obama", language="en")
print("Neighbors (Synonms) of {}".format(word)+"\n"+"-"*30)
for w in word.neighbors:
print("{:<16}".format(w))
print("\n\nThe first 10 dimensions out the {} dimensions\n".format(word.vector.shape[0]))
print(word.vector[:10])
In [17]:
word = Text("Preprocessing is an essential step.").words[0]
print(word.morphemes)
In [18]:
from polyglot.transliteration import Transliterator
transliterator = Transliterator(source_lang="en", target_lang="ru")
print(transliterator.transliterate(u"preprocessing"))