In [1]:
import nltk
from nltk.stem.snowball import SnowballStemmer
See which languages are supported.
In [2]:
# Unpack the supported-language names; print() separates its arguments
# with single spaces, so they all appear on one line.
print(*SnowballStemmer.languages)
Create a new instance of a language-specific subclass.
In [3]:
# Default English stemmer (stopwords are stemmed like any other word);
# reused by the cells below.
stemmer = SnowballStemmer(language="english")
Stem a word.
In [4]:
# Stem a single word with the English stemmer created above and print the
# result (NLTK's documentation shows `run` for this input).
print(stemmer.stem("running"))
Optionally, leave stopwords unstemmed by passing `ignore_stopwords=True`.
In [7]:
# With ignore_stopwords=True the stemmer returns stopwords unchanged. The
# stopword list is read from NLTK's "stopwords" corpus, which is installed
# separately from the library (see http://www.nltk.org/data.html). If the
# corpus is missing, fetch just that one corpus and retry rather than
# downloading every NLTK package, which takes a while.
try:
    stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
except LookupError:
    # NLTK signals a missing data resource with LookupError. If the download
    # fails (e.g. no network), the retry raises the same error as before.
    nltk.download("stopwords")
    stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
In [8]:
# Run the same word through both stemmers to show the effect of
# ignore_stopwords.
print(stemmer.stem("having"))  # default stemmer: stopwords are stemmed too
print(stemmer2.stem("having"))  # stemmer2 skips stopwords, so a stopword such as "having" should come back unstemmed
The 'english' stemmer generally gives better results than the original 'porter' stemmer. For example, compare how each one stems "generously":
In [10]:
# Stem the same word with each algorithm: first the Snowball "english"
# stemmer, then the original "porter" stemmer, one result per line.
print(SnowballStemmer(language="english").stem("generously"))
print(SnowballStemmer(language="porter").stem("generously"))