In [2]:

    
# lets start with the nltk package
from nltk.stem.snowball import SnowballStemmer
import nltk

Stemming- Snowball, try Porter



In [3]:

    
# Create a language specific stemmer instance
stemmer = SnowballStemmer("english")



In [4]:

    
print('running ---> {}'.format(stemmer.stem("running")))
print('expedition ---> {}'.format(stemmer.stem("expedition")))
print('resignation ---> {}'.format(stemmer.stem("resignation")))
print('Windows ---> {}'.format(stemmer.stem("Windows")))
print('assination ---> {}'.format(stemmer.stem("assination")))









    



running ---> run
expedition ---> expedit
resignation ---> resign
Windows ---> window
assination ---> assin

POS Tagging



In [ ]:

    
# A basic POS tagging
from nltk import pos_tag, word_tokenize
# sentence:
sentence = "Information Retrieval is the best course ever"
# 1. tokenize the words
text = word_tokenize(sentence)
# 2. Generate the Parts of Speech
print(pos_tag(text))



In [23]:

    
print('NN --> {}'.format("Noun"))
print('NNP --> {}'.format("Proper Noun"))
print('VBZ --> {}'.format("Verb"))
print('DT --> {}'.format("Determiner"))
print('JJS --> {}'.format("Adjective"))
print('RB --> {}'.format("Adverb"))









    



NN --> Noun
NNP --> Proper Noun
VBZ --> Verb
DT --> Determiner
JJS --> Adjective
RB --> Adverb

Chunking:



In [6]:

    
sentence = [("the", "DT"), ("little", "JJ"), ("black", "JJ"), ("cat", "NN"), ("meowed", "VBD"), 
            ("at", "IN"),  ("the", "DT"), ("dog", "NN")]
# Lets define a simple grammar using regular experssions
# Rule: 
## determiner (DT) is optional, it could be any number of adjectives (JJ) and a noun (NN)
grammar = "NP: {<DT>?<JJ>*<NN>}"

# Invoke the RegexParser and parse the sentence
cp = nltk.RegexpParser(grammar)
result = cp.parse(sentence)



In [14]:

    
print(result)









    



(S
  (NP the/DT little/JJ black/JJ cat/NN)
  meowed/VBD
  at/IN
  (NP the/DT dog/NN))