In [1]:
# A bigram is a pair of two consecutive words in a sentence.
# Likewise, a trigram is a group of three consecutive words in a sentence.
# In general, an n-gram is a group of n consecutive words in a given sentence.

In [2]:
# Importing nltk
import nltk

In [3]:
# Sample sentence that the following cells tokenize and split into n-grams
text1: str = "I think it might rain today."

In [4]:
# Split the sample sentence into word and punctuation tokens.
# NOTE(review): word_tokenize relies on NLTK tokenizer data being installed;
# if it raises LookupError, download the resource named in the error message
# with nltk.download(...) and re-run this cell.
tokens = nltk.word_tokenize(text=text1)

In [5]:
# Getting the bigrams (pairs of adjacent tokens) for the tokens.
# nltk.bigrams returns a one-shot iterator, so it is materialized into a list
# here; otherwise re-running the printing cell would show nothing, because the
# iterator is already exhausted after the first pass.
bigrams = list(nltk.bigrams(tokens))

In [6]:
# Walk over the bigrams and print them one per line.
# Obs: every printed entry is a tuple of two strings that sit next to each
# other in the token sequence.
for pair in bigrams:
    print(pair)


('I', 'think')
('think', 'it')
('it', 'might')
('might', 'rain')
('rain', 'today')
('today', '.')

In [7]:
# Now let's get the trigrams (groups of three adjacent tokens) for the same tokens.
# nltk.trigrams returns a one-shot iterator, so it is materialized into a list
# here; otherwise re-running the printing cell would show nothing, because the
# iterator is already exhausted after the first pass.
trigrams = list(nltk.trigrams(tokens))

In [8]:
# Walk over the trigrams and print them one per line.
# Obs: every printed entry is a tuple of three strings that sit next to each
# other in the token sequence.
for triple in trigrams:
    print(triple)


('I', 'think', 'it')
('think', 'it', 'might')
('it', 'might', 'rain')
('might', 'rain', 'today')
('rain', 'today', '.')

In [9]:
# Now using the generic ngrams
from nltk.util import ngrams

In [10]:
# The ngrams function accepts the tokens and the n-gram size,
# e.g. 2 for bigrams, 3 for trigrams.
# Getting bigrams from ngrams. The result is a one-shot iterator, so it is
# materialized into a list to keep the printing cell re-runnable.
bigrams = list(ngrams(tokens, 2))

In [11]:
# Print each bigram produced by ngrams(tokens, 2) on its own line
for pair in bigrams:
    print(pair)


('I', 'think')
('think', 'it')
('it', 'might')
('might', 'rain')
('rain', 'today')
('today', '.')

In [12]:
# Getting trigrams from ngrams by passing 3 as the n-gram size.
# The result is a one-shot iterator, so it is materialized into a list to keep
# the printing cell re-runnable.
trigrams = list(ngrams(tokens, 3))

In [13]:
# Print each trigram produced by ngrams(tokens, 3) on its own line
for triple in trigrams:
    print(triple)


('I', 'think', 'it')
('think', 'it', 'might')
('it', 'might', 'rain')
('might', 'rain', 'today')
('rain', 'today', '.')

In the same way, you can generate n-grams of any size by passing the desired n (2 for bigrams, 3 for trigrams, and so on) to `ngrams`.