In [1]:
# Conditional Frequency Distribution
# Given a list of (condition, sample) tuples, nltk.ConditionalFreqDist gives the
# frequency of each sample, grouped by its condition.
import nltk

In [2]:
# Sample data: (group, member) pairs. ('Group B', 'Kittu') is listed twice on
# purpose so that one count in the distributions below is greater than 1.
names = [
    ('Group A', 'Hari'),
    ('Group A', 'Bhavu'),
    ('Group A', 'Siri'),
    ('Group B', 'Ajith'),
    ('Group B', 'Kittu'),
    ('Group B', 'Kittu'),
    ('Group B', 'Laks'),
]

In [3]:
# FreqDist() treats each whole tuple as a single sample, so the resulting
# distribution is keyed by the (group, member) tuple and counts how many times
# that exact tuple occurs in the list.
nltk.FreqDist(names)


Out[3]:
FreqDist({('Group A', 'Bhavu'): 1,
          ('Group A', 'Hari'): 1,
          ('Group A', 'Siri'): 1,
          ('Group B', 'Ajith'): 1,
          ('Group B', 'Kittu'): 2,
          ('Group B', 'Laks'): 1})

In [4]:
# ConditionalFreqDist() groups the tuples by their first element (the condition).
# Each condition maps to its own FreqDist, whose keys are the second elements of
# the tuples and whose values count how often each one occurs for that condition.
nltk.ConditionalFreqDist(names)


Out[4]:
ConditionalFreqDist(nltk.probability.FreqDist,
                    {'Group A': FreqDist({'Bhavu': 1, 'Hari': 1, 'Siri': 1}),
                     'Group B': FreqDist({'Ajith': 1, 'Kittu': 2, 'Laks': 1})})

In [ ]: