notebook.community

Edit and run



In [1]:

    
# Conditional Frequency Distribution
# Given a list of (condition, item) tuples, nltk's ConditionalFreqDist gives
# a separate frequency distribution of the items for each condition.
import nltk



In [2]:

    
# Sample (group, member) pairs; ('Group B', 'Kittu') is listed twice.
names = [
    ('Group A', 'Hari'),
    ('Group A', 'Bhavu'),
    ('Group A', 'Siri'),
    ('Group B', 'Ajith'),
    ('Group B', 'Kittu'),
    ('Group B', 'Kittu'),
    ('Group B', 'Laks'),
]



In [3]:

    
# FreqDist treats each whole tuple as a single item and counts how many times
# it occurs, so the resulting distribution is keyed by the (group, name) tuples.
nltk.FreqDist(names)









    Out[3]:





FreqDist({('Group A', 'Bhavu'): 1,
          ('Group A', 'Hari'): 1,
          ('Group A', 'Siri'): 1,
          ('Group B', 'Ajith'): 1,
          ('Group B', 'Kittu'): 2,
          ('Group B', 'Laks'): 1})



In [4]:

    
# ConditionalFreqDist groups the tuples by their first element (the condition)
# and builds one FreqDist per condition: its keys are the second elements of
# the tuples and its values are the number of times each one occurs.
nltk.ConditionalFreqDist(names)









    Out[4]:





ConditionalFreqDist(nltk.probability.FreqDist,
                    {'Group A': FreqDist({'Bhavu': 1, 'Hari': 1, 'Siri': 1}),
                     'Group B': FreqDist({'Ajith': 1, 'Kittu': 2, 'Laks': 1})})



In [ ]: