In [ ]:
import math
import pandas as pd
In [ ]:
def entropy(df, attribute):
    """
    Calculates the Shannon entropy (in bits) of one column of a dataframe.

    :param df: DataFrame holding the data
    :param attribute: label of the column whose entropy is calculated
    :returns: the entropy of the column's value distribution as a float,
        using log base 2; 0.0 when the dataframe has no rows
    """
    # Count occurrences of each distinct value in a single pass over the
    # column. dict.get replaces dict.has_key, which no longer exists in
    # Python 3.
    counts = {}
    for value in df[attribute]:
        counts[value] = counts.get(value, 0) + 1

    # Float total keeps the division below a true division on Python 2 too.
    total = float(len(df))

    result = 0.0
    for count in counts.values():
        probability = count / total
        result -= probability * math.log(probability, 2)
    return result
In [ ]: