In [1]:
from heapq import heapify, heappush, heappop
from collections import defaultdict
global codes
codes = dict()
class HuffNode(object):
def __lt__(self, other):
return self.weight < other.weight
def __init__(self, weight, ltr=None):
self.weight = weight
self.ltr = ltr
self.left = None
self.right = None
def calculate_frequencies(string):
freqs = defaultdict(int)
for c in string:
freqs[c] += 1
return freqs
def build_tree(frequencies):
heap = []
for ltr, weight in frequencies.items():
node = HuffNode(weight, ltr)
heappush(heap, node)
while len(heap) > 1:
n1 = heappop(heap)
n2 = heappop(heap)
weight = n1.weight + n2.weight
root = HuffNode(weight)
root.left = n1
root.right = n2
heappush(heap, root)
tree = heappop(heap)
return tree
def generate_codes(node, code=''):
if node.ltr: #leaf
codes[node.ltr] = (node.weight, code)
else:
code += '0'
generate_codes(node.left, code)
code = code[:-1]
code += '1'
generate_codes(node.right, code)
code = code[:-1]
def print_coding_table(code_dict):
print("Symbol\tWeight\tCode")
for symbol, code in sorted(code_dict.items(), key=lambda item: item[1][0], reverse=True):
print(symbol, *code, sep='\t')
def encode(code_dict, msg):
return ''.join([code_dict[ltr][1] for ltr in msg])
def decode(tree, compressed):
current_node = tree
uncompressed = ''
for bit in compressed:
if bit == '0':
current_node = current_node.left
elif bit == '1':
current_node = current_node.right
if current_node.ltr:
uncompressed += current_node.ltr
current_node = tree
return uncompressed
test_string = 'this is an example for huffman encoding'
freqs = calculate_frequencies(test_string)
tree = build_tree(freqs)
generate_codes(tree)
print_coding_table(codes)
compressed = encode(codes, test_string)
print("\nCompressed:\n", compressed, sep='')
decompressed = decode(tree, compressed)
print("\nDecompressed:\n", decompressed, sep='')