# Definition(s)

Huffman coding is a lossless data compression algorithm. The idea is to assign variable-length codes to input characters, lengths of the assigned codes are based on the frequencies of corresponding characters. The most frequent character gets the smallest code and the least frequent character gets the largest code.

# Algorithm(s)

``````

In [1]:

from collections import Counter
from operator import itemgetter

``````
``````

In [2]:

def find_min(queue):
elem = min(queue, key=itemgetter(0))
queue.remove(elem)
return elem

def huffman_coding(text):
queue = [(c, e) for (e, c) in Counter(text).items()]

while len(queue) > 1:
c1, e1 = find_min(queue)
c2, e2 = find_min(queue)
queue.append((c1 + c2, (e1, e2)))

assert len(queue) == 1
mapping = dict()
create_codes(queue[0][1], mapping)
return mapping

``````
``````

In [3]:

def create_codes(node, mapping, code=''):
if isinstance(node, tuple):
create_codes(node[0], mapping, code + '0')
create_codes(node[1], mapping, code + '1')
else:
mapping[node] = code

``````
``````

In [4]:

def encode(text):
mapping = huffman_coding(text)
encoded = ''.join(map(lambda c: mapping[c], text))
return encoded

def decode(encoded, mapping):
rev_mapping = dict((m, c) for c, m in mapping.items())
decoded = []
word = ''

for c in encoded:
word += c
if word in rev_mapping:
decoded.append(rev_mapping[word])
word = ''

return ''.join(decoded)

``````

# Run(s)

``````

In [5]:

text = 'alexandru'
mapping = huffman_coding(text)

print("Huffman codes")
for (e, m) in sorted(mapping.items()):
print(e, m)

print()
print("Encoded text:", encode(text))
print("Decoded text:", decode(encode(text), mapping))

``````
``````

Huffman codes
a 111
d 100
e 001
l 000
n 011
r 101
u 110
x 010

Encoded text: 111000001010111011100101110
Decoded text: alexandru

``````
``````

In [6]:

text = 'abca'
mapping = huffman_coding(text)

print("Huffman codes")
for (e, m) in sorted(mapping.items()):
print(e, m)

print()
print("Encoded text:", encode(text))
print("Decoded text:", decode(encode(text), mapping))

``````
``````

Huffman codes
a 0
b 10
c 11

Encoded text: 010110
Decoded text: abca

``````
``````

In [7]:

text = 'abca'
mapping = huffman_coding(text)

print("Huffman codes")
for (e, m) in sorted(mapping.items()):
print(e, m)

print()
print("Encoded text:", encode(text))
print("Decoded text:", decode(encode(text), mapping))

``````
``````

Huffman codes
a 0
b 10
c 11

Encoded text: 010110
Decoded text: abca

``````
``````

In [8]:

text = 'abcaabccabda'
mapping = huffman_coding(text)

print("Huffman codes")
for (e, m) in sorted(mapping.items()):
print(e, m)

print()
print("Encoded text:", encode(text))
print("Decoded text:", decode(encode(text), mapping))

``````
``````

Huffman codes
a 0
b 111
c 10
d 110

Encoded text: 01111000111101001111100
Decoded text: abcaabccabda

``````