In [1]:
import dahuffman

Usage

Basic usage example, in which the code table is built from the given symbol frequencies:


In [2]:
# Build a codec from an explicit symbol -> frequency table.
codec = dahuffman.HuffmanCodec.from_frequencies({
    'e': 100,
    'n': 20,
    'x': 1,
    'i': 40,
    'q': 3,
})

In [3]:
# Encode a sample string, then show the raw bytes, their hex form and the byte count
# (one item per output line).
encoded = codec.encode('exeneeeexniqneieini')
print(encoded, encoded.hex(), len(encoded), sep='\n')


b'\x86|%\x13i@'
867c25136940
6

In [4]:
# Round trip: decode the bytes produced in the previous cell back into the input string.
codec.decode(encoded)


Out[4]:
'exeneeeexniqneieini'

In [5]:
# Display the code table of the codec built from the frequencies above.
codec.print_code_table()


Bits Code  Value Symbol
   5 00000     0 _EOF
   5 00001     1 'x'
   4 0001      1 'q'
   3 001       1 'n'
   2 01        1 'i'
   1 1         1 'e'

You can also "train" the codec by providing it data directly:


In [6]:
# Train a new codec on a sample string instead of passing explicit frequencies.
# NOTE: this rebinds `codec`, replacing the frequency-based codec from the cells above.
codec = dahuffman.HuffmanCodec.from_data('hello world how are you doing today foo bar lorem ipsum')

In [7]:
# Length of the encoded output for a 14-character input whose symbols all occur
# in the training string.
len(codec.encode('do lo er ad od'))


Out[7]:
6

Non-string sequences

dahuffman is not limited to strings: it also works with sequences of other symbols, e.g. country codes:


In [8]:
# Training data: a list of country codes, where each list item is one symbol.
countries = [
    'FR', 'UK', 'BE', 'IT', 'FR', 'IT',
    'GR', 'FR', 'NL', 'BE', 'DE',
]
codec = dahuffman.HuffmanCodec.from_data(countries)

In [9]:
# Encode a list of country codes, then display the encoded length and its hex form.
encoded = codec.encode(['FR', 'IT', 'BE', 'FR', 'UK'])
len(encoded), encoded.hex()


Out[9]:
(2, '4cca')

In [10]:
# Round trip: decoding gives back the list of country codes.
codec.decode(encoded)


Out[10]:
['FR', 'IT', 'BE', 'FR', 'UK']

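Pre-trained codecs

dahuffman provides loaders for ready-made codecs (Shakespeare, JSON and XML). The cells below compare the encoded size each of them produces for different kinds of input.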


In [11]:
# Pre-trained codecs provided by dahuffman, keyed by the name shown in the report.
codecs = dict(
    shakespeare=dahuffman.load_shakespeare(),
    json=dahuffman.load_json(),
    xml=dahuffman.load_xml(),
)

def try_codecs(data, candidates=None):
    """Print the size of ``data`` and its encoded size under each codec.

    Args:
        data: Sequence of symbols to encode (e.g. a string). ``len(data)`` is
            reported as the original size; the "bytes" label is only exact
            when every symbol occupies one byte.
        candidates: Optional mapping of display name -> object exposing
            ``encode(data)``. Defaults to the notebook-level ``codecs`` dict.

    Codecs whose ``encode`` raises ``KeyError`` are skipped without a report
    line (presumably the data contains a symbol missing from that codec's
    code table -- confirm against dahuffman). For empty ``data`` the ratio is
    undefined, so ``n/a`` is printed instead of a percentage.
    """
    if candidates is None:
        candidates = codecs

    original_size = len(data)
    print("{n:12s} {s:5d} bytes".format(n="original", s=original_size))

    for name, codec in candidates.items():
        try:
            encoded = codec.encode(data)
        except KeyError:
            # Deliberate best effort: this codec cannot encode the data, try the next one.
            continue

        encoded_size = len(encoded)
        if original_size:
            ratio = "{p:.1f}%".format(p=100.0 * encoded_size / original_size)
        else:
            # Avoid ZeroDivisionError on empty input.
            ratio = "n/a"
        print("{n:12s} {s:5d} bytes ({r})".format(n=name, s=encoded_size, r=ratio))

In [12]:
# Sample 1: English verse (the opening of Hamlet's "To be, or not to be" soliloquy).
try_codecs("""To be, or not to be; that is the question;
    Whether 'tis nobler in the mind to suffer
    The slings and arrows of outrageous fortune,
    Or to take arms against a sea of troubles,
    And by opposing, end them. To die, to sleep""")


original       232 bytes
shakespeare    128 bytes (55.2%)
json           155 bytes (66.8%)
xml            156 bytes (67.2%)

In [13]:
# Sample 2: a small JSON document.
try_codecs('''{
  "firstName": "John",
  "lastName": "Smith",
  "isAlive": true,
  "age": 27,
  "children": [],
  "spouse": null
}''')


original       116 bytes
json            77 bytes (66.4%)
xml             90 bytes (77.6%)

In [14]:
# Sample 3: a small XML document.
try_codecs('''<?xml version="1.0"?>
<catalog>
   <book id="bk101">
      <author>Gambardella, Matthew</author>
      <title>XML Developer's Guide</title>
      <price>44.95</price>
   </book>
   <book id="bk102">
      <author>Ralls, Kim</author>
      <title>Midnight Rain</title>
      <price>5.95</price>
   </book>
</catalog>''')


original       315 bytes
json           270 bytes (85.7%)
xml            222 bytes (70.5%)

In [ ]:


In [ ]:


In [ ]:


In [ ]: