In [1]:
import dahuffman
Basic usage example, where the code table is built based on given symbol frequencies:
In [2]:
# Build a codec from an explicit symbol -> frequency mapping.
codec = dahuffman.HuffmanCodec.from_frequencies(
    {
        'e': 100,
        'n': 20,
        'x': 1,
        'i': 40,
        'q': 3,
    }
)
In [3]:
# Encode a sample message, then show the encoded value, its hex form
# and its length, one per line.
encoded = codec.encode('exeneeeexniqneieini')
print(encoded, encoded.hex(), len(encoded), sep="\n")
In [4]:
# Round trip: decode the value produced by `codec.encode` above; the result is
# shown as the cell output and is expected to match the encoded message.
codec.decode(encoded)
Out[4]:
In [5]:
# Print the code table of the codec built from the explicit frequencies above.
codec.print_code_table()
You can also "train" the codec by providing it data directly:
In [6]:
# NOTE: rebinds `codec`; from here on it refers to a codec built from this
# sample text, not to the frequency-based codec created earlier.
codec = dahuffman.HuffmanCodec.from_data('hello world how are you doing today foo bar lorem ipsum')
In [7]:
# Length of the encoded form of a short string; every character used here
# also occurs in the text the codec was built from.
len(codec.encode('do lo er ad od'))
Out[7]:
Using dahuffman with sequences of symbols, e.g. country codes:
In [8]:
# Symbols do not have to be characters: each list item (a country code)
# is passed to the codec as one symbol.
countries = [
    'FR', 'UK', 'BE', 'IT',
    'FR', 'IT', 'GR', 'FR',
    'NL', 'BE', 'DE',
]
# NOTE: rebinds `codec` to a codec built from the country-code sequence.
codec = dahuffman.HuffmanCodec.from_data(countries)
In [9]:
# Encode a sequence of country codes with the codec built from `countries`.
encoded = codec.encode(['FR', 'IT', 'BE', 'FR', 'UK'])
# Cell output: (length of the encoded value, its hex representation).
len(encoded), encoded.hex()
Out[9]:
In [10]:
# Round trip for the country-code example: decode the value encoded above;
# the result is shown as the cell output.
codec.decode(encoded)
Out[10]:
In [11]:
# Codecs obtained from dahuffman's load_* helpers, keyed by the label that
# try_codecs prints next to each result.
codecs = {
    label: loader()
    for label, loader in (
        ('shakespeare', dahuffman.load_shakespeare),
        ('json', dahuffman.load_json),
        ('xml', dahuffman.load_xml),
    )
}
def try_codecs(data, codec_table=None):
    """Print the size of ``data`` before and after encoding with each codec.

    The first line reports ``len(data)``. Each following line reports, for one
    codec, the length of its encoded output and that length as a percentage of
    ``len(data)``.

    Args:
        data: Sequence of symbols to encode (the notebook passes strings).
        codec_table: Optional mapping of display name -> object exposing
            ``encode(data)``. When omitted, the notebook-level ``codecs``
            dict is used, so existing one-argument calls behave as before.

    Notes:
        A codec whose ``encode`` raises ``KeyError`` is skipped without any
        output (presumably it has no code for some symbol in ``data``).
        For empty ``data`` the percentage is undefined and is shown as
        ``n/a`` instead of raising ``ZeroDivisionError``.
    """
    table = codecs if codec_table is None else codec_table
    original_size = len(data)
    print("{n:12s} {s:5d} bytes".format(n="original", s=original_size))
    for name, codec in table.items():
        try:
            encoded = codec.encode(data)
        except KeyError:
            # Deliberate best-effort: a codec that cannot encode this input
            # is left out of the comparison rather than aborting it.
            continue
        if original_size:
            ratio = "{p:.1f}%".format(p=100.0 * len(encoded) / original_size)
        else:
            # Guard against dividing by zero when the input is empty.
            ratio = "n/a"
        print("{n:12s} {s:5d} bytes ({r})".format(n=name, s=len(encoded), r=ratio))
In [12]:
# Compare the loaded codecs on a few lines of English verse.
try_codecs("""To be, or not to be; that is the question;
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing, end them. To die, to sleep""")
In [13]:
# Compare the loaded codecs on a small JSON document.
try_codecs('''{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 27,
"children": [],
"spouse": null
}''')
In [14]:
# Compare the loaded codecs on a small XML document.
try_codecs('''<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<price>44.95</price>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<price>5.95</price>
</book>
</catalog>''')
In [ ]:
In [ ]:
In [ ]:
In [ ]: