In [22]:
import math
import string
def sign_entropy(sign_probability):
return -math.log(sign_probability, 2)
def sign_mean_entropy(sign_probability):
return sign_probability * sign_entropy(sign_probability)
class LZW:
def __init__(self, input_):
self.orig_input = input_
self.output = []
self.dict_ = {}
self.dict_value = 256
self.reset()
def reset(self):
self.prefix = None
self.code = None
self.code_old = None
def init_dict(self, ord_as_index=False):
self.dict_ = {}
self.dict_value = 256
for char in self.orig_input:
if ord_as_index:
self.dict_[ord(char)] = char
else:
self.dict_[char] = ord(char)
def add_to_dict(self, char, value_as_index=False):
if value_as_index:
self.dict_[self.dict_value] = char
else:
self.dict_[char] = self.dict_value
self.dict_value += 1
def encode(self):
self.reset()
self.output = []
self.prefix = self.orig_input[0]
self.init_dict()
for i in range(1, len(self.orig_input)):
current_sign = self.orig_input[i]
lookahead = ''.join([self.prefix, current_sign])
if lookahead in self.dict_:
self.prefix = lookahead
else:
print 'output: {0}; ascii: {1}'.format(self.dict_[self.prefix], self.prefix)
self.output.append(self.dict_[self.prefix])
self.add_to_dict(lookahead)
self.prefix = current_sign
if len(self.prefix) > 0:
print 'output: {0}; ascii: {1}'.format(ord(self.prefix), self.prefix)
self.output.append(ord(self.prefix))
return self.output
def decode(self, init_dict=True):
self.reset()
self.code = self.output[0]
print 'In: {0}; Out: {1}'.format(self.code, chr(self.code))
if init_dict:
self.init_dict(ord_as_index=True)
for i in self.output[1::]:
self.code_old = self.code
self.code = i
if self.code in self.dict_:
print 'In: {0}; Out: {1}'.format(self.code, self.dict_[self.code])
self.prefix = self.code_old
sign = self.dict_[self.code][:1]
self.add_to_dict(''.join([self.dict_[self.prefix], sign]), value_as_index=True)
else:
self.prefix = self.code_old
sign = self.dict_[self.code_old][:1]
print 'In: {0}; Out: {1}'.format(self.prefix, self.dict_[self.prefix])
def print_statistics(self):
print 'Input "{0}" has {1} Bytes'.format(self.orig_input, len(self.orig_input))
print 'Ouput has {0} Bytes.'.format(len(self.output) * 2)
print 'Reduction: {0} %'.format(100 - (100 / len(self.orig_input) * (len(self.output) * 2)))
In [23]:
input_string = 'BLAH_BLAH_BLAH_BLAH_BLAH_BLAH_BLAH_BLAH'
lzw = LZW(input_string)
In [24]:
output_stream = lzw.encode()
In [25]:
lzw.decode()
In [26]:
lzw.print_statistics()
In [30]:
input_ = [77, 73, 78, 85, 83, 257, 259, 256, 258, 260, 262, 83, 264, 259, 261, 265]
lzw_2 = LZW(None)
lzw_2.dict_ = {}
lzw_2.dict_[77] = 'M'
lzw_2.dict_[73] = 'I'
lzw_2.dict_[78] = 'N'
lzw_2.dict_[85] = 'U'
lzw_2.dict_[83] = 'S'
lzw_2.output = input_
lzw_2.decode(init_dict=False)
for o in lzw_2.output:
print lzw_2.dict_[o],
In [ ]: