In [55]:
import math
import string

def sign_entropy(sign_probability):
    return -math.log(sign_probability, 2)

def sign_mean_entropy(sign_probability):
    return sign_probability * sign_entropy(sign_probability)

def encode_lzw(input_):
    dict_ = {}
    dict_value = 256
    prefix = input_[0]
    
    # Init dictionary
    for char in range(ord('A'), ord('Z')+1):
        dict_[chr(char)] = char
    dict_['_'] = ord('_')
        
    def add_to_dict(char, dict_value):
        dict_[char] = dict_value
        return dict_value + 1
    
    for i in range(1, len(input_)):
        #print 'dict: {0}'.format(dict_)
        current_sign = input_[i]
        #print 'current sign: {0}'.format(current_sign)
        lookahead = ''.join([prefix, current_sign])
        #print 'lookahead: {0}'.format(lookahead)
        
        if lookahead in dict_:
            prefix = lookahead
            #print '{0} in dict. Set prefix to {1}'.format(lookahead, prefix)
        else:
            #print '{0} not in dict_, added it'.format(lookahead)
            print 'output: {0}; ascii: {1}'.format(dict_[prefix], prefix)
            add_to_dict(lookahead, dict_value)
            prefix = current_sign
            #print 'Set prefix to {0}'.format(prefix)
        
        #print ''
    
    if len(prefix) > 0:
        print 'output: {0}; ascii: {1}'.format(ord(prefix), prefix)

In [56]:
encode_lzw('BLAH_BLAH_BLAH_BLAH_BLAH_BLAH_BLAH_BLAH')


output: 66; ascii: B
output: 76; ascii: L
output: 65; ascii: A
output: 72; ascii: H
output: 95; ascii: _
output: 256; ascii: BL
output: 256; ascii: AH
output: 256; ascii: _B
output: 256; ascii: LA
output: 256; ascii: H_
output: 256; ascii: BLA
output: 256; ascii: H_B
output: 256; ascii: LAH
output: 256; ascii: _BL
output: 256; ascii: AH_
output: 256; ascii: BLAH
output: 256; ascii: _BLA
output: 72; ascii: H

In [12]:
import string
''.join(['foo', 'bar'])


Out[12]:
'foobar'

In [ ]: