In [36]:
def compress_string_contiguous(string):
    """
    Run-length encode `string`: each run of a repeated character becomes the
    character followed by the run length; a single character is kept as is.

    Example: "aaabccdddd" -> "a3bc2d4".

    Assumes no numeric character is present in the string (otherwise the
    encoded form would be ambiguous).

    Runs are measured by scanning forward from the current position, so a
    character that reappears later in the string (e.g. "aba") is handled as
    separate runs rather than being miscounted.

    Empty or None input is returned unchanged.
    """
    if not string:
        return string

    compressed = []
    wordLength = len(string)
    starts = 0
    while starts < wordLength:
        c = string[starts]
        # Advance `ends` to the first index whose character differs from c.
        ends = starts + 1
        while ends < wordLength and string[ends] == c:
            ends += 1
        length = ends - starts
        # Only runs of two or more characters carry a count suffix.
        compressed.append(c + (str(length) if length > 1 else ""))
        starts = ends  # seek to the next char different than c
    return "".join(compressed)

In [37]:
def compress_string(string):
    """
    Compress runs of repeated characters in `string`.

    A run longer than two characters is written as the character followed by
    the run length (e.g. "AAA" -> "A3"); runs of one or two characters are
    copied through unchanged, since encoding them would not make the result
    shorter.

    Empty or None input is returned unchanged.
    """
    if not string:
        return string

    pieces = []
    total = len(string)
    run_start = 0
    # `pos` walks one past the end so the final run is flushed by the same branch.
    for pos in range(1, total + 1):
        if pos == total or string[pos] != string[run_start]:
            symbol = string[run_start]
            run_len = pos - run_start
            if run_len > 2:
                pieces.append(symbol + str(run_len))
            else:
                pieces.append(symbol * run_len)
            run_start = pos
    return "".join(pieces)

In [43]:
# Demo input mixing runs of length 1, 2 and longer, plus spaces.
s = "aaBCCEFFFFKKMMMMMMP taaammanlaarrrr seeeeeeeeek tooo"
# Call-form print works on both Python 2 and Python 3 and prints the same text.
print(compress_string(s))


aaBCCEF4KKM6P ta3mmanlaar4 se9k to3

In [39]:
from nose.tools import assert_equal

# Falsy inputs (None, empty string) are returned unchanged.
assert_equal(compress_string(None), None)
assert_equal(compress_string(''), '')
# Runs of exactly two characters are not compressed.
assert_equal(compress_string('AABBCC'), 'AABBCC')

# The expected value here is A3BCCD4 rather than A3B1C2D4: compress_string
# only encodes runs longer than two characters, so single and double
# characters are emitted verbatim.
assert_equal(compress_string('AAABCCDDDD'), 'A3BCCD4')
print('Success: test_compress')


Success: test_compress

In [ ]: