In [103]:
# Code table shared by compress() and decompress(): integer code -> symbol.
#   0        -> NUL
#   1..31    -> common two-letter sequences (digraphs)
#   32..255  -> the single character with that code point
d = {0: '\x00'}
d.update(enumerate(
    [
        'in', 'er', 'an', 'en', 'te', 'ti', 're', 'on',
        'es', 'st', 'at', 'ra', 'ri', 'ar', 'li', 'le',
        'al', 'is', 'nt', 'co', 'ne', 'ng', 'de', 'ch',
        'or', 'it', 'll', 'se', 'ic', 'la', 'ta',
    ],
    start=1,
))
d.update((code, chr(code)) for code in range(32, 256))

In [104]:
# Sample text that the demo cell below compresses and then decompresses.
instr = "hello and the world"

In [105]:
def compress(instr, c):
    """Encode ``instr`` by replacing every symbol listed in ``c`` with its code.

    The text is scanned left to right. At each position the longest symbol
    of the code table that matches is consumed and ``chr(code)`` is emitted
    for it, so the result can be decoded by mapping ``ord`` of each output
    character back through the same table.

    Parameters
    ----------
    instr : str
        Text to encode.
    c : dict
        Code table mapping an integer code to the symbol (a non-empty
        string) it stands for. Empty symbols are ignored. If a symbol is
        listed under several codes, the one that appears last in the table
        is used.

    Returns
    -------
    str
        The encoded text, one character per matched symbol.

    Raises
    ------
    ValueError
        If a character of ``instr`` is not covered by any symbol in ``c``.
        Copying such a character through unchanged would make the output
        ambiguous: its code point could collide with the code of another
        symbol, or be missing from the table altogether.
    """
    # Reverse lookup: symbol -> code. Empty symbols are dropped because they
    # would match everywhere without ever advancing the scan.
    codes = {sym: code for code, sym in c.items() if sym}
    longest = max(map(len, codes), default=0)

    pieces = []  # collected and joined once instead of repeated str +=
    pos = 0
    end = len(instr)
    while pos < end:
        # Try the widest candidate first so multi-character symbols win over
        # the single characters they start with.
        for width in range(longest, 0, -1):
            chunk = instr[pos:pos + width]
            if chunk in codes:
                pieces.append(chr(codes[chunk]))
                # Near the end of the text the slice may be shorter than
                # ``width``; advance by what was actually consumed.
                pos += len(chunk)
                break
        else:
            raise ValueError(
                "character {!r} at index {} cannot be encoded with the "
                "given code table".format(instr[pos], pos)
            )
    return "".join(pieces)

In [106]:
def decompress(cstr, d):
    """Expand an encoded string back into text.

    Every character of ``cstr`` is turned into its code point with ``ord``
    and looked up in the code table ``d``; the symbols found are
    concatenated in order and returned.

    Raises ``KeyError`` if a code point is not a key of ``d``.
    """
    return "".join(d[ord(code_char)] for code_char in cstr)

In [107]:
# Encode the sample text with the shared code table.
cstr = compress(instr, d)

# Original text, encoded text, and the decoded round trip -- one per line.
print(instr, cstr, decompress(cstr, d), sep="\n")

# Code points of the original and of the encoded text, right-aligned to width 3.
print(*(["{:>3}".format(ord(ch)) for ch in text] for text in (instr, cstr)), sep="\n")

# Lengths before and after encoding, and the encoded/original length ratio.
print("instr: {}".format(len(instr)))
print("cstr : {}".format(len(cstr)))
print("ratio: {}".format(len(cstr) / len(instr)))


hello and the world
heo d the wld
hello and the world
['104', '101', '108', '108', '111', ' 32', ' 97', '110', '100', ' 32', '116', '104', '101', ' 32', '119', '111', '114', '108', '100']
['104', '101', ' 27', '111', ' 32', '  3', '100', ' 32', '116', '104', '101', ' 32', '119', ' 25', '108', '100']
instr: 19
cstr : 16
ratio: 0.8421052631578947