encoder speed tests



In [ ]:
from io import StringIO
import numpy as np


class Encoder:
    BASE66_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.~"
    BASE11_ALPHABET = "0123456789a"
    BASE = len(BASE66_ALPHABET)

    def __init__(self,
                 alphabet="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.~"):
        self.alphabet = alphabet
        self.base = len(alphabet)
        self.source_length_matrix = None

    def encode(self, msg):

        msg = self.alphabet[self.base - 1] + msg

        i = 0

        for c in msg:
            i = i * self.base + self.alphabet.index(c)

        if i == 0:
            return Encoder.BASE66_ALPHABET[0]

        r = StringIO()
        while i:
            i, t = divmod(i, Encoder.BASE)
            r.write(Encoder.BASE66_ALPHABET[t])
        return r.getvalue()[::-1]

    def decode(self, alpha) -> str:
        i = 0

        for c in alpha:
            i = i * Encoder.BASE + Encoder.BASE66_ALPHABET.index(c)

        if i == 0:
            return Encoder.BASE66_ALPHABET[0]

        r = StringIO()
        while i:
            i, t = divmod(i, self.base)
            r.write(self.alphabet[t])
        return r.getvalue()[::-1][1:]

    def encode_games_array(self, arr):
        games_str = str(arr)
        games_str = games_str.lstrip('[')
        games_str = games_str.rstrip(']')
        games_str = games_str.replace(', ', 'a')
        return self.encode(games_str)

    def translate(
        self,
        phrase,
        source_alphabet, 
        target_alphabet) -> str:
        
#         def build_source_length_matrix():
#             arr = [len(source_alphabet) ** x for x in range(1000)]
#             source_length_matrix = np.asarray(arr)
#             self.source_length_matrix = source_length_matrix
        
#         if self.source_length_matrix is None:
#             build_source_length_matrix()
#         elif self.source_length_matrix[len(self.source_length_matrix) - 2] != len(source_alphabet):
#             build_source_length_matrix()
            
#         if len(phrase) < 1000:
#             c_arr = [source_alphabet.index(c) for c in phrase][::-1]
#             i = np.sum(self.source_length_matrix[:len(c_arr)] * c_arr)
        
        i = 0        
        source_base = len(source_alphabet)
        target_base = len(target_alphabet)
    
        for c in phrase:
            i *= source_base
            i += source_alphabet.find(c)
        
        r = StringIO()
        while i:
            i, t = divmod(i, target_base)
            r.write(target_alphabet[t])
        return r.getvalue()[::-1]
    
    def decode_user_games_alpha(self, alpha):
        string = self.decode(alpha)
        arr = string.split("a")
        int_arr = [int(x) for x in arr]
        return int_arr

In [ ]:
import time

encoder = Encoder(Encoder.BASE11_ALPHABET)
game_string = "4M3k24J45Fq-HXuOP6O.FxnYuq9h0~uQoDCx9u6hehwvAzSEtIoOZ35gUW.2G1sjpyvJAxIYIZPj2fRBoGbNuANIbGTFNTyVw~dAhKZ9KyncpZV1E8dfXC1eu7V4FgPlP7kQC0yzCVPDLL7goWWtkRyYZAB08PQIIjsnk0an.wr4KKFJtSsvSPU42P8bISjsnWz~qmbyW1K5HUBTNRNvWelT3vOdUWjF3X4GYNPFqxVex--V8VDKN7Vf0QYt0S1O1NGD9hJJPvq~FI-NyEzZw1zwT9rKV~-00Akw52gTesL491z9FBrCZUxgyTFkUe2V0xI3_ADSXTChvVQTUvTLEMtqQ.0gqN7AO-C84UFDmnv9VKXjNkG95HS5Kc3GsjBadwR8PKe-6KGGy8Wokb3MscAsvg7msQju_HZQzt7GJBW_G7r1cGdUE42-ubyqIE38sSeAFv9Ao8qACpvPXwfpL2ThgDPqQJeUHvxJKgCmuVcQQE8bWMnV_F8TPdfdeHdtqA7Fn4zY7bPF2.5~gCq55ljp8kaf4kqh0cnx9a327JSsZR-HJdamDtCdspZ1dCid2XlOj3GSuVeAs7_xykZbglr1jvERa6HXK6ix_wrIf-AF5KY5M6FFXgaAd006eYj400aJfJyMt_r90hCkR6gCIFzVFmsy5EmMonijOPB0"

t = time.time()
for _ in range(1000):
    encoder.decode_user_games_alpha(game_string)
print('\n base method: ', time.time() - t)

old = encoder.decode(game_string)

new = encoder.translate(
    game_string, 
    source_alphabet=Encoder.BASE66_ALPHABET, 
    target_alphabet=Encoder.BASE11_ALPHABET)[1:]

print('\n', old)
print('\n', new)

print('methods print same output:', old == new)

t = time.time()
for _ in range(1000):
    encoder.translate(
        game_string, 
        source_alphabet=Encoder.BASE66_ALPHABET, 
        target_alphabet=Encoder.BASE11_ALPHABET)
print('\n supposedly faster method:', time.time() - t)

test = encoder.translate(
    "6",
    source_alphabet="0123456789",
    target_alphabet="01")

print('\n', test)

In [2]:
import warnings
from io import StringIO


class Encoder:
    BASE66_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.~"
    BASE11_ALPHABET = "0123456789a"
    BASE = len(BASE66_ALPHABET)

    def __init__(self,
                 alphabet="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.~"):
        self.alphabet = alphabet
        self.base = len(alphabet)


    def encode(self, msg):
        warnings.warn("this func is deprecated, use translate instead")

        msg = self.alphabet[self.base - 1] + msg

        i = 0

        for c in msg:
            i = i * self.base + self.alphabet.index(c)

        if i == 0:
            return Encoder.BASE66_ALPHABET[0]

        r = StringIO()
        while i:
            i, t = divmod(i, Encoder.BASE)
            r.write(Encoder.BASE66_ALPHABET[t])
        return r.getvalue()[::-1]

    def decode(self, alpha) -> str:
        warnings.warn("this func is deprecated, use translate instead")

        i = 0

        for c in alpha:
            i = i * Encoder.BASE + Encoder.BASE66_ALPHABET.index(c)

        if i == 0:
            return Encoder.BASE66_ALPHABET[0]

        r = StringIO()
        while i:
            i, t = divmod(i, self.base)
            r.write(self.alphabet[t])
        return r.getvalue()[::-1][1:]

    @staticmethod
    def translate(phrase,
                  source_alphabet,
                  target_alphabet) -> str:

        i = 0
        source_base = len(source_alphabet)
        target_base = len(target_alphabet)

        for c in phrase:
            i *= source_base
            i += source_alphabet.find(c)

        r = StringIO()
        while i:
            i, t = divmod(i, target_base)
            r.write(target_alphabet[t])
        return r.getvalue()[::-1]

    def encode_games_array(self, arr):
        games_str = str(arr)
        games_str = games_str.lstrip('[')
        games_str = games_str.rstrip(']')
        games_str = games_str.replace(', ', 'a')
        return self.encode(games_str)

    def decode_user_games_alpha(self, alpha):
        string = self.translate(phrase=alpha,
                                source_alphabet=self.BASE66_ALPHABET,
                                target_alphabet=self.BASE11_ALPHABET)[1:]
        arr = string.split("a")
        int_arr = [int(x) for x in arr]
        return int_arr

encoder = Encoder()
games = [100, 200, 300, 400, 11111, 22222, 12840]
alpha = encoder.encode_games_array(games)

print(alpha)

games_returned = encoder.decode_user_games_alpha(alpha)

assert alpha
assert len(games) == len(games_returned)
assert games == games_returned


~100a200a300a400a11111a22222a12840
C:\Users\Admin\Anaconda3\lib\site-packages\ipykernel_launcher.py:17: UserWarning: this func is deprecated, use translate instead
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-6dd18d039934> in <module>()
     90 print(alpha)
     91 
---> 92 games_returned = encoder.decode_user_games_alpha(alpha)
     93 
     94 assert alpha

<ipython-input-2-6dd18d039934> in decode_user_games_alpha(self, alpha)
     81                                 target_alphabet=self.BASE11_ALPHABET)[1:]
     82         arr = string.split("a")
---> 83         int_arr = [int(x) for x in arr]
     84         return int_arr
     85 

<ipython-input-2-6dd18d039934> in <listcomp>(.0)
     81                                 target_alphabet=self.BASE11_ALPHABET)[1:]
     82         arr = string.split("a")
---> 83         int_arr = [int(x) for x in arr]
     84         return int_arr
     85 

ValueError: invalid literal for int() with base 10: ''

In [ ]: