In [ ]:
%%HTML
<style>
.container { width: 100% } 
</style>

Hash Tables (Open Hashing)

The class ListMap from the notebook ListMap.ipynb implements a map as a linked list.


In [ ]:
%run ListMap.ipynb

In [ ]:
ord('B')

Given a string $w$ and the size $n$ of the hash table, the function $\texttt{hash_code}(w, n)$ calculates the hash code of $w$. For a string $w = c_0c_1\cdots c_{m-1}$ of length $m$, this function is defined as follows: $$ \texttt{hash_code}(w, n) = \left(\sum\limits_{i=0}^{m-1} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; n $$ In order to prevent overflows when computing the numbers $128^i$ we can define the partial sum $s_k$ for $k=0,1,\cdots,m-1$ by induction:

  • $s_{0} = \texttt{ord}(c_{m-1}) \;\texttt{%}\; n$,
  • $s_{k+1} = \bigl(s_k \cdot 128 + \texttt{ord}(c_{k}) \bigr) \;\texttt{%}\; n$.

Then we have $$ s_{m-1} = \left(\sum\limits_{i=0}^{m-1} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; n. $$


In [ ]:
def hash_code(w, n):
    m = len(w)
    s = 0
    for k in range(m-1, -1, -1):
        s = (s * 128 + ord(w[k])) % n
    return s

Let us test this function.


In [ ]:
hash_code('George W. Bush', 6761)

In [ ]:
class HashTable:
    def __init__(self, n, code=hash_code):
        self.mSize    = n
        self.mEntries = 0                                        # number of entries
        self.mArray   = [ ListMap() for i in range(self.mSize) ] # array of empty ListMap objects
        self.mAlpha   = 2                                        # load factor
        self.mCode    = code

Hash tables work best if their size is a prime number. Therefore, the variable Primes stores a list of prime numbers.
These numbers are organized so that Primes[i+1] is roughly twice as big as Primes[i].


In [ ]:
HashTable.Primes = [ 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 
                     8191, 16381, 32749, 65521, 131071, 262139, 524287, 
                     1048573, 2097143, 4194301, 8388593, 16777213, 
                     33554393, 67108859, 134217689, 268435399, 
                     536870909, 1073741789, 2147483647 
                   ]

In [ ]:
def find(self, key):
    index = self.mCode(key, self.mSize)
    aList = self.mArray[index]
    for k, v in aList:
        if k == key:
            return v

HashTable.find = find
del find

In [ ]:
def insert(self, key, value):
    if self.mEntries >= self.mSize * self.mAlpha:
        self._rehash()
    index = self.mCode(key, self.mSize)
    aList = self.mArray[index]
    self.mEntries += aList.insert(key, value)
    
HashTable.insert = insert
del insert

In [ ]:
def _rehash(self):
    for p in HashTable.Primes:
        if p * self.mAlpha > self.mEntries:
            prime = p
            break
    newTable = HashTable(prime, self.mCode)
    for aList in self.mArray:
        for k, v in aList:
            newTable.insert(k, v)
    self.mSize  = prime
    self.mArray = newTable.mArray
    
HashTable._rehash = _rehash
del _rehash

In [ ]:
def delete(self, key):
    if 2 * self.mEntries <= self.mSize * self.mAlpha:
        self._rehash()
    index = self.mCode(key, self.mSize)
    aList = self.mArray[index]
    self.mEntries -= aList.delete(key)

HashTable.delete = delete
del delete

In [ ]:
def allKeys(self):
    Result = []
    for L in self.mArray:
        for key, _ in L:
            Result.append(key)
    return Result
            
HashTable.allKeys = allKeys
del allKeys

In [ ]:
def __repr__(self):
    result = ''
    for i, aList in enumerate(self.mArray):
        result += f'{i}: {aList}\n'
    return result

HashTable.__repr__ = __repr__
del __repr__

In [ ]:
t = HashTable(3)
t.insert('Adrian', 8)
t

In [ ]:
t.insert('Benjamin', 24)
t

In [ ]:
t.insert('Bereket', 1)
t

In [ ]:
t.insert('Christian', 13)
t

In [ ]:
t.insert('Christian', 14)
t

In [ ]:
t.find('Adrian'), t.find('Christian'), t.find('Benjamin')

In [ ]:
t.insert('David', 22)
t

In [ ]:
t.insert('Ephraim', 19)
t

In [ ]:
t.insert('Erwin', 26)
t

In [ ]:
t.insert('Felix', 4)
t

In [ ]:
t.insert('Florian', 9)
t

In [ ]:
t.insert('Giorgio', 20)
t

In [ ]:
t.insert('Jan', 7)
t

In [ ]:
t.insert('Janis', 16)
t

In [ ]:
t.insert('Josia', 18)
t

In [ ]:
t.insert('Kai', 3)
t

In [ ]:
t.insert('Lars', 21)
t

In [ ]:
t.insert('Lucas', 0)
t

In [ ]:
t.insert('Marcel', 5)
t

In [ ]:
t.insert('Marius', 6)
t

In [ ]:
t.insert('Markus', 17)
t

In [ ]:
t.insert('Matthias', 10)
t

In [ ]:
t.insert('Nick', 11)
t

In [ ]:
t.insert('Patrick', 23)
t

In [ ]:
t.insert('Petra', 27)
t

In [ ]:
t.insert('Rene', 15)
t

In [ ]:
t.insert('Sebastian', 25)
t

In [ ]:
t.insert('Stefan', 2)
t

In [ ]:
t.delete('Adrian')
t

In [ ]:
t.delete('Adrian')
t.delete('Benjamin')
t.delete('Bereket')
t.delete('Christian')
t.delete('Christian')
t.delete('David')
t.delete('Ephraim')
t.delete('Erwin')
t.delete('Felix')
t.delete('Florian')
t.delete('Giorgio')
t.delete('Jan')
t.delete('Janis')
t.delete('Josia')
t.delete('Kai')
t.delete('Lars')
t.delete('Lucas')
t.delete('Marcel')
t.delete('Marius')
t.delete('Markus')
t.delete('Matthias')
t.delete('Nick')
t.delete('Patrick')
t.delete('Petra')
t.delete('Rene')
t.delete('Sebastian')
t.delete('Stefan')
t

In [ ]:
def primes(n = 100):
    S = HashTable(20, code=lambda x, n: x % n)
    for i in range(2, n + 1):
        S.insert(i, i)
    for i in range(2, n // 2 + 1):
        for j in range(i, n // i + 1):
            S.delete(i * j)
    return S.allKeys()

In [ ]:
L = primes()
print(L)
print(sorted(L))

In [ ]: