In [ ]:
%%HTML
<style>
.container { width: 100% }
</style>
The class ListMap from the notebook ListMap.ipynb implements a map as a linked list.
In [ ]:
%run ListMap.ipynb
In [ ]:
ord('B')
Given a string $w$ and the size $n$ of the hash table, the function $\texttt{hash_code}(w, n)$ calculates the hash code of $w$. For a string $w = c_0c_1\cdots c_{m-1}$ of length $m$, this function is defined as follows: $$ \texttt{hash_code}(w, n) = \left(\sum\limits_{i=0}^{m-1} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; n $$ In order to prevent overflows when computing the numbers $128^i$ we can define the partial sum $s_k$ for $k=0,1,\cdots,m-1$ by induction:
Then we have $$ s_{m-1} = \left(\sum\limits_{i=0}^{m-1} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; n. $$
In [ ]:
def hash_code(w, n):
m = len(w)
s = 0
for k in range(m-1, -1, -1):
s = (s * 128 + ord(w[k])) % n
return s
Let us test this function.
In [ ]:
hash_code('George W. Bush', 6761)
In [ ]:
class HashTable:
def __init__(self, n, code=hash_code):
self.mSize = n
self.mEntries = 0 # number of entries
self.mArray = [ ListMap() for i in range(self.mSize) ] # array of empty ListMap objects
self.mAlpha = 2 # load factor
self.mCode = code
Hash tables work best if their size is a prime number. Therefore, the variable Primes stores a list of prime numbers.
These numbers are organized so that Primes[i+1] is roughly twice as big as Primes[i].
In [ ]:
HashTable.Primes = [ 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
8191, 16381, 32749, 65521, 131071, 262139, 524287,
1048573, 2097143, 4194301, 8388593, 16777213,
33554393, 67108859, 134217689, 268435399,
536870909, 1073741789, 2147483647
]
In [ ]:
def find(self, key):
index = self.mCode(key, self.mSize)
aList = self.mArray[index]
for k, v in aList:
if k == key:
return v
HashTable.find = find
del find
In [ ]:
def insert(self, key, value):
if self.mEntries >= self.mSize * self.mAlpha:
self._rehash()
index = self.mCode(key, self.mSize)
aList = self.mArray[index]
self.mEntries += aList.insert(key, value)
HashTable.insert = insert
del insert
In [ ]:
def _rehash(self):
for p in HashTable.Primes:
if p * self.mAlpha > self.mEntries:
prime = p
break
newTable = HashTable(prime, self.mCode)
for aList in self.mArray:
for k, v in aList:
newTable.insert(k, v)
self.mSize = prime
self.mArray = newTable.mArray
HashTable._rehash = _rehash
del _rehash
In [ ]:
def delete(self, key):
if 2 * self.mEntries <= self.mSize * self.mAlpha:
self._rehash()
index = self.mCode(key, self.mSize)
aList = self.mArray[index]
self.mEntries -= aList.delete(key)
HashTable.delete = delete
del delete
In [ ]:
def allKeys(self):
Result = []
for L in self.mArray:
for key, _ in L:
Result.append(key)
return Result
HashTable.allKeys = allKeys
del allKeys
In [ ]:
def __repr__(self):
result = ''
for i, aList in enumerate(self.mArray):
result += f'{i}: {aList}\n'
return result
HashTable.__repr__ = __repr__
del __repr__
In [ ]:
t = HashTable(3)
t.insert('Adrian', 8)
t
In [ ]:
t.insert('Benjamin', 24)
t
In [ ]:
t.insert('Bereket', 1)
t
In [ ]:
t.insert('Christian', 13)
t
In [ ]:
t.insert('Christian', 14)
t
In [ ]:
t.find('Adrian'), t.find('Christian'), t.find('Benjamin')
In [ ]:
t.insert('David', 22)
t
In [ ]:
t.insert('Ephraim', 19)
t
In [ ]:
t.insert('Erwin', 26)
t
In [ ]:
t.insert('Felix', 4)
t
In [ ]:
t.insert('Florian', 9)
t
In [ ]:
t.insert('Giorgio', 20)
t
In [ ]:
t.insert('Jan', 7)
t
In [ ]:
t.insert('Janis', 16)
t
In [ ]:
t.insert('Josia', 18)
t
In [ ]:
t.insert('Kai', 3)
t
In [ ]:
t.insert('Lars', 21)
t
In [ ]:
t.insert('Lucas', 0)
t
In [ ]:
t.insert('Marcel', 5)
t
In [ ]:
t.insert('Marius', 6)
t
In [ ]:
t.insert('Markus', 17)
t
In [ ]:
t.insert('Matthias', 10)
t
In [ ]:
t.insert('Nick', 11)
t
In [ ]:
t.insert('Patrick', 23)
t
In [ ]:
t.insert('Petra', 27)
t
In [ ]:
t.insert('Rene', 15)
t
In [ ]:
t.insert('Sebastian', 25)
t
In [ ]:
t.insert('Stefan', 2)
t
In [ ]:
t.delete('Adrian')
t
In [ ]:
t.delete('Adrian')
t.delete('Benjamin')
t.delete('Bereket')
t.delete('Christian')
t.delete('Christian')
t.delete('David')
t.delete('Ephraim')
t.delete('Erwin')
t.delete('Felix')
t.delete('Florian')
t.delete('Giorgio')
t.delete('Jan')
t.delete('Janis')
t.delete('Josia')
t.delete('Kai')
t.delete('Lars')
t.delete('Lucas')
t.delete('Marcel')
t.delete('Marius')
t.delete('Markus')
t.delete('Matthias')
t.delete('Nick')
t.delete('Patrick')
t.delete('Petra')
t.delete('Rene')
t.delete('Sebastian')
t.delete('Stefan')
t
In [ ]:
def primes(n = 100):
S = HashTable(20, code=lambda x, n: x % n)
for i in range(2, n + 1):
S.insert(i, i)
for i in range(2, n // 2 + 1):
for j in range(i, n // i + 1):
S.delete(i * j)
return S.allKeys()
In [ ]:
L = primes()
print(L)
print(sorted(L))
In [ ]: