In [ ]:
%%HTML
<style>
.container { width:100% }
</style>
In [ ]:
import graphviz as gv
This notebook presents 2-3 trees. We define these trees inductively as follows:
In order to keep 2-3 trees balanced when inserting new keys, we use a fourth constructor of the form $\texttt{Four}(l,k_1,m_l, k_2, m_r, k_3, r)$. A term of the form $\texttt{Four}(l,k_1,m_l, k_2, m_r, k_3, r)$ is a 2-3-4 tree iff
Nodes of this form are called 4-nodes and the key $k_m$ is called the middle key. Trees containing 2-nodes, 3-node, and 4-nodes are called 2-3-4 trees.
The class TwoThreeTree
is a superclass for constructing the nodes of 2-3-4 trees. It has one static variable sNodeCount
. This variable is used to equip all nodes with a unique identifier. This identifier is used to draw the trees using graphviz
.
Every node has a uniques id mID
that is stored as a member variable. Furthermore, this class provides defaults for the functions isNil
, isTwo
, isThree
, and isFour
. These functions can be used to check the type of a node.
In [ ]:
class TwoThreeTree:
sNodeCount = 0 # only needed for graphviz
def __init__(self):
TwoThreeTree.sNodeCount += 1
self.mID = TwoThreeTree.sNodeCount # only for graphviz
def getID(self): # only needed for graphviz
return self.mID
def isNil(self):
return False
def isTwo(self):
return False
def isThree(self):
return False
def isFour(self):
return False
def isTree(self): # only needed for display of equations
return False
def insert(self, k):
return self._ins(k)._restore()._grow()
def _grow(self):
return self
The method $t.\texttt{toDot}()$ takes a 2-3-4 tree $t$ and returns a graph that depicts the tree $t$.
In [ ]:
def toDot(self):
dot = gv.Digraph(node_attr={'shape': 'record', 'style': 'rounded'})
nodeDict = {}
self._collectIDs(nodeDict)
for n, t in nodeDict.items():
if t.isNil():
dot.node(str(n), label='', shape='point')
elif t.isTwo():
dot.node(str(n), label=str(t.mKey))
elif t.isThree():
dot.node(str(n), label=str(t.mKeyL) + '|' + str(t.mKeyR))
elif t.isFour():
dot.node(str(n), label=str(t.mKeyL) + '|' + str(t.mKeyM) + '|' + str(t.mKeyR))
elif t.isTree():
dot.node(str(n), label=str(t.mName), shape='triangle')
else:
assert False, f'Unknown node {t}'
for n, t in nodeDict.items():
if t.isTwo():
dot.edge(str(n), str(t.mLeft .getID()))
dot.edge(str(n), str(t.mRight.getID()))
if t.isThree():
dot.edge(str(n), str(t.mLeft .getID()))
dot.edge(str(n), str(t.mMiddle.getID()))
dot.edge(str(n), str(t.mRight .getID()))
if t.isFour():
dot.edge(str(n), str(t.mLeft .getID()))
dot.edge(str(n), str(t.mMiddleL.getID()))
dot.edge(str(n), str(t.mMiddleR.getID()))
dot.edge(str(n), str(t.mRight .getID()))
return dot
TwoThreeTree.toDot = toDot
The method $t.\texttt{collectIDs}(d)$ takes a tree $t$ and a dictionary $d$ and updates the dictionary so that the following holds: $$ d[\texttt{id}] = n \quad \mbox{for every node $n$ in $t$.} $$ Here, $\texttt{id}$ is the unique identifier of the node $n$, i.e. $d$ associates the identifiers with the corresponding nodes.
In [ ]:
def _collectIDs(self, nodeDict):
nodeDict[self.getID()] = self
if self.isTwo():
self.mLeft ._collectIDs(nodeDict)
self.mRight._collectIDs(nodeDict)
elif self.isThree():
self.mLeft ._collectIDs(nodeDict)
self.mMiddle._collectIDs(nodeDict)
self.mRight ._collectIDs(nodeDict)
elif self.isFour():
self.mLeft ._collectIDs(nodeDict)
self.mMiddleL._collectIDs(nodeDict)
self.mMiddleR._collectIDs(nodeDict)
self.mRight ._collectIDs(nodeDict)
TwoThreeTree._collectIDs = _collectIDs
The function $\texttt{toDotList}(\texttt{NodeList})$ takes a list of trees and displays them one by one.
In [ ]:
def toDotList(NodeList):
dot = gv.Digraph(node_attr={'shape': 'record', 'style': 'rounded'})
nodeDict = {}
for node in NodeList:
node._collectIDs(nodeDict)
for n, t in nodeDict.items():
if t.isNil():
dot.node(str(n), label='', shape='point')
elif t.isTwo():
dot.node(str(n), label=str(t.mKey))
elif t.isThree():
dot.node(str(n), label=str(t.mKeyL) + '|' + str(t.mKeyR))
elif t.isFour():
dot.node(str(n), label=str(t.mKeyL) + '|' + str(t.mKeyM) + '|' + str(t.mKeyR))
elif t.isTree():
dot.node(str(n), label=str(t.mName), shape='triangle', style='solid')
elif t.isMethod():
dot.node(str(n), label=str(t.mLabel), shape='rectangle', style='dotted')
else:
assert False, f'Unknown node {t}'
for n, t in nodeDict.items():
if t.isTwo():
dot.edge(str(n), str(t.mLeft .getID()))
dot.edge(str(n), str(t.mRight.getID()))
if t.isThree():
dot.edge(str(n), str(t.mLeft .getID()))
dot.edge(str(n), str(t.mMiddle.getID()))
dot.edge(str(n), str(t.mRight .getID()))
if t.isFour():
dot.edge(str(n), str(t.mLeft .getID()))
dot.edge(str(n), str(t.mMiddleL.getID()))
dot.edge(str(n), str(t.mMiddleR.getID()))
dot.edge(str(n), str(t.mRight .getID()))
return dot
The class Tree
is not used in the implementation of 2-3 trees. It is only used for displaying abstract subtrees in equations.
In [ ]:
class Tree(TwoThreeTree):
def __init__(self, name):
TwoThreeTree.__init__(self)
self.mName = name
def __str__(self):
return self.mName
def isTree(self):
return True
The class Method
is not used in the implementation of 2-3 trees. It is only used for displaying the name of methods in equations.
In [ ]:
class Method(TwoThreeTree):
def __init__(self, label):
TwoThreeTree.__init__(self)
self.mLabel = label
def __str__(self):
return self.mLabel
def isMethod(self):
return True
make_string
is an auxiliary function that makes implementation of __str__
methods simpler.
In [ ]:
def make_string(obj, type_name, attributes):
# map the function __str__ to all attributes and join them with a comma
return f"{type_name}({', '.join(map(str, [getattr(obj, at) for at in attributes]))})"
The class Nil
represents an empty tree.
In [ ]:
class Nil(TwoThreeTree):
def __init__(self):
TwoThreeTree.__init__(self)
def __str__(self):
return make_string(self, 'Nil', [])
def isNil(self):
return True
The class Two
represents a 2-node.
In [ ]:
class Two(TwoThreeTree):
def __init__(self, left, key, right):
TwoThreeTree.__init__(self)
self.mLeft = left
self.mKey = key
self.mRight = right
def isTwo(self):
return True
def __str__(self):
return make_string(self, 'Two', ['mLeft', 'mKey', 'mRight'])
The class Three
represents a 3-node.
In [ ]:
class Three(TwoThreeTree):
def __init__(self, left, keyL, middle, keyR, right):
TwoThreeTree.__init__(self)
self.mLeft = left
self.mKeyL = keyL
self.mMiddle = middle
self.mKeyR = keyR
self.mRight = right
def __str__(self):
return make_string(self, 'Three', ['mLeft', 'mKeyL', 'mMiddle', 'mKeyR', 'mRight'])
def isThree(self):
return True
The class Four
represents a 4-node.
In [ ]:
class Four(TwoThreeTree):
def __init__(self, l, kl, ml, km, mr, kr, r):
TwoThreeTree.__init__(self)
self.mLeft = l
self.mKeyL = kl
self.mMiddleL = ml
self.mKeyM = km
self.mMiddleR = mr
self.mKeyR = kr
self.mRight = r
def __str__(self):
return make_string(self, 'Four',
['mLeft', 'mKeyL', 'mMiddleL', 'mKeyM', 'mMiddleR', 'mKeyR', 'mRight'])
def isFour(self):
return True
The empty tree does not contain any keys: $$ \texttt{Nil}.\texttt{member}(k) = \texttt{False} $$
In [ ]:
def member(self, k):
return "your code here"
Nil.member = member
Insertings a key $k$ into an empty node returns a 2-node. $$ \texttt{Nil}.\texttt{ins}(k) = \texttt{Two}(\texttt{Nil}, k, \texttt{Nil}) $$ Graphically, this equation looks like this:
In [ ]:
toDotList([Nil(), Method('._ins(k)'), Two(Nil(), 'k', Nil())])
In [ ]:
def _ins(self, k):
return "your code here"
Nil._ins = _ins
The method extract
returns the member variables stored in a 2-node.
In [ ]:
def _extract(self):
return self.mLeft, self.mKey, self.mRight
Two._extract = _extract
Given a 2-node $t$ and a key $k$, the method $t.\texttt{member}(k)$ checks whether the key $k$ occurs in $t$. It is specified as follows:
In [ ]:
def member(self, key):
l, k1, r = self._extract()
"your code here"
Two.member = member
The method $t.\texttt{ins}(k)$ takes a 2-3 tree $t$ and and a key $k$ and inserts the key $k$ into $t$. It returns a 2-3-4 tree that has at most one 4-node, which has to be a child of the root node. The function $\texttt{ins}$ is recursive and uses the function $\texttt{restore}$ defined below. It is defined as follows:
These equation are presented graphically below:
In [ ]:
toDotList([Two(Tree('l'), 'k', Tree('r')), Method('.ins(k)'), Two(Tree('l'), 'k', Tree('r')) ])
In [ ]:
toDotList([Method('k1 < k2:'), Two(Nil(), 'k1', Nil()), Method('.ins(k2)'), Three(Nil(), 'k1', Nil(), 'k2', Nil()) ])
In [ ]:
toDotList([Method('k2 < k1:'), Two(Nil(), 'k1', Nil()), Method('.ins(k2)'), Three(Nil(), 'k2', Nil(), 'k1', Nil()) ])
In [ ]:
toDotList([Method('k1 < k2:'), Two(Tree('l'), 'k1', Tree('r')), Method('.ins(k2)'), Two(Tree('l'), 'k1', Tree('r.ins(k2)')) ])
In [ ]:
toDotList([Method('k2 < k1:'), Two(Tree('l'), 'k1', Tree('r')), Method('.ins(k2)'), Two(Tree('l.ins(k2)'), 'k1', Tree('r')) ])
In [ ]:
def _ins(self, key):
"your code here"
Two._ins = _ins
The function call $t.\texttt{restore}()$ takes a 2-3-4 tree $t$ that has at most one 4-node. This 4-node has to be a child of the root. It returns a 2-3-4 tree that has at most one 4-node. This 4-node has to be the root node. It is specified as follows:
If neither the left nor the right child node of a 2-node is a 4-node, the node is returned unchanged.
In [ ]:
toDotList([Two(Four(Tree('l1'),'kl',Tree('ml'),'km', Tree('mr'),'kr',Tree('r1')), 'k', Tree('r')),
Method('.restore()'),
Three(Two(Tree('l1'),'kl',Tree('ml')), 'km', Two(Tree('mr'),'kr',Tree('r1')), 'k', Tree('r'))])
In [ ]:
toDotList([Two(Tree('l'), 'k', Four(Tree('l1'),'kl',Tree('ml'),'km',Tree('mr'),'kr',Tree('r1'))),
Method('.restore()'),
Three(Tree('l'), 'k', Two(Tree('l1'),'kl',Tree('ml')),'km',Two(Tree('mr'),'kr',Tree('r1')))
])
In [ ]:
def _restore(self):
"your code here"
Two._restore = _restore
The method extract
returns the member variables stored in a 3-node.
In [ ]:
def _extract(self):
return self.mLeft, self.mKeyL, self.mMiddle, self.mKeyR, self.mRight
Three._extract = _extract
Given a 3-node $t$ and a key $k$, the method $t.\texttt{member}(k)$ checks whether the key $k$ occurs in $t$. It is specified as follows:
In [ ]:
def member(self, key):
"your code here"
Three.member = member
The method $t.\texttt{ins}(k)$ takes a 2-3 tree $t$ and and a key $k$ and inserts the key $k$ into $t$. It returns a 2-3-4 tree that has at most one 4-node, which has to be a child of the root node. The function \texttt{ins} is recursive and uses the function $\texttt{restore}$ defined below. It is defined as follows:
\texttt{Four}(\texttt{Nil},k,\texttt{Nil},k_l,\texttt{Nil},k_r,\texttt{Nil})$
\texttt{Four}(\texttt{Nil},k_l,\texttt{Nil},k,\texttt{Nil},k_r,\texttt{Nil})$
$k_r < k \rightarrow \texttt{Three}(\texttt{Nil},k_l,\texttt{Nil},k_r,\texttt{Nil}).\texttt{ins}(k) =
\texttt{Four}(\texttt{Nil},k_l,\texttt{Nil},k_r,\texttt{Nil},k,\texttt{Nil})$
$k < k_l \wedge l \not= \texttt{Nil} \wedge m \not= \texttt{Nil}\wedge r \not= \texttt{Nil} \rightarrow \texttt{Three}(l,k_l,m,k_r,r).\texttt{ins}(k) = \texttt{Three}\bigl(l.\texttt{ins}(k),k_l,m,k_r,r\bigr).\texttt{restore}()$
In [ ]:
def _ins(self, key):
"your code here"
Three._ins = _ins
The function call $t.\texttt{restore}()$ takes a 2-3-4 tree $t$ that has at most one 4-node. This 4-node has to be a child of the root. It returns a 2-3-4 tree that has at most one 4-node. This 4-node has to be the root node. It is specified as follows:
If neither of the child nodes of a 3-node is a 4-node, the node is returned unchanged.
In [ ]:
toDotList([Three(Four(Tree('l1'), 'k1', Tree('ml'), 'k2', Tree('mr'), 'k3', Tree('r1')), 'kl', Tree('m'), 'kr', Tree('r')),
Method('.restore()'),
Four(Two(Tree('l1'), 'k1', Tree('ml')), 'k2', Two(Tree('mr'), 'k3', Tree('r1')), 'kl', Tree('m'), 'kr', Tree('r')),
])
In [ ]:
toDotList([Three(Tree('l'), 'kl', Four(Tree('l1'), 'k1', Tree('ml'), 'k2', Tree('mr'), 'k3', Tree('r1')), 'kr', Tree('r')),
Method('.restore()'),
Four(Tree('l'), 'kl', Two(Tree('l1'), 'k1', Tree('ml')), 'k2', Two(Tree('mr'), 'k3', Tree('r1')), 'kr', Tree('r'))
])
In [ ]:
toDotList([Three(Tree('l'), 'kl', Tree('m'), 'kr', Four(Tree('l1'), 'k1', Tree('ml'), 'k2', Tree('mr'), 'k3', Tree('r1'))),
Method('.restore()'),
Four(Tree('l'), 'kl', Tree('m'), 'kr', Two(Tree('l1'), 'k1', Tree('ml')), 'k2', Two(Tree('mr'), 'k3', Tree('r1')))
])
In [ ]:
def _restore(self):
"your code here"
Three._restore = _restore
The method extract returns the member variables stored in a 4-node.
In [ ]:
def _extract(self):
return self.mLeft, self.mKeyL, self.mMiddleL, self.mKeyM, self.mMiddleR, self.mKeyR, self.mRight
Four._extract = _extract
The method restore
returns a 4-node unchanged.
In [ ]:
def _restore(self):
return self
Four._restore = _restore
The function grow
turns one 4-node into three 2-nodes. It is specified as follows:
$$ \texttt{Four}(l, k_l, m_l, k_m, m_r, k_r, r).\texttt{grow}() =
\texttt{Two}\bigl(\texttt{Two}(l, k_l, m_l), k_m, \texttt{Two}(m_r, k_r, r)\bigr)
$$
In [ ]:
toDotList([Four(Tree('l'),'kl', Tree('ml'), 'km', Tree('mr'), 'kr', Tree('r')),
Method('.grow()'),
Two(Two(Tree('l'),'kl', Tree('ml')), 'km', Two(Tree('mr'), 'kr', Tree('r')))
])
In [ ]:
def _grow(self):
"your code here"
Four._grow = _grow
The function $\texttt{demo}()$ creates a small ordered binary tree.
In [ ]:
m = Nil()
m.isNil()
m.toDot()
In [ ]:
m = m.insert("anton")
m.toDot()
In [ ]:
m = m.insert("hugo" )
m.toDot()
In [ ]:
m = m.insert("gustav")
m.toDot()
In [ ]:
m = m.insert("jens")
m.toDot()
In [ ]:
m = m.insert("hubert")
m.toDot()
In [ ]:
m = m.insert("andre")
m.toDot()
In [ ]:
m = m.insert("philipp")
m.toDot()
In [ ]:
m = m.insert("rene")
m.toDot()
In [ ]:
m = m.insert("walter")
m.toDot()
Let's generate 2-3 tree with random keys.
In [ ]:
import random as rnd
In [ ]:
t = Nil()
for k in range(30):
k = rnd.randrange(100)
t = t.insert(k)
t.toDot()
Lets us try to create a tree by inserting sorted numbers because that resulted in linear complexity for ordered binary trees.
In [ ]:
M = Nil()
for k in range(38):
M = M.insert(k)
M.toDot()
Finally, we compute the set of prime numbers $\leq 100$. Mathematically, this set is given as follows: $$ \bigl\{2, \cdots, 100 \bigr\} - \bigl\{ i \cdot j \bigm| i, j \in \{2, \cdots, 100 \}\bigr\}$$ First, we compute the set of products $\bigl\{ i \cdot j \bigm| i, j \in \{2, \cdots, 100 \}\bigr\}$. Then, we insert all naturals numbers less than 100 that are not products into the set of primes.
In [ ]:
Products = Nil()
for i in range(2, 51):
for j in range(i, 100 // i + 1):
Products = Products.insert(i * j)
Primes = Nil()
for k in range(2, 101):
if not Products.member(k):
Primes = Primes.insert(k)
Primes.toDot()
In [ ]: