In [1]:
type Node struct {
child map[byte] *Node // child pointers, key= first char of edge label
id int
lab string // label on edge leading to this node
}
In [2]:
// Return new node with given id and incoming edge label
func NewNode(id int, lab string) *Node {
node := new (Node)
node.child = make(map[byte] *Node)
node.id = id
node.lab = lab
return node
}
In [3]:
// Points to particular place in tree; might be partway along an edge
type TreeCursor struct {
node *Node // node below end of walk
depth int // offset along incoming edge
}
// Return null tree cursor
func EmptyTreeCursor() *TreeCursor {
tc := new (TreeCursor)
tc.node = nil
tc.depth = 0
return tc
}
// Return tree cursor with particular node and offset
func NewTreeCursor(node *Node, depth int) *TreeCursor {
tc := new (TreeCursor)
tc.node = node
tc.depth = depth
return tc
}
In [4]:
// Suffix tree is simply a root node
type SuffixTree struct {
root *Node
}
// Construct suffix tree in quadratic time
func NewSuffixTree(t string) *SuffixTree {
t = t + "$"
st := new(SuffixTree)
st.root = NewNode(0, "")
nextId := 1
for i := 1; i < len(t); i++ {
cur := st.root
j := i
for j < len(t) {
c := t[j]
if child, ok := cur.child[c]; ok {
k := j+1
for k - j < len(child.lab) && t[k] == child.lab[k - j] {
k++
}
if k - j == len(child.lab) { // walked entire edge
cur = child
j = k
} else { // new internal node & leaf required
cExist := child.lab[k - j]
cNew := t[k]
mid := NewNode(nextId, t[j:k])
mid.child[cNew] = NewNode(nextId + 1, t[k:])
nextId += 2
mid.child[cExist] = child
child.lab = child.lab[k-j:]
cur.child[c] = mid
}
} else {
cur.child[c] = NewNode(nextId, t[j:])
nextId++
}
}
}
return st
}
// Follow path through tree and return tree cursor from end of walk
func (st *SuffixTree) followPath(s string) *TreeCursor {
cur := st.root
for i := 0; i < len(s); {
c := s[i]
if child, ok := cur.child[c]; ok {
j := i + 1
for j - i < len(child.lab) && j < len(s) && s[j] == child.lab[j - i] {
j++
}
if j - i == len(child.lab) {
// Exhausted edge; descend
cur = child
i = j
} else if j == len(s) {
// Exhausted query string in middle of edge
return NewTreeCursor(child, j-i)
} else {
// Fell off in middle of edge
return EmptyTreeCursor()
}
} else {
return EmptyTreeCursor()
}
}
// Exhausted query string at internal node
return NewTreeCursor(cur, 0)
}
// Return true iff string has 's' as substring
func (st *SuffixTree) hasSubstring(s string) bool {
tc := st.followPath(s)
return tc.node != nil
}
// Return true iff string has 's' as suffix
func (st *SuffixTree) hasSuffix(s string) bool {
tc := st.followPath(s)
if tc.node == nil {
return false
}
if tc.depth == 0 {
_, ok := tc.node.child['$']
return ok
} else {
return tc.node.lab[tc.depth] == '$'
}
}
In [5]:
stree := NewSuffixTree("there would have been a time for such a word")
In [6]:
stree.hasSubstring("nope")
In [7]:
stree.hasSubstring("would have been")
In [8]:
stree.hasSubstring("such a word")
In [9]:
stree.hasSubstring("")
In [10]:
stree.hasSuffix("would have been")
In [11]:
stree.hasSuffix("such a word")
In [12]:
stree.hasSuffix("")