In [1]:
// A simple example of a substring index; mirrors example from lecture notes
// we're going to extract 4 substrings of length 5, one every 4 positions, like this:
// t: CGTGCCTACTTACTTACAT
// substring 1: CGTGC (offset 0)
// substring 2: CCTAC (offset 4)
// substring 3: CTTAC (offset 8)
// substring 4: CTTAC (offset 12)
t := "CGTGCCTACTTACTTACAT"
In [2]:
// substringize extracts substrings of length ln from t, one every iv bytes,
// and returns them together with their offsets as two parallel slices:
// the k-th returned string equals t[off:off+ln] where off is the k-th
// returned offset.
//
// ln = length of substrings to extract
// iv = distance between substrings to extract; e.g. 1 means take *every* substring
//
// Both returned slices are empty (non-nil) when ln is negative, iv is not
// positive, or ln exceeds len(t).
func substringize(t string, ln int, iv int) ([]string, []int) {
	// A non-positive step would never advance i (iv == 0 loops forever) or
	// would walk it below zero, and a negative length makes t[i:i+ln] an
	// invalid slice expression; treat all of these as "nothing to extract".
	if ln < 0 || iv <= 0 || ln > len(t) {
		return []string{}, []int{}
	}

	last := len(t) - ln // largest offset at which a full-length substring fits
	n := last/iv + 1    // exact number of offsets the loop below visits
	subs := make([]string, 0, n)
	offsets := make([]int, 0, n)
	for i := 0; i <= last; i += iv {
		subs = append(subs, t[i:i+ln])
		offsets = append(offsets, i)
	}
	return subs, offsets
}
In [3]:
// Extract length-5 substrings at every 4th offset of the example text;
// this yields [CGTGC CCTAC CTTAC CTTAC] with offsets [0 4 8 12].
substringize("CGTGCCTACTTACTTACAT", 5, 4)
Out[3]:
In [4]:
// mapize is like substringize, but uses a map data structure: every
// substring of length ln taken from t at offsets 0, iv, 2*iv, ... becomes a
// key, and its value lists, in increasing order, the offsets at which that
// substring was extracted.
//
// ln = length of substrings to extract
// iv = distance between substrings to extract; e.g. 1 means take *every* substring
//
// The returned map is empty (non-nil) when ln is negative, iv is not
// positive, or ln exceeds len(t).
func mapize(t string, ln int, iv int) map[string][]int {
	index := make(map[string][]int)
	// A non-positive step would never advance i (iv == 0 loops forever) or
	// would walk it below zero, and a negative length makes t[i:i+ln] an
	// invalid slice expression; treat all of these as "nothing to index".
	if ln < 0 || iv <= 0 {
		return index
	}

	last := len(t) - ln // largest offset at which a full-length substring fits
	for i := 0; i <= last; i += iv {
		key := t[i : i+ln]
		// Appending to the nil slice of a missing key creates the entry.
		index[key] = append(index[key], i)
	}
	return index
}
In [5]:
// Build the map form of the index over the example text: each length-5
// substring taken at every 4th offset maps to the offsets where it was
// extracted, i.e. CGTGC -> [0], CCTAC -> [4], CTTAC -> [8 12].
index := mapize("CGTGCCTACTTACTTACAT", 5, 4)
index
Out[5]:
In [6]:
// p is the string whose occurrences in t we want to locate via the index
p := "CTTACTTA"
In [7]:
// index: give me a hint where I should look for occurrences of p in t
// The key is the first 5 bytes of p ("CTTAC"), matching the substring length
// the index was built with; ok reports whether that key is present. With the
// index built from the example text above, elem is [8 12]. These offsets are
// only candidates: the rest of p (p[5:]) still has to be compared against t.
elem, ok := index[p[:5]]
elem
Out[7]: