In [1]:
import sys, math, random, logging, time
import numpy as np
import pandas as pd
exinp = """1324
3415436
1023422
03517
3555""".split()
debug = True
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('sorttest')

In [2]:
class MinLoc(object):
    def __init__(self):
        self.num = 20
        self.ind = -1

def count_sort(nums, reverse=False):
    counts = [0]*10
    minevn = MinLoc()
    minodd = MinLoc()
    for i, n in enumerate(nums):
        counts[n] += 1
        if n % 2 == 0 and n < minevn.num:
            minevn.num = n
            minevn.ind = i
        elif n % 2 == 1 and n < minodd.num:
            minodd.num = n
            minodd.ind = i
    minodd.num += 10
    minind = min(minevn, minodd, key=lambda a: a.num)
    counts[minind.num % 10] -= 1
    nums2 = list(nums)
    mn = nums2.pop(minind.ind)
    sm = 0
    if reverse:
        for n in range(10):
            i = 10 - (n+1)
            sm += counts[i]
            counts[i] = sm
    else:
        for n in range(10):
            sm += counts[n]
            counts[n] = sm
    log.debug("counts: %s", counts)
    log.debug("nums2: %s", nums2)
    ret = [0]*len(nums2)
    for n in nums2:
        ret[counts[n] - 1] = n
        counts[n] -= 1
    if reverse:
        ret.append(mn)
    else:
        ret.insert(0, mn)
    log.debug('ret: %s', ret)
    return ret

In [3]:
def count_sort_simpler(nums, dig=1):
    counts = [0]*10
    a = 10**(dig-1)
    for n in nums:
        d = (n // a)%10
        counts[d] += 1
    sm = 0
    for n in range(9):
        sm += counts[n]
        counts[n] = sm
    counts[9] = 0
    ret = [0]*len(nums)
    for n in nums:
        d = (n // a)%10 - 1
        ret[counts[d]] = n
        counts[d] += 1
    return ret

def radix_sort(nums, digs):
    for i in range(digs):
        nums = count_sort_simpler(nums, i+1)
    return nums

In [4]:
for ex in exinp:
    exar = [int(n) for n in ex]
    numarr = count_sort_simpler(exar)
    num = sum(c*10**i for i,c in enumerate(numarr))
    print(num)

arr = [random.randrange(10000) for _ in range(100)]
sarr = radix_sort(arr, 4)


4321
6544331
4322210
75310
5553

In [5]:
def quicksort(arr):
    _qs(arr, 0, len(arr) - 1)
    return arr
    
def _qs(arr, start, pivot):
    if (start >= pivot):
        return
    # i is the position to switch pivot to at the end,
    # anything before it is below pivot
    i = start
    for j in range(start, pivot):
        if arr[j] < arr[pivot]:
            arr[j], arr[i] = arr[i], arr[j]
            i += 1
    arr[i], arr[pivot] = arr[pivot], arr[i]
    
    _qs(arr, start, i-1)
    _qs(arr, i+1, pivot)

In [6]:
arr = np.random.randn(1000)
arr[0:5]
quicksort(arr)
print("Head:", arr[0:10], ", tail:", arr[-10:])

print(quicksort([2,1]))
print(quicksort([3,2,1]))
print(quicksort([0,1]))
print(quicksort([5,5,3,5,5]))
print(quicksort(np.random.randn(10)))


Head: [-3.1710042  -2.74716006 -2.7450569  -2.70060765 -2.64743484 -2.58090857
 -2.5727584  -2.56140395 -2.50064879 -2.35605027] , tail: [2.34059362 2.35850281 2.4169752  2.46877573 2.76212822 2.84053984
 2.90528896 3.00847958 3.03474865 3.21393233]
[1, 2]
[1, 2, 3]
[0, 1]
[3, 5, 5, 5, 5]
[-2.52989668 -1.2293751  -0.85946066 -0.50522481 -0.14295855  0.12256594
  0.24502217  0.98119222  1.0026667   1.08949702]

In [7]:
def mergesort(arr):
    ret = _ms(arr, 0, len(arr) - 1)
    return ret
    
def _ms(arr, start, end):
    if end > start:
        mid = (end + start)//2
        arr1 = _ms(arr, start, mid)
        arr2 = _ms(arr, mid + 1, end)
        i,j = 0, 0
        ret = []
        while i <= (mid - start) and j <= (end - mid - 1):
            if arr1[i] <= arr2[j]:
                ret.append(arr1[i])
                i += 1
            else:
                ret.append(arr2[j])
                j += 1
        if i <= (mid - start):
            ret.extend(arr1[i:])
        else:
            ret.extend(arr2[j:])
        return ret
    else:
        return [arr[start]]

In [8]:
def test_sort(arr):
    lastel = arr[0]
    for a in arr:
        if lastel > a:
            raise Exception("array is not actually sorted")
        lastel = a

In [9]:
print(mergesort(np.random.randn(10)))
test_sort(mergesort(np.random.randn(100000)));

test_sort(quicksort(np.random.randn(100000)))

test_sort(sorted(np.random.randn(100000)))


[-0.8662723503508571, -0.8038962839009093, -0.7911972014564661, -0.6688560372024959, -0.15387609305675792, 0.007821992829689728, 0.07876027886299873, 0.08725843034543967, 0.6851003600588579, 1.036660112600942]

In [10]:
def longest_proper_suffix(st):
    lps = [0]*len(st)
    l = 0
    for i in range(1, len(st)):
        if st[l] == st[i]:
            l += 1
        else:
            while l > 0 and st[0:l] != st[i-l+1:i+1]:
                l -= 1
        lps[i] = l
    
    return lps

print(longest_proper_suffix('abaaa'))
print(longest_proper_suffix('myprefixmypre'))
print(longest_proper_suffix('aaababaaabababababbbababaaaabb'))
print(longest_proper_suffix('aaaaa'))
print(longest_proper_suffix('ababababababa'))


[0, 0, 1, 1, 1]
[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5]
[0, 1, 2, 0, 1, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 3, 4, 0]
[0, 1, 2, 3, 4]
[0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

In [11]:
st = 'aaabaabaaa'
pt = 'abaaa'
log.setLevel(logging.INFO)

def string_search_kmp(st, pt):
    lps = longest_proper_suffix(pt)
    log.debug("suffixes: %s", lps)
    sti, pti, matches = 0, 0, []
    pl, sl = len(pt), len(st)
    while sti - pti <= sl - pl:
        log.debug("str index, pattern index: [%d, %d]", sti, pti)
        for i in range(pti, pl):
            if st[sti+i] != pt[i]:
                log.debug("index %d in string doesn't match %d in pattern, exiting",
                          sti+i, i)
                log.debug("matching pref length: %d", lps[i-1])
                pti = 0 if i == 0 else lps[i-1]
                sti += 1 if i == 0 else (i-pti)
                break
            if i + 1 == pl:
                log.debug("adding a match starting at place: %d", sti)
                matches.append(sti)
                pti = lps[i]
                sti += (i+1-pti)
    return matches

print(string_search_kmp(st, pt))


[5]

In [12]:
log.setLevel(logging.INFO)
print(string_search_kmp("AABAACAADAABAABA", "AABA"))
print(string_search_kmp("AAAAAAAAAAAAAAAAAB", "AAAAB"))
print(string_search_kmp("ABABABCABABABCABABABC", "ABABAC"))
# log.setLevel(logging.DEBUG)
print(string_search_kmp("AAAAABAAABA" , "AAAA"))


[0, 9, 12]
[13]
[]
[0, 1]

In [13]:
def fib_search(arr, x):
    al = len(arr)
    # avoid worst case
    if arr[-1] == x:
        return al - 1
    elif arr[-1] < x or arr[0] > x:
        return None
    elif arr[0] == x:
        return 0
    
    fibs, lf, fib_i = [1,1], 1, 1
    while lf < al:
        lf = fibs[fib_i] + fibs[fib_i-1]
        fibs.append(lf)
        fib_i += 1
    log.debug("Fibs: %s", fibs)
    
    return _fibsearch(arr, al, 0, fib_i-2, fibs, x)
    
def _fibsearch(arr, len_arr, start, fib_i, fibs, x):
    if fib_i < 0:
        return start if arr[start] == x else None
    arr_i_mid =  min(len_arr - 1, fibs[fib_i] + start)
    arr_v_mid = arr[arr_i_mid]
    if x == arr_v_mid:
        return arr_i_mid
    elif x > arr_v_mid:
        log.debug("Index value %d is less than x, reducing fib index to %d, setting start ind to %d",
                  arr_i_mid, fib_i - 1, arr_i_mid)
        return _fibsearch(arr, len_arr, arr_i_mid, fib_i - 1, fibs, x)
    else:
        log.debug("Index value %d is greater than x, reducing fib index to %d, setting start ind to %d",
                  arr_i_mid, fib_i - 2, start)
        return _fibsearch(arr, len_arr, start, fib_i - 2, fibs, x)

In [14]:
log.setLevel(logging.DEBUG)
ra = sorted(np.random.randint(100, size=101))
print(ra)
search_val = 20
ri = fib_search(ra, search_val)
print(ri)
try:
#     assert ri == ra.index(10), f"found index does not match value from .index call: {ri}"
    assert ra[ri] == search_val
except TypeError:
    assert search_val not in ra
print(fib_search([1,3,4,5,7,18,88], 1))


DEBUG:sorttest:Fibs: [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144]
DEBUG:sorttest:Index value 55 is greater than x, reducing fib index to 7, setting start ind to 0
DEBUG:sorttest:Index value 21 is greater than x, reducing fib index to 5, setting start ind to 0
DEBUG:sorttest:Index value 8 is less than x, reducing fib index to 4, setting start ind to 8
DEBUG:sorttest:Index value 13 is less than x, reducing fib index to 3, setting start ind to 13
DEBUG:sorttest:Index value 16 is less than x, reducing fib index to 2, setting start ind to 16
[1, 2, 2, 4, 4, 5, 8, 8, 9, 12, 13, 15, 16, 16, 17, 19, 19, 19, 20, 21, 21, 21, 21, 22, 24, 25, 25, 26, 26, 27, 29, 29, 29, 31, 33, 35, 38, 38, 38, 40, 40, 41, 44, 46, 46, 46, 48, 48, 49, 51, 51, 51, 52, 53, 53, 53, 55, 56, 57, 57, 57, 59, 60, 63, 63, 65, 66, 67, 68, 70, 72, 72, 74, 76, 76, 77, 77, 78, 80, 81, 81, 82, 82, 83, 84, 84, 86, 86, 88, 90, 93, 94, 94, 95, 97, 97, 97, 98, 98, 99, 99]
18
0

In [15]:
def find_instances(arr, num):
    la = len(arr)
    l, r = -1, la
    return [i for i in range(
                _findlower(arr, num, l, r) + 1,
                _findupper(arr, num, l, r))]
    
def _findlower(arr, num, l, r):
    log.debug("find limit: l:%d, r:%d", l, r)
    if l == r:
        return l
    m = math.ceil((l + r)/2)
    if arr[m] < num:
        return _findlower(arr, num, m, r)
    else:
        return _findlower(arr, num, l, m-1)
    
def _findupper(arr, num, l, r):
    log.debug("find limit: l:%d, r:%d", l, r)
    if l == r:
        return l
    m = math.floor((l + r)/2)
    if arr[m] > num:
        return _findupper(arr, num, l, m)
    else:
        return _findupper(arr, num, m+1, r)

In [16]:
log.setLevel(logging.DEBUG)
print(find_instances([3,3,5,5,5,5,7], 5))
print(find_instances([3,3,5,5,5,5,7], 4))


DEBUG:sorttest:find limit: l:-1, r:7
DEBUG:sorttest:find limit: l:-1, r:2
DEBUG:sorttest:find limit: l:1, r:2
DEBUG:sorttest:find limit: l:1, r:1
DEBUG:sorttest:find limit: l:-1, r:7
DEBUG:sorttest:find limit: l:4, r:7
DEBUG:sorttest:find limit: l:6, r:7
DEBUG:sorttest:find limit: l:6, r:6
DEBUG:sorttest:find limit: l:-1, r:7
DEBUG:sorttest:find limit: l:-1, r:2
[2, 3, 4, 5]
DEBUG:sorttest:find limit: l:1, r:2
DEBUG:sorttest:find limit: l:1, r:1
DEBUG:sorttest:find limit: l:-1, r:7
DEBUG:sorttest:find limit: l:-1, r:3
DEBUG:sorttest:find limit: l:2, r:3
DEBUG:sorttest:find limit: l:2, r:2
[]

In [17]:
level = 0
def unsorted_search(arr, num):
    global level
    level = 0
    return _us(arr, num, 0, len(arr)-1, 0)

def _us(arr, num, l, r, lev):
    global level
    level = max(lev, level)
    if l > r:
        return -1
    
    m = (l+r)//2
    if arr[m] == num:
        return m
    else:
        step1 = _us(arr, num, l, m-1, lev+1)
        if step1 == -1:
            step2 = _us(arr, num, m+1, r, lev+1)
            return max(step1, step2)
        else:
            return step1
    
def recSearch( arr, x):
    global level
    level = 0
    return _recSearch(arr, 0, len(arr)-1, x, 0)
    
def _recSearch( arr, l, r, x, lev):
    global level
    level = max(level, lev)
    if r < l: 
        return -1
    if arr[l] == x: 
        return l 
    if arr[r] == x: 
        return r 
    return _recSearch(arr, l+1, r-1, x, lev+1)

In [20]:
la = np.random.randint(10000, size=990000)
# log.debug("array: %s", la)
times = [time.time()]
print(unsorted_search(la, 50))
print("max level:", level)
times.append(time.time())
try:
    print(recSearch(la, 50))
except RecursionError:
    print('Execution failed with recursion error')
print("max level:", level)
times.append(time.time())
df = pd.DataFrame({'start': times[0:2], 'end': times[1:3]}, index=['recursive', 'linear'])
df -= times[0]
df['diff'] = df['end'] - df['start']
df


4645
max level: 20
Execution failed with recursion error
max level: 2961
Out[20]:
start end diff
recursive 0.00000 0.010480 0.010480
linear 0.01048 0.018966 0.008486

In [29]:
def contains(st, pat):
    pl = len(pat)
    end = len(st) - pl
    if end < 0:
        return -1
    return _contains(st, pat, 0, end, pl)
    
def _contains(st, pat, l, r, pl):
    if l > r:
        return -1
    m = (l+r)//2
    if _test(st, pat, m, 0, pl):
        return m
    else:
        return (_contains(st, pat, l, m-1, pl) or
                _contains(st, pat, m+1, r, pl))
    
def _test(st, pat, sti, pti, lng):
    log.debug("testing indices: string: %d, pattern: %d", sti, pti)
    if pti == lng:
        return True
    elif st[sti] != pat[pti]:
        return False
    else:
        return _test(st, pat, sti+1, pti+1, lng)`

In [31]:
s = "THIS IS A TEST TEXT"
p = "TEST"
print(contains(s, p))

s = "geeksforgeeks"
p = "quiz"
print(contains(s, p))


DEBUG:sorttest:testing indices: string: 7, pattern: 0
DEBUG:sorttest:testing indices: string: 3, pattern: 0
DEBUG:sorttest:testing indices: string: 1, pattern: 0
DEBUG:sorttest:testing indices: string: 0, pattern: 0
DEBUG:sorttest:testing indices: string: 1, pattern: 1
DEBUG:sorttest:testing indices: string: 2, pattern: 0
DEBUG:sorttest:testing indices: string: 5, pattern: 0
DEBUG:sorttest:testing indices: string: 4, pattern: 0
DEBUG:sorttest:testing indices: string: 6, pattern: 0
DEBUG:sorttest:testing indices: string: 11, pattern: 0
DEBUG:sorttest:testing indices: string: 9, pattern: 0
DEBUG:sorttest:testing indices: string: 8, pattern: 0
DEBUG:sorttest:testing indices: string: 10, pattern: 0
DEBUG:sorttest:testing indices: string: 11, pattern: 1
DEBUG:sorttest:testing indices: string: 12, pattern: 2
DEBUG:sorttest:testing indices: string: 13, pattern: 3
DEBUG:sorttest:testing indices: string: 14, pattern: 4
DEBUG:sorttest:testing indices: string: 4, pattern: 0
DEBUG:sorttest:testing indices: string: 1, pattern: 0
DEBUG:sorttest:testing indices: string: 0, pattern: 0
DEBUG:sorttest:testing indices: string: 2, pattern: 0
DEBUG:sorttest:testing indices: string: 3, pattern: 0
DEBUG:sorttest:testing indices: string: 7, pattern: 0
DEBUG:sorttest:testing indices: string: 5, pattern: 0
DEBUG:sorttest:testing indices: string: 6, pattern: 0
DEBUG:sorttest:testing indices: string: 8, pattern: 0
DEBUG:sorttest:testing indices: string: 9, pattern: 0
True
False

In [33]:
np.random.randint(50, size=5)


Out[33]:
array([15,  8, 31, 34,  5])